Browse Source

fix everything breaking due to 2b2efa3, and also fix library ordening when it would cause problems (closes #5)

pull/17/head
PoroCYon 1 year ago
parent
commit
d3d3d1eb11
6 changed files with 434 additions and 200 deletions
  1. +2
    -2
      README.md
  2. +2
    -2
      ld/link_common.ld
  3. +21
    -13
      smol/emit.py
  4. +246
    -42
      smol/parse.py
  5. +120
    -102
      smold.py
  6. +43
    -39
      smoldd.py

+ 2
- 2
README.md View File

@ -18,8 +18,8 @@ enhancements and bugfixes by blackle.
`.text.startup._start`! Otherwise, the linker script will fail silently, and
the smol startup/symbol resolving code will jump to an undefined location.
***NOTE***: C++ exceptions, RTTI, global *external* variables, global
constructors and destructors (the ELF `.ctors`/`.dtors`/
***NOTE***: C++ exceptions, RTTI, global *external* variables, thread-local
storage, global constructors and destructors (the ELF `.ctors`/`.dtors`/
`attribute((con-/destructor))` things, not the C++ language constructs), ...
aren't supported yet, and probably won't be anytime soon.


+ 2
- 2
ld/link_common.ld View File

@ -54,6 +54,8 @@ SECTIONS {
_smol_dataandbss_size = _smol_bss_end - _smol_data_start;
_smol_total_memsize = . - _smol_origin;
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
@ -99,7 +101,5 @@ SECTIONS {
/DISCARD/ : {
*(.*)
}
_smol_total_memsize = . - _smol_origin;
}

+ 21
- 13
smol/emit.py View File

@ -24,18 +24,25 @@ def get_min_check_width(libraries, hashfn):
def sort_imports(libraries, hashfn):
    """Sort the symbols of every library by their hash value, in place.

    libraries: mapping of library name -> {symbol name: relocation type}.
    hashfn:    callable applied to a symbol name; its result is the sort key.

    Returns the same ``libraries`` mapping, with each per-library dict
    rebuilt so that it iterates in ascending hash order.
    """
    # The libraries themselves are deliberately NOT reordered: their order is
    # decided by the caller (weak-symbol handling depends on it).
    for libname, symrels in libraries.items():
        # Sort the (symbol, reloc) pairs -- not the bare dict, which would
        # iterate symbol names only and drop the relocation types.
        libraries[libname] = dict(
            sorted(symrels.items(), key=lambda sr: hashfn(sr[0])))
    return libraries
def output_x86(libraries, nx, hashid, outf, det):
outf.write('; vim: set ft=nasm:\n') # be friendly
@ -52,9 +59,10 @@ def output_x86(libraries, nx, hashid, outf, det):
outf.write('%%define HASH_END_TYP %s\n' %
fetch_width_from_bits[get_min_check_width(libraries, hashfn)])
usedrelocs = set({})
usedrelocs = set()
for library, symrels in libraries.items():
for sym, reloc in symrels: usedrelocs.add(reloc)
for sym, reloc in symrels.items():
usedrelocs.add(reloc)
if not(nx) and 'R_386_PC32' in usedrelocs and 'R_386_GOT32X' in usedrelocs:
error("Using a mix of R_386_PC32 and R_386_GOT32X relocations! "+\
@ -94,7 +102,7 @@ dynamic.end:
outf.write('global _symbols\n')
outf.write('_symbols:\n')
for library, symrels in libraries.items():
for sym, reloc in symrels:
for sym, reloc in symrels.items():
# meh
if reloc != 'R_386_PC32' and reloc != 'R_386_GOT32X':
eprintf('Relocation type %s of symbol %s unsupported!' % (reloc, sym))
@ -117,7 +125,7 @@ dynamic.end:
outf.write('global _smolplt\n')
outf.write('_smolplt:\n')
for library, symrels in libraries.items():
for sym, reloc in symrels:
for sym, reloc in symrels.items():
outf.write("""\
[section .text.smolplt.{name}]
global {name}
@ -179,7 +187,7 @@ dynamic.end:
outf.write('global _symbols\n')
outf.write('_symbols:\n')
for library, symrels in libraries.items():
for sym, reloc in symrels:
for sym, reloc in symrels.items():
if reloc not in ['R_X86_64_PLT32', 'R_X86_64_GOTPCRELX', \
'R_X86_64_REX_GOTPCRELX', 'R_X86_64_GOTPCREL']:
error('Relocation type %s of symbol %s unsupported!' % (reloc, sym))
@ -200,7 +208,7 @@ global {name}
outf.write('global _smolplt\n')
outf.write('_smolplt:\n')
for library, symrels in libraries.items():
for sym, reloc in symrels:
for sym, reloc in symrels.items():
if reloc == 'R_X86_64_PLT32':
outf.write("""\
[section .text.smolplt.{name}]


+ 246
- 42
smol/parse.py View File

@ -1,19 +1,38 @@
import glob
import os.path
import re
import subprocess
import struct
import sys
import re
from typing import NamedTuple, List, Dict, OrderedDict, Tuple, Set
from .shared import *
implicit_syms = { '_GLOBAL_OFFSET_TABLE_' }
unsupported_symtyp = { 'NOTYPE', 'TLS', 'OBJECT' } # TODO: support OBJECT, and maybe TLS too
class ExportSym(NamedTuple):
    """Symbol-table columns recorded for one symbol of one library."""

    name: str   # symbol name
    typ: str    # symbol type column (e.g. compared against 'NOTYPE', 'TLS', 'OBJECT')
    scope: str  # binding column (compared against "LOCAL", "WEAK", "GLOBAL")
    vis: str    # visibility column (compared against "DEFAULT")
    ndx: str    # section index column ("UND" when the symbol is undefined)
def decide_arch(inpfiles):
archs=set({})
archs = set()
for fp in inpfiles:
with open(fp, 'rb') as ff:
_ = ff.read(16) # ei_ident
magi = ff.read(4) # EI_MAGx of ei_ident
if magi != b'\x7fELF':
error("Input file '%s' is not an ELF file!" % fp)
_ = ff.read(12) # rest of ei_ident
_ = ff.read( 2) # ei_type
machine = ff.read(2) # ei_machine
@ -23,31 +42,43 @@ def decide_arch(inpfiles):
if len(archs) != 1:
error("Input files have multiple architectures, can't link this...")
archn = list(archs)[0]
archn = archs.pop()
if archn not in archmagic:
eprintf("Unknown architecture number " + str(archn) + \
". Consult elf.h and rebuild your object files.")
eprintf("Unknown architecture number %d" + \
". Consult elf.h and rebuild your object files." % archn)
return archmagic[archn]
def build_reloc_typ_table(reo) -> Dict[str, Set[str]]: # (symname, reloctyps) dict
    """Collect, per symbol, the set of relocation types used against it.

    reo: raw bytes of a relocation listing; each line is split on whitespace.
    Only lines with exactly 5 or 7 columns are treated as relocation rows;
    column 2 is the relocation type and column 4 the symbol name.
    """
    table = {}
    for line in reo.decode('utf-8').splitlines():
        cols = line.split()
        # anything else is prolly a 'header' line
        if len(cols) not in (5, 7):
            continue

        symname = cols[4]
        if symname.startswith('.'): # bleh
            continue

        # Conflicting relocation types are not rejected here; that check is
        # only done for import/external symbols (in get_needed_syms).
        if symname not in table:
            table[symname] = set()
        table[symname].add(cols[2])
    return table
def has_lto_object(readelf_bin, files):
for x in files:
with open(x,'rb') as f:
@ -60,32 +91,55 @@ def has_lto_object(readelf_bin, files):
curfile = files[0]
for entry in output.decode('utf-8').splitlines():
stuff = entry.split()
if len(stuff)<2: continue
if stuff[0] == "File:": curfile = stuff[1]
if "__gnu_lto_" in entry or ".gnu.lto" in entry: # assuming nobody uses a symbol called "__gnu_lto_" ...
if len(stuff) < 2:
continue
if stuff[0] == "File:":
curfile = stuff[1]
# assuming nobody uses a symbol called "__gnu_lto_"...
if "__gnu_lto_" in entry or ".gnu.lto" in entry:
return True
return False
def get_needed_syms(readelf_bin, inpfile):
def get_needed_syms(readelf_bin, inpfile) -> Dict[str, str]: # (symname, reloctyp) dict
output = subprocess.check_output([readelf_bin, '-s', '-W',inpfile],
stderr=subprocess.DEVNULL)
outrel = subprocess.check_output([readelf_bin, '-r', '-W',inpfile],
stderr=subprocess.DEVNULL)
#eprintf(output.decode('utf-8'))
#eprintf(outrel.decode('utf-8'))
relocs = build_reloc_typ_table(outrel)
curfile = inpfile
syms=set({})
syms = {}
for entry in output.decode('utf-8').splitlines():
stuff = entry.split()
if len(stuff)<2: continue
if stuff[0] == "File:": curfile = stuff[1]
if len(stuff)<8: continue
#if stuff[7].startswith("__gnu_lto_"): # yikes, an LTO object
# error("{} is an LTO object file, can't use this!".format(curfile))
if stuff[4] == "GLOBAL" and stuff[6] == "UND" and len(stuff[7])>0 \
and stuff[7] in relocs:
syms.add((stuff[7], relocs[stuff[7]]))
if len(stuff) < 2:
continue
if stuff[0] == "File:":
curfile = stuff[1]
if len(stuff) < 8:
continue
scope, ndx, name = stuff[4], stuff[6], stuff[7]
if name.startswith("__gnu_lto_"): # yikes, an LTO object
error("E: {} is an LTO object file, can't use this!".format(curfile))
if scope == "GLOBAL" and ndx == "UND" and len(name) > 0:
if name in relocs:
rlt = relocs[name]
if len(rlt) > 1:
error("E: symbol '%s' has multiple relocations types?! (%s)"
% (name, ', '.join(rlt)))
#syms.add((name, rlt.pop()))
if name in syms:
assert False, ("??? %s" % name)
syms[name] = rlt.pop()
elif name not in implicit_syms:
error("E: symbol '%s' has no relocation type?!" % name)
#needgot = False
#if "_GLOBAL_OFFSET_TABLE_" in syms:
@ -94,12 +148,14 @@ def get_needed_syms(readelf_bin, inpfile):
return syms#, needgot
def format_cc_path_line(entry):
    """Parse one ``category: =dir1:dir2:...`` line into (category, dirs).

    Returns a tuple of the category name and a list of the real paths of
    those listed entries that are existing directories. Duplicates are
    removed while keeping the order in which the directories were listed,
    so that a first-match search over the result is deterministic.
    """
    category, path = entry.split(': ', 1)
    path = path.lstrip('=')
    realdirs = (os.path.realpath(p) for p in path.split(':') if os.path.isdir(p))
    # dict.fromkeys de-duplicates like a set, but preserves insertion order.
    return (category, list(dict.fromkeys(realdirs)))
def get_cc_paths(cc_bin):
bak = os.environ.copy()
os.environ['LANG'] = "C" # DON'T output localized search dirs!
@ -122,6 +178,7 @@ def get_cc_paths(cc_bin):
return paths
def get_cc_version(cc_bin):
bak = os.environ.copy()
os.environ['LANG'] = "C" # DON'T output localized search dirs!
@ -137,23 +194,30 @@ def get_cc_version(cc_bin):
verstr = lines[0].split()[-1]
return ("clang", tuple(map(int, verstr.split('.'))))
def is_valid_elf(f): # Good Enough(tm)
    """Return True when the file at path `f` begins with the ELF magic bytes."""
    with open(f, 'rb') as handle:
        magic = handle.read(4)
    return magic == b'\x7FELF'
def find_lib(spaths, wanted):
    """Locate the shared-object file for library `wanted`.

    Each directory of `spaths` is searched in order, first for
    ``lib<wanted>.so*`` and then for ``<wanted>.so*``. The first match that
    is a regular file starting with the ELF magic is returned. If nothing
    matches in any directory, `error` is called.
    """
    for p in spaths:
        for stem in ('%s/lib%s' % (p, wanted), '%s/%s' % (p, wanted)):
            # glob returns matches in arbitrary (filesystem) order; sort them
            # so the same file is picked on every run.
            for f in sorted(glob.glob(glob.escape(stem) + '.so*')):
                if os.path.isfile(f) and is_valid_elf(f):
                    return f

    error("E: couldn't find library '%s'." % wanted)
def find_libs(spaths, wanted):
    """Resolve every library name in `wanted` through find_lib, keeping order."""
    found = []
    for libname in wanted:
        found.append(find_lib(spaths, libname))
    return found
def list_symbols(readelf_bin, lib):
out = subprocess.check_output([readelf_bin, '-sW', lib], stderr=subprocess.DEVNULL)
@ -175,14 +239,154 @@ def list_symbols(readelf_bin, lib):
return symbols
def build_symbol_map(readelf_bin, libraries) -> Dict[str, Dict[str, ExportSym]]:
    """Map each symbol to the libraries that list it, with its metadata.

    Runs ``readelf -sW`` once over all `libraries` and parses its output.
    The result maps symbol name -> {library file name -> ExportSym}. The
    library key is the last path component of the library file.

    Symbols with LOCAL binding or non-DEFAULT visibility are dropped.
    Undefined ("UND") entries and unsupported symbol types are kept on
    purpose, so that later stages can order libraries and report errors.

    Returns an empty map when `libraries` is empty.
    """
    symbol_map = {} # symname -> (lib -> exportsym)
    if not libraries:
        # readelf fails without input files, and there is nothing to map.
        return symbol_map

    out = subprocess.check_output([readelf_bin, '-sW', *libraries],
                                  stderr=subprocess.DEVNULL)

    # With a single input there is no "File:" line to name the library, so
    # start out with the first (possibly only) one.
    soname = libraries[0].split("/")[-1]
    for line in out.decode('utf-8').splitlines():
        fields = line.split()
        if len(fields) < 2:
            continue

        if fields[0] == "File:":
            soname = fields[1].split("/")[-1]

        if len(fields) != 8:
            continue

        typ, scope, vis, ndx, name = fields[3:8]
        if vis != "DEFAULT" or scope == "LOCAL":
            continue

        # strip away default symbol versions ("name@@VERSION")
        name = name.partition("@@")[0]

        symbol_map.setdefault(name, {})[soname] = ExportSym(name, typ, scope, vis, ndx)

    return symbol_map
# this ordening is specific to ONE symbol!
def build_preferred_lib_order(sym, libs: Dict[str, ExportSym]) -> List[Tuple[str, ExportSym]]:
    """Order the libraries that list `sym` by how strongly they provide it.

    libs: library name -> symbol info for `sym` in that library.

    Returns every (library, symbol info) pair of `libs`, ordered as:
    non-weak definitions, weak definitions, weak undefined references,
    then non-weak undefined references.

    Calls `error` when more than one library defines the symbol non-weakly,
    when several libraries define it weakly with no non-weak definition, or
    when no library defines it at all.
    """
    realdefs, weakdefs, weakunddefs, unddefs = [], [], [], []
    for lib, info in libs.items():
        undefined = info.ndx == "UND"
        if info.scope == "WEAK":
            (weakunddefs if undefined else weakdefs).append((lib, info))
        else:
            # A non-weak "UND" entry means the library only imports the
            # symbol; it must not be counted as a definition.
            (unddefs if undefined else realdefs).append((lib, info))

    if len(realdefs) > 1:
        error("E: symbol '%s' defined non-weakly in multiple libraries! (%s)"
              % (sym, ', '.join(lib for lib, _ in realdefs)))
    if not realdefs and len(weakdefs) > 1:
        error("E: symbol '%s' defined weakly in multiple libraries, "
              "with no non-weak definition! (%s)"
              % (sym, ', '.join(lib for lib, _ in weakdefs)))
    if not realdefs and not weakdefs: # only undefined references are left
        error("E: no default weak implementation found for symbol '%s'" % sym)

    return realdefs + weakdefs + weakunddefs + unddefs
def has_good_subordening(needles, haystack):
    """Check that `needles` appear in `haystack` in non-decreasing position.

    Both arguments are sequences of pairs; only the first item of each pair
    (the key) is compared. Needle keys missing from the haystack are
    ignored. Returns False as soon as a needle key is found at an earlier
    haystack position than the previously found one, True otherwise.
    """
    keys = [entry[0] for entry in haystack]

    floor = 0
    for key, _ in needles:
        if key not in keys: # not in haystack --> eh, let's ignore
            continue
        position = keys.index(key)
        if position < floor:
            return False
        floor = position

    return True
def add_with_ordening(haystack: List[Tuple[str, Dict[str, str]]], # [(libname, (symname -> reloctyp))]
                      needles: List[Tuple[str, ExportSym]], # [(lib, syminfo)]
                      sym: str, reloc: str) \
        -> List[Tuple[str, Dict[str, str]]]:
    """Record a symbol under each library of `needles`, keeping their order.

    For every (library, symbol info) needle, in order:
    * if the library is already in `haystack`, the symbol (named by the
      symbol info) is added to that library's dict with `reloc`;
    * otherwise a new entry for the library is inserted right after the
      position of the previously handled needle.

    `haystack` is modified in place and also returned.
    """
    libnames = [entry[0] for entry in haystack]

    cursor = 0
    for libname, syminfo in needles:
        try:
            found = libnames.index(libname)
        except ValueError: # not in haystack --> add!
            cursor = cursor + 1
            haystack.insert(cursor, (libname, {syminfo.name: reloc}))
            libnames.insert(cursor, libname)
            continue

        assert found >= cursor, "???? (%d <= %d)" % (found, cursor)
        cursor = found

        symrelocs = haystack[cursor][1]
        assert syminfo.name not in symrelocs, "?????"
        symrelocs[syminfo.name] = reloc

    return haystack
def resolve_extern_symbols(needed: Dict[str, str], # symname -> reloctyp
                           available: Dict[str, Dict[str, ExportSym]], # symname -> (lib -> syminfo)
                           args) \
        -> OrderedDict[str, Dict[str, str]]: # libname -> (symname -> reloctyp)
    """Assign every needed symbol to libraries and decide the library order.

    needed:    symbols the input requires, with their relocation type.
    available: for each known symbol, the libraries listing it.
    args:      parsed options; reads `det`, `fuse_dnload_loader` and
               `fskip_zero_value`.

    Symbols listed by several libraries are handled first, since they
    constrain the library order; symbols listed by exactly one library are
    added afterwards. Calls `error` for a needed symbol that no library
    lists. `available` is not modified.
    """
    # first of all, check which needed symbols are provided by which libraries
    bound = {} # sym -> (reloc, (lib -> syminfo))
    for sym, reloc in needed.items():
        if sym not in available:
            error("E: symbol '%s' could not be found." % sym)
        bound[sym] = (reloc, available[sym])

    # default ordening
    bound = bound.items()
    if args.det:
        bound = sorted(bound, key=lambda kv: (len(kv[0]), kv[0]))

    liborder = [] # [(libname, (symname -> reloctyp))]
    for sym, (reloc, libs) in bound:
        if len(libs) <= 1:
            continue

        # preferred: [(lib, syminfo)]
        preferred = build_preferred_lib_order(sym, libs)
        if not args.fskip_zero_value \
                and not has_good_subordening(preferred, liborder):
            if args.fuse_dnload_loader:
                message = "W: unreconcilable library ordenings '%s' and '%s' "+\
                    "for symbol '%s', you are STRONGLY advised to use `-fskip-zero-value'!"
            else:
                message = "W: unreconcilable library ordenings '%s' and '%s' "+\
                    "for symbol '%s', you might want to enable `-fskip-zero-value'."
            # both orderings are lists of (libname, ...) pairs, not dicts
            eprintf(message % (', '.join(lib for lib, _ in liborder),
                               ', '.join(lib for lib, _ in preferred),
                               sym))

        liborder = add_with_ordening(liborder, preferred, sym, reloc)

    # add all those left without any possible preferred ordening
    for sym, (reloc, libs) in bound:
        assert len(libs) != 0, "??? (%s)" % sym
        if len(libs) != 1:
            continue

        # peek at the single entry without removing it from `available`
        only = next(iter(libs.items())) # (lib, syminfo)
        liborder = add_with_ordening(liborder, [only], sym, reloc)

    return OrderedDict(liborder)

+ 120
- 102
smold.py View File

@ -14,6 +14,124 @@ from smol.parse import *
from smol.emit import *
from smol.cnl import *
def preproc_args(args):
    """Validate the parsed options and derive the flags that depend on them.

    Appends `-g` to the compiler/linker/assembler flags when debugging is
    requested, turns the feature switches into `-D...` assembler defines,
    checks that the `nasm`, `cc` and `readelf` binaries exist, and decides
    the target architecture (from `--target`, else from the input files).

    Returns the (modified) `args` and the architecture. Calls `error` on
    conflicting options, missing tools or an unsupported architecture.
    """
    if args.hash16 and args.crc32c: # shouldn't happen anymore
        error("Cannot combine --hash16 and --crc32c!")

    if args.debug:
        args.cflags.append('-g')
        args.ldflags.append('-g')
        args.asflags.append('-g')

    if args.hash16 or args.crc32c:
        args.fuse_dnload_loader = True

    args.fskip_zero_value = args.fskip_zero_value or args.fuse_dnload_loader

    # Each enabled switch is prepended, in this order, to the assembler flags.
    feature_defines = (
        ('fskip_zero_value',    "-DSKIP_ZERO_VALUE"),
        ('fuse_nx',             "-DUSE_NX"),
        ('fskip_entries',       "-DSKIP_ENTRIES"),
        ('funsafe_dynamic',     "-DUNSAFE_DYNAMIC"),
        ('fno_start_arg',       "-DNO_START_ARG"),
        ('fuse_dl_fini',        "-DUSE_DL_FINI"),
        ('fuse_dt_debug',       "-DUSE_DT_DEBUG"),
        ('fuse_dnload_loader',  "-DUSE_DNLOAD_LOADER"),
        ('fuse_interp',         "-DUSE_INTERP"),
        ('falign_stack',        "-DALIGN_STACK"),
        ('fifunc_support',      "-DIFUNC_SUPPORT"),
        ('fifunc_strict_cconv', "-DIFUNC_CORRECT_CCONV"),
        ('hang_on_startup',     "-DHANG_ON_STARTUP"),
    )
    for option, define in feature_defines:
        if getattr(args, option):
            args.asflags.insert(0, define)

    for tool in ['nasm', 'cc', 'readelf']:
        val = getattr(args, tool)
        if val is None or not os.path.isfile(val):
            error("'%s' binary%s not found" %
                  (tool, ("" if val is None else (" ('%s')" % val))))

    # str has no tolower(); lower() is the method that lowercases.
    arch = args.target.lower() if args.target else decide_arch(args.input)
    if arch not in archmagic:
        error("Unknown/unsupported architecture '%s'" % str(arch))

    if args.verbose:
        eprintf("arch: %s" % str(arch))

    if args.hash16 and arch not in ('i386', 3):
        error("Cannot use --hash16 for arch `%s' (not i386)" % (arch))

    return args, arch
def do_smol_run(args, arch):
    """Run the whole link: symbol table generation, assembly, final link.

    Steps, as driven by `args`:
    1. relink the inputs into one temporary object when there are several
       inputs or an LTO object is detected, else use the single input;
    2. collect the needed symbols, locate the requested libraries, and
       resolve which library provides which symbol;
    3. write the generated assembly (to `args.output` when `gen_rt_only`
       is set, else to a temporary file);
    4. unless `gen_rt_only` is set, assemble it and link the final output
       (and the debug output, when `args.debugout` is given).

    Temporary files are removed afterwards unless `args.keeptmp` is set.
    """
    objinput = None
    objinputistemp = False
    tmp_asm_fd, tmp_asm_file, tmp_elf_file = None, None, None
    if not args.gen_rt_only:
        tmp_asm_fd, tmp_asm_file = tempfile.mkstemp(
            prefix='smoltab', suffix='.asm', text=True)
        tmp_elf_fd, tmp_elf_file = tempfile.mkstemp(
            prefix='smolout', suffix='.o')
        os.close(tmp_elf_fd)

    try:
        # if >1 input OR input is LTO object:
        if len(args.input) > 1 or has_lto_object(args.readelf, args.input):
            fd, objinput = tempfile.mkstemp(prefix='smolin', suffix='.o')
            objinputistemp = True
            os.close(fd)
            cc_relink_objs(args.verbose, args.cc, arch, args.input, objinput, args.cflags)
        else:
            objinput = args.input[0]

        # generate smol hashtab
        cc_paths = get_cc_paths(args.cc)
        syms = get_needed_syms(args.readelf, objinput)
        spaths = args.libdir + cc_paths['libraries']
        libs = find_libs(spaths, args.library)
        if args.verbose:
            eprintf("libs = %s" % str(libs))
        libs_symbol_map = build_symbol_map(args.readelf, libs)
        symbols = resolve_extern_symbols(syms, libs_symbol_map, args)

        if args.gen_rt_only:
            taf = open(args.output, 'w')
        else:
            taf = os.fdopen(tmp_asm_fd, mode='w')
            tmp_asm_fd = None # the file object owns (and closes) the fd now
        with taf:
            output(arch, symbols, args.nx, get_hash_id(args.hash16, args.crc32c), taf, args.det)

        if args.verbose:
            eprintf("wrote symtab to %s"
                    % (args.output if args.gen_rt_only else tmp_asm_file))

        if not args.gen_rt_only:
            # assemble hash table/ELF header
            nasm_assemble_elfhdr(args.verbose, args.nasm, arch, args.smolrt,
                                 tmp_asm_file, tmp_elf_file, args.asflags)

            # link with LD into the final executable, w/ special linker script
            ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
                          args.output, args.ldflags, False)
            if args.debugout is not None:
                ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
                              args.debugout, args.ldflags, True)
    finally:
        # only still set when something failed before the fd was wrapped
        if tmp_asm_fd is not None:
            os.close(tmp_asm_fd)
        if not args.keeptmp:
            if objinputistemp:
                os.remove(objinput)
            # both are None in gen_rt_only mode: nothing to remove then
            if tmp_asm_file is not None:
                os.remove(tmp_asm_file)
            if tmp_elf_file is not None:
                os.remove(tmp_elf_file)
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--target', default='', \
@ -146,109 +264,9 @@ def main():
args = parser.parse_args()
if args.hash16 and args.crc32c: # shouldn't happen anymore
error("Cannot combine --hash16 and --crc32c!")
if args.debug:
args.cflags.append('-g')
args.ldflags.append('-g')
args.asflags.append('-g')
if args.hash16 or args.crc32c:
args.fuse_dnload_loader = True
args.fskip_zero_value = args.fskip_zero_value or args.fuse_dnload_loader
if args.fskip_zero_value: args.asflags.insert(0, "-DSKIP_ZERO_VALUE")
if args.fuse_nx: args.asflags.insert(0, "-DUSE_NX")
if args.fskip_entries: args.asflags.insert(0, "-DSKIP_ENTRIES")
if args.funsafe_dynamic: args.asflags.insert(0, "-DUNSAFE_DYNAMIC")
if args.fno_start_arg: args.asflags.insert(0, "-DNO_START_ARG")
if args.fuse_dl_fini: args.asflags.insert(0, "-DUSE_DL_FINI")
if args.fuse_dt_debug: args.asflags.insert(0, "-DUSE_DT_DEBUG")
if args.fuse_dnload_loader: args.asflags.insert(0, "-DUSE_DNLOAD_LOADER")
if args.fuse_interp: args.asflags.insert(0, "-DUSE_INTERP")
if args.falign_stack: args.asflags.insert(0, "-DALIGN_STACK")
if args.fifunc_support: args.asflags.insert(0, "-DIFUNC_SUPPORT")
if args.fifunc_strict_cconv: args.asflags.insert(0, "-DIFUNC_CORRECT_CCONV")
if args.hang_on_startup: args.asflags.insert(0, "-DHANG_ON_STARTUP")
for x in ['nasm','cc','readelf']:
val = args.__dict__[x]
if val is None or not os.path.isfile(val):
error("'%s' binary%s not found" %
(x, ("" if val is None else (" ('%s')" % val))))
arch = args.target.tolower() if len(args.target) != 0 else decide_arch(args.input)
if arch not in archmagic:
error("Unknown/unsupported architecture '%s'" % str(arch))
if args.verbose: eprintf("arch: %s" % str(arch))
if args.hash16 and arch not in ('i386', 3):
error("Cannot use --hash16 for arch `%s' (not i386)" % (arch))
objinput = None
objinputistemp = False
tmp_asm_file, tmp_elf_fd, tmp_elf_file = None, None, None
if not args.gen_rt_only:
tmp_asm_file = tempfile.mkstemp(prefix='smoltab',suffix='.asm',text=True)
tmp_asm_fd = tmp_asm_file[0]
tmp_asm_file = tmp_asm_file[1]
tmp_elf_file = tempfile.mkstemp(prefix='smolout',suffix='.o')
os.close(tmp_elf_file[0])
tmp_elf_file = tmp_elf_file[1]
try:
# if >1 input OR input is LTO object:
if len(args.input) > 1 or has_lto_object(args.readelf, args.input):
fd, objinput = tempfile.mkstemp(prefix='smolin',suffix='.o')
objinputistemp = True
os.close(fd)
cc_relink_objs(args.verbose, args.cc, arch, args.input, objinput, args.cflags)
else: objinput = args.input[0]
# generate smol hashtab
cc_paths = get_cc_paths(args.cc)
syms = get_needed_syms(args.readelf, objinput)
spaths = args.libdir + cc_paths['libraries']
libraries = cc_paths['libraries']
libs = find_libs(spaths, args.library)
if args.verbose: eprintf("libs = %s" % str(libs))
libs_symbol_map = build_symbol_map(args.readelf, libs)
symbols = {}
for symbol, reloc in syms:
if symbol not in libs_symbol_map:
error("could not find symbol: {}".format(symbol))
libs_for_symbol = libs_symbol_map[symbol]
if len(libs_for_symbol) > 1:
error("E: the symbol '%s' is provided by more than one library: %s"
% (symbol, str(libs_for_symbol)))
library = libs_for_symbol.pop()
symbols.setdefault(library, [])
symbols[library].append((symbol, reloc))
with (open(args.output,'w') if args.gen_rt_only
else os.fdopen(tmp_asm_fd, mode='w')) as taf:
output(arch, symbols, args.nx, get_hash_id(args.hash16, args.crc32c), taf, args.det)
if args.verbose:
eprintf("wrote symtab to %s" % tmp_asm_file)
if not args.gen_rt_only:
# assemble hash table/ELF header
nasm_assemble_elfhdr(args.verbose, args.nasm, arch, args.smolrt,
tmp_asm_file, tmp_elf_file, args.asflags)
args, arch = preproc_args(args)
do_smol_run(args, arch)
# link with LD into the final executable, w/ special linker script
ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
args.output, args.ldflags, False)
if args.debugout is not None:
ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
args.debugout, args.ldflags, True)
finally:
if not args.keeptmp:
if objinputistemp: os.remove(objinput)
if not args.gen_rt_only: os.remove(tmp_asm_file)
os.remove(tmp_elf_file)
if __name__ == '__main__':
rv = main()


+ 43
- 39
smoldd.py View File

@ -12,13 +12,15 @@ from smol.parse import *
def readbyte(blob, off):
    """Decode an unsigned 8-bit value at `off`; return (value, next offset)."""
    end = off + 1
    (value,) = struct.unpack('<B', blob[off:end])
    return value, end


def readshort(blob, off):
    """Decode a little-endian unsigned 16-bit value; return (value, next offset)."""
    end = off + 2
    (value,) = struct.unpack('<H', blob[off:end])
    return value, end


def readint(blob, off):
    """Decode a little-endian unsigned 32-bit value; return (value, next offset)."""
    end = off + 4
    (value,) = struct.unpack('<I', blob[off:end])
    return value, end


def readlong(blob, off):
    """Decode a little-endian unsigned 64-bit value; return (value, next offset)."""
    end = off + 8
    (value,) = struct.unpack('<Q', blob[off:end])
    return value, end
def readstr(blob, off):
text = bytearray()
while True:
char, off = readbyte(blob, off)
if char == 0: break
if char == 0:
break
text.append(char)
@ -34,7 +36,8 @@ def find_libs(deflibs, libname):
dirs = os.environ.get('LD_LIBRARY_PATH','').split(':') + deflibs
for d in dirs:
for f in glob.glob(glob.escape(d + '/' + libname) + '*'): yield f
for f in glob.glob(glob.escape("%s/%s" % (d, libname)) + '*'):
yield f
def build_hashtab(readelf_bin, lib, hashid):
symbols = list_symbols(readelf_bin, lib)
@ -79,7 +82,7 @@ def get_hashtbl(elf, blob, args):
txtoff = txtoff + 1
#eprintf("Hash table offset: 0x%08x?" % txtoff)
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0]
htaddr, ___ = readint(blob, txtoff)
else: # 64-bit
txtoff = addr2off(elf, elf.entry)
# scan for 'push IMM32'
@ -92,7 +95,7 @@ def get_hashtbl(elf, blob, args):
# except, this is actually the value we're looking for when the binary
# had been linked with -fuse-dnload-loader! so let's just check the
# value
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0]
htaddr, ___ = readint(blob, txtoff)
#eprintf("ELF entry == 0x%08x" % elf.entry)
if htaddr == elf.entry:
@ -103,7 +106,7 @@ def get_hashtbl(elf, blob, args):
txtoff = txtoff + 1
#eprintf("Hash table offset: 0x%08x?" % txtoff)
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0]
htaddr, ___ = readint(blob, txtoff)
else:
pass#eprintf("Hash table offset: 0x%08x?" % txtoff)
@ -123,7 +126,7 @@ def get_hashtbl(elf, blob, args):
if len(blob) <= htoff and len(tbl) > 0:
break
#if elf.is32bit:
if struct.unpack('<B', blob[htoff:htoff+1])[0] == 0:
if readbyte(blob, htoff)[0] == 0:
break
else:
assert False, "AAAAA rest is %s" % repr(blob[htoff:])
@ -132,36 +135,16 @@ def get_hashtbl(elf, blob, args):
# break
# else:
# assert False, "AAAAA rest is %s" % repr(blob[htoff:])
val = struct.unpack(('<I' if hashsz == 4 else '<H'),
blob[htoff:htoff+hashsz])[0]
if (val & 0xFFFF) == 0: break
val, ___ = (readshort if hashsz == 2 else readint)(blob, htoff)
if (val & 0xFFFF) == 0:
break
tbl.append(val)
#eprintf("sym %08x" % val)
htoff = htoff + (4 if elf.is32bit else 8)
return tbl
def main():
parser = argparse.ArgumentParser()
parser.add_argument('input', type=argparse.FileType('rb'),
default=sys.stdin.buffer, help="input file")
parser.add_argument('--cc',
default=shutil.which('cc'), help="C compiler binary")
parser.add_argument('--readelf',
default=shutil.which('readelf'), help="readelf binary")
parser.add_argument('--map', type=argparse.FileType('r'), help=\
"Get the address of the symbol hash table from the "+\
"linker map output instead of attempting to parse the"+\
" binary.")
hashgrp = parser.add_mutually_exclusive_group()
hashgrp.add_argument('-s', '--hash16', default=False, action='store_true', \
help="Use 16-bit (BSD2) hashes instead of 32-bit djb2 hashes. "+\
"Only usable for 32-bit output.")
hashgrp.add_argument('-c', '--crc32c', default=False, action='store_true', \
help="Use Intel's crc32 intrinsic for hashing. Conflicts with `--hash16'.")
args = parser.parse_args()
def do_smoldd_run(args):
blob = args.input.read()
elf = hackyelf.parse(blob)
@ -176,30 +159,51 @@ def main():
hashresolves = dict({})
noresolves = []
# TODO: group by libs
for x in htbl:
done = False
for l in libhashes.keys():
v = libhashes[l]
for l, v in libhashes.items():
if x in v:
if l not in hashresolves: hashresolves[l] = dict({})
hashresolves[l][x] = v[x]
hashresolves.setdefault(l, {})[x] = v[x]
done = True
break
if not done: noresolves.append(x)
if not done:
noresolves.append(x)
for l in hashresolves.keys():
for l, v in hashresolves.items():
print("%s:" % l)
v = hashresolves[l]
for x in v.keys():
print("\t%08x -> %s" % (x, v[x]))
if len(noresolves) > 0:
print("UNRESOLVED:")
for x in noresolves: print("\t%08x" % x)
for x in noresolves:
print("\t%08x" % x)
return 0
def main():
    """Parse the command line and run the hash-table dump on the input.

    Returns whatever do_smoldd_run returns.
    """
    parser = argparse.ArgumentParser()

    # nargs='?' makes the positional optional; without it the stdin default
    # could never take effect.
    parser.add_argument('input', type=argparse.FileType('rb'), nargs='?',
                        default=sys.stdin.buffer, help="input file")
    parser.add_argument('--cc',
                        default=shutil.which('cc'), help="C compiler binary")
    parser.add_argument('--readelf',
                        default=shutil.which('readelf'), help="readelf binary")
    parser.add_argument('--map', type=argparse.FileType('r'), help=\
        "Get the address of the symbol hash table from the "+\
        "linker map output instead of attempting to parse the"+\
        " binary.")

    hashgrp = parser.add_mutually_exclusive_group()
    hashgrp.add_argument('-s', '--hash16', default=False, action='store_true', \
        help="Use 16-bit (BSD2) hashes instead of 32-bit djb2 hashes. "+\
             "Only usable for 32-bit output.")
    hashgrp.add_argument('-c', '--crc32c', default=False, action='store_true', \
        help="Use Intel's crc32 intrinsic for hashing. Conflicts with `--hash16'.")

    args = parser.parse_args()

    return do_smoldd_run(args)
if __name__ == '__main__':
rv = main()
if rv is None: pass


Loading…
Cancel
Save