fix everything breaking due to 2b2efa3, and also fix library ordering when it would cause problems (closes #5)

This commit is contained in:
PoroCYon 2020-08-25 01:57:00 +02:00
parent 93c83f12b9
commit d3d3d1eb11
6 changed files with 441 additions and 207 deletions

View File

@ -18,8 +18,8 @@ enhancements and bugfixes by blackle.
`.text.startup._start`! Otherwise, the linker script will fail silently, and `.text.startup._start`! Otherwise, the linker script will fail silently, and
the smol startup/symbol resolving code will jump to an undefined location. the smol startup/symbol resolving code will jump to an undefined location.
***NOTE***: C++ exceptions, RTTI, global *external* variables, global ***NOTE***: C++ exceptions, RTTI, global *external* variables, thread-local
constructors and destructors (the ELF `.ctors`/`.dtors`/ storage, global constructors and destructors (the ELF `.ctors`/`.dtors`/
`attribute((con-/destructor))` things, not the C++ language constructs), ... `attribute((con-/destructor))` things, not the C++ language constructs), ...
aren't supported yet, and probably won't be anytime soon. aren't supported yet, and probably won't be anytime soon.

View File

@ -54,6 +54,8 @@ SECTIONS {
_smol_dataandbss_size = _smol_bss_end - _smol_data_start; _smol_dataandbss_size = _smol_bss_end - _smol_data_start;
_smol_total_memsize = . - _smol_origin;
/* Stabs debugging sections. */ /* Stabs debugging sections. */
.stab 0 : { *(.stab) } .stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) } .stabstr 0 : { *(.stabstr) }
@ -99,7 +101,5 @@ SECTIONS {
/DISCARD/ : { /DISCARD/ : {
*(.*) *(.*)
} }
_smol_total_memsize = . - _smol_origin;
} }

View File

@ -24,18 +24,25 @@ def get_min_check_width(libraries, hashfn):
def sort_imports(libraries, hashfn):
    """Sort the imported symbols of every library by their hash value.

    The libraries themselves are deliberately NOT reordered: their order
    matters for weak-symbol resolution, so only the symbols inside each
    library are sorted.

    libraries: mapping of library name -> symbols. The symbols are either a
               mapping (symbol name -> relocation type) or, as in older
               callers, a list of (symbol name, relocation type) tuples.
    hashfn:    callable mapping a symbol name to its sortable hash value.

    Returns the same `libraries` object, updated in place. Mapping values
    stay mappings (in sorted insertion order), list values stay lists.
    """
    def by_hash(symrel):
        # symrel is a (symbol name, relocation type) pair
        return hashfn(symrel[0])

    for libname, symrels in libraries.items():
        if hasattr(symrels, 'items'):
            # Sort the (name, reloc) pairs, not the bare keys: sorting the
            # dict itself would hash single characters and drop the relocs.
            # Relies on dicts keeping insertion order (python >=3.6).
            libraries[libname] = dict(sorted(symrels.items(), key=by_hash))
        else:
            libraries[libname] = sorted(symrels, key=by_hash)

    return libraries
def output_x86(libraries, nx, hashid, outf, det): def output_x86(libraries, nx, hashid, outf, det):
outf.write('; vim: set ft=nasm:\n') # be friendly outf.write('; vim: set ft=nasm:\n') # be friendly
@ -52,9 +59,10 @@ def output_x86(libraries, nx, hashid, outf, det):
outf.write('%%define HASH_END_TYP %s\n' % outf.write('%%define HASH_END_TYP %s\n' %
fetch_width_from_bits[get_min_check_width(libraries, hashfn)]) fetch_width_from_bits[get_min_check_width(libraries, hashfn)])
usedrelocs = set({}) usedrelocs = set()
for library, symrels in libraries.items(): for library, symrels in libraries.items():
for sym, reloc in symrels: usedrelocs.add(reloc) for sym, reloc in symrels.items():
usedrelocs.add(reloc)
if not(nx) and 'R_386_PC32' in usedrelocs and 'R_386_GOT32X' in usedrelocs: if not(nx) and 'R_386_PC32' in usedrelocs and 'R_386_GOT32X' in usedrelocs:
error("Using a mix of R_386_PC32 and R_386_GOT32X relocations! "+\ error("Using a mix of R_386_PC32 and R_386_GOT32X relocations! "+\
@ -94,7 +102,7 @@ dynamic.end:
outf.write('global _symbols\n') outf.write('global _symbols\n')
outf.write('_symbols:\n') outf.write('_symbols:\n')
for library, symrels in libraries.items(): for library, symrels in libraries.items():
for sym, reloc in symrels: for sym, reloc in symrels.items():
# meh # meh
if reloc != 'R_386_PC32' and reloc != 'R_386_GOT32X': if reloc != 'R_386_PC32' and reloc != 'R_386_GOT32X':
eprintf('Relocation type %s of symbol %s unsupported!' % (reloc, sym)) eprintf('Relocation type %s of symbol %s unsupported!' % (reloc, sym))
@ -117,7 +125,7 @@ dynamic.end:
outf.write('global _smolplt\n') outf.write('global _smolplt\n')
outf.write('_smolplt:\n') outf.write('_smolplt:\n')
for library, symrels in libraries.items(): for library, symrels in libraries.items():
for sym, reloc in symrels: for sym, reloc in symrels.items():
outf.write("""\ outf.write("""\
[section .text.smolplt.{name}] [section .text.smolplt.{name}]
global {name} global {name}
@ -179,7 +187,7 @@ dynamic.end:
outf.write('global _symbols\n') outf.write('global _symbols\n')
outf.write('_symbols:\n') outf.write('_symbols:\n')
for library, symrels in libraries.items(): for library, symrels in libraries.items():
for sym, reloc in symrels: for sym, reloc in symrels.items():
if reloc not in ['R_X86_64_PLT32', 'R_X86_64_GOTPCRELX', \ if reloc not in ['R_X86_64_PLT32', 'R_X86_64_GOTPCRELX', \
'R_X86_64_REX_GOTPCRELX', 'R_X86_64_GOTPCREL']: 'R_X86_64_REX_GOTPCRELX', 'R_X86_64_GOTPCREL']:
error('Relocation type %s of symbol %s unsupported!' % (reloc, sym)) error('Relocation type %s of symbol %s unsupported!' % (reloc, sym))
@ -200,7 +208,7 @@ global {name}
outf.write('global _smolplt\n') outf.write('global _smolplt\n')
outf.write('_smolplt:\n') outf.write('_smolplt:\n')
for library, symrels in libraries.items(): for library, symrels in libraries.items():
for sym, reloc in symrels: for sym, reloc in symrels.items():
if reloc == 'R_X86_64_PLT32': if reloc == 'R_X86_64_PLT32':
outf.write("""\ outf.write("""\
[section .text.smolplt.{name}] [section .text.smolplt.{name}]

View File

@ -1,19 +1,38 @@
import glob import glob
import os.path import os.path
import re
import subprocess import subprocess
import struct import struct
import sys import sys
import re from typing import NamedTuple, List, Dict, OrderedDict, Tuple, Set
from .shared import * from .shared import *
implicit_syms = { '_GLOBAL_OFFSET_TABLE_' }
unsupported_symtyp = { 'NOTYPE', 'TLS', 'OBJECT' } # TODO: support OBJECT, and maybe TLS too
class ExportSym(NamedTuple):
    """One symbol-table entry of a library, as parsed from readelf output."""

    # symbol name (build_symbol_map strips the "@@VERSION" suffix first)
    name: str
    # readelf "Type" column
    typ: str
    # readelf "Bind" column
    scope: str
    # readelf "Vis" column
    vis: str
    # readelf "Ndx" column, kept as a string (e.g. "UND")
    ndx: str
def decide_arch(inpfiles): def decide_arch(inpfiles):
archs=set({}) archs = set()
for fp in inpfiles: for fp in inpfiles:
with open(fp, 'rb') as ff: with open(fp, 'rb') as ff:
_ = ff.read(16) # ei_ident magi = ff.read(4) # EI_MAGx of ei_ident
if magi != b'\x7fELF':
error("Input file '%s' is not an ELF file!" % fp)
_ = ff.read(12) # rest of ei_ident
_ = ff.read( 2) # ei_type _ = ff.read( 2) # ei_type
machine = ff.read(2) # ei_machine machine = ff.read(2) # ei_machine
@ -23,31 +42,43 @@ def decide_arch(inpfiles):
if len(archs) != 1: if len(archs) != 1:
error("Input files have multiple architectures, can't link this...") error("Input files have multiple architectures, can't link this...")
archn = list(archs)[0] archn = archs.pop()
if archn not in archmagic: if archn not in archmagic:
eprintf("Unknown architecture number " + str(archn) + \ eprintf("Unknown architecture number %d" + \
". Consult elf.h and rebuild your object files.") ". Consult elf.h and rebuild your object files." % archn)
return archmagic[archn] return archmagic[archn]
def build_reloc_typ_table(reo) -> Dict[str, Set[str]]:
    """Collect, per symbol, the relocation types it is referenced with.

    reo: raw (bytes) output of `readelf -r -W`.

    Returns a dict mapping symbol name -> set of relocation type names.
    A symbol may show up with several relocation types here; whether that
    is an error is decided by the caller, for imported symbols only.
    """
    table: Dict[str, Set[str]] = {}

    for line in reo.decode('utf-8').splitlines():
        fields = line.split()

        # relocation entries have 5 or 7 columns; anything else is
        # prolly a 'header' line
        if len(fields) not in (5, 7):
            continue

        reloctyp = fields[2]
        symname = fields[4]

        # names starting with a dot (e.g. section names) are skipped
        if symname.startswith('.'):
            continue

        if symname not in table:
            table[symname] = set()
        table[symname].add(reloctyp)

    return table
def has_lto_object(readelf_bin, files): def has_lto_object(readelf_bin, files):
for x in files: for x in files:
with open(x,'rb') as f: with open(x,'rb') as f:
@ -60,32 +91,55 @@ def has_lto_object(readelf_bin, files):
curfile = files[0] curfile = files[0]
for entry in output.decode('utf-8').splitlines(): for entry in output.decode('utf-8').splitlines():
stuff = entry.split() stuff = entry.split()
if len(stuff)<2: continue if len(stuff) < 2:
if stuff[0] == "File:": curfile = stuff[1] continue
if "__gnu_lto_" in entry or ".gnu.lto" in entry: # assuming nobody uses a symbol called "__gnu_lto_" ... if stuff[0] == "File:":
curfile = stuff[1]
# assuming nobody uses a symbol called "__gnu_lto_"...
if "__gnu_lto_" in entry or ".gnu.lto" in entry:
return True return True
return False return False
def get_needed_syms(readelf_bin, inpfile):
def get_needed_syms(readelf_bin, inpfile) -> Dict[str, str]: # (symname, reloctyp) dict
output = subprocess.check_output([readelf_bin, '-s', '-W',inpfile], output = subprocess.check_output([readelf_bin, '-s', '-W',inpfile],
stderr=subprocess.DEVNULL) stderr=subprocess.DEVNULL)
outrel = subprocess.check_output([readelf_bin, '-r', '-W',inpfile], outrel = subprocess.check_output([readelf_bin, '-r', '-W',inpfile],
stderr=subprocess.DEVNULL) stderr=subprocess.DEVNULL)
#eprintf(output.decode('utf-8'))
#eprintf(outrel.decode('utf-8'))
relocs = build_reloc_typ_table(outrel) relocs = build_reloc_typ_table(outrel)
curfile = inpfile curfile = inpfile
syms=set({}) syms = {}
for entry in output.decode('utf-8').splitlines(): for entry in output.decode('utf-8').splitlines():
stuff = entry.split() stuff = entry.split()
if len(stuff)<2: continue if len(stuff) < 2:
if stuff[0] == "File:": curfile = stuff[1] continue
if len(stuff)<8: continue if stuff[0] == "File:":
#if stuff[7].startswith("__gnu_lto_"): # yikes, an LTO object curfile = stuff[1]
# error("{} is an LTO object file, can't use this!".format(curfile)) if len(stuff) < 8:
if stuff[4] == "GLOBAL" and stuff[6] == "UND" and len(stuff[7])>0 \ continue
and stuff[7] in relocs:
syms.add((stuff[7], relocs[stuff[7]])) scope, ndx, name = stuff[4], stuff[6], stuff[7]
if name.startswith("__gnu_lto_"): # yikes, an LTO object
error("E: {} is an LTO object file, can't use this!".format(curfile))
if scope == "GLOBAL" and ndx == "UND" and len(name) > 0:
if name in relocs:
rlt = relocs[name]
if len(rlt) > 1:
error("E: symbol '%s' has multiple relocations types?! (%s)"
% (name, ', '.join(rlt)))
#syms.add((name, rlt.pop()))
if name in syms:
assert False, ("??? %s" % name)
syms[name] = rlt.pop()
elif name not in implicit_syms:
error("E: symbol '%s' has no relocation type?!" % name)
#needgot = False #needgot = False
#if "_GLOBAL_OFFSET_TABLE_" in syms: #if "_GLOBAL_OFFSET_TABLE_" in syms:
@ -94,12 +148,14 @@ def get_needed_syms(readelf_bin, inpfile):
return syms#, needgot return syms#, needgot
def format_cc_path_line(entry):
    """Split one "category: path1:path2:..." line into its parts.

    Returns (category, dirs), where dirs holds the de-duplicated real paths
    of those listed entries that are existing directories (in no particular
    order). Leading '=' characters of the path list are dropped.
    """
    category, raw_paths = entry.split(': ', 1)

    unique_dirs = set()
    for candidate in raw_paths.lstrip('=').split(':'):
        if os.path.isdir(candidate):
            unique_dirs.add(os.path.realpath(candidate))

    return (category, list(unique_dirs))
def get_cc_paths(cc_bin): def get_cc_paths(cc_bin):
bak = os.environ.copy() bak = os.environ.copy()
os.environ['LANG'] = "C" # DON'T output localized search dirs! os.environ['LANG'] = "C" # DON'T output localized search dirs!
@ -122,6 +178,7 @@ def get_cc_paths(cc_bin):
return paths return paths
def get_cc_version(cc_bin): def get_cc_version(cc_bin):
bak = os.environ.copy() bak = os.environ.copy()
os.environ['LANG'] = "C" # DON'T output localized search dirs! os.environ['LANG'] = "C" # DON'T output localized search dirs!
@ -137,23 +194,30 @@ def get_cc_version(cc_bin):
verstr = lines[0].split()[-1] verstr = lines[0].split()[-1]
return ("clang", tuple(map(int, verstr.split('.')))) return ("clang", tuple(map(int, verstr.split('.'))))
def is_valid_elf(f): # Good Enough(tm)
    """Tell whether file `f` starts with the 4-byte ELF magic number."""
    with open(f, 'rb') as handle:
        magic = handle.read(4)
    return magic == b'\x7FELF'
def find_lib(spaths, wanted):
    """Locate the shared object for library `wanted` in the search paths.

    For each directory in `spaths` (in order), first files matching
    "lib<wanted>.so*" and then files matching "<wanted>.so*" are tried; the
    first regular file that is_valid_elf accepts is returned. If nothing
    matches, the failure is reported through error().
    """
    for directory in spaths:
        stems = ('%s/lib%s' % (directory, wanted),
                 '%s/%s' % (directory, wanted))
        for stem in stems:
            # only the fixed part is escaped, the '.so*' suffix is the glob
            for candidate in glob.glob(glob.escape(stem) + '.so*'):
                if os.path.isfile(candidate) and is_valid_elf(candidate):
                    return candidate
        #for f in glob.glob(glob.escape(p) + '/lib' + wanted + '.a' ): return f
        #for f in glob.glob(glob.escape(p) + '/'    + wanted + '.a' ): return f

    error("E: couldn't find library '%s'." % wanted)
def find_libs(spaths, wanted):
    """Resolve every library name in `wanted` with find_lib, keeping order."""
    found = []
    for libname in wanted:
        found.append(find_lib(spaths, libname))
    return found
def list_symbols(readelf_bin, lib): def list_symbols(readelf_bin, lib):
out = subprocess.check_output([readelf_bin, '-sW', lib], stderr=subprocess.DEVNULL) out = subprocess.check_output([readelf_bin, '-sW', lib], stderr=subprocess.DEVNULL)
@ -175,14 +239,154 @@ def list_symbols(readelf_bin, lib):
return symbols return symbols
def build_symbol_map(readelf_bin, libraries) -> "Dict[str, Dict[str, ExportSym]]":
    """Map every visible symbol to the libraries mentioning it.

    readelf_bin: path of the readelf binary to run.
    libraries:   sequence of library file paths.

    Returns a dict: symbol name -> (library file name -> ExportSym). Only
    symbols with DEFAULT visibility and non-LOCAL binding are recorded, and
    a trailing "@@VERSION" is stripped from their names. An empty
    `libraries` yields an empty map without running readelf.

    Raises subprocess.CalledProcessError if readelf exits with an error.
    """
    symbol_map = {} # symname -> (lib -> exportsym)

    # nothing to scan: running readelf without files and indexing
    # libraries[0] below would both fail
    if not libraries:
        return symbol_map

    out = subprocess.check_output([readelf_bin, '-sW', *libraries],
                                  stderr=subprocess.DEVNULL)

    # The current library only changes on "File:" lines, so start out with
    # the first library in case the output contains no such line.
    curfile = libraries[0]
    soname = curfile.split("/")[-1]

    for line in out.decode('utf-8').splitlines():
        fields = line.split()
        if len(fields) < 2:
            continue

        if fields[0] == "File:":
            curfile = fields[1]
            soname = curfile.split("/")[-1]

        # symbol rows are the ones with exactly 8 columns
        if len(fields) != 8:
            continue

        typ, scope, vis, ndx, name = fields[3:8]
        # Undefined symbols and unsupported symbol types are NOT filtered
        # here: that's handled further down the line, so that proper errors
        # can be emitted.
        if vis != "DEFAULT" or scope == "LOCAL":
            continue

        # strip away GLIBC versions
        name = re.sub(r"@@.*$", "", name)

        symbol_map.setdefault(name, {})[soname] = ExportSym(name, typ, scope, vis, ndx)

    return symbol_map
# this ordening is specific to ONE symbol!
def build_preferred_lib_order(sym, libs: "Dict[str, ExportSym]") -> "List[Tuple[str, ExportSym]]":
    """Order the libraries providing `sym` by how binding its definition is.

    libs maps library name -> symbol info. The result lists the (lib, info)
    pairs with non-weak entries first, then weak definitions, then weak
    undefined entries; within each group the order of `libs` is kept.

    error() is called when more than one non-weak entry exists, when there
    are several weak definitions but no non-weak one, or when only weak
    undefined entries exist.
    """
    strong, weak, weak_undef = [], [], []
    for entry in libs.items():
        info = entry[1]
        if info.scope != "WEAK":
            strong.append(entry)
        elif info.ndx != "UND":
            weak.append(entry)
        else:
            weak_undef.append(entry)

    too_many_strong = len(strong) > 1
    too_many_weak = len(strong) == 0 and len(weak) > 1
    if too_many_strong or too_many_weak:
        error("E: symbol '%s' defined non-weakly in multiple libraries! (%s)"
            % (sym, ', '.join(libs.keys())))

    if not strong and not weak: # must be in weak_undef
        error("E: no default weak implementation found for symbol '%s'" % sym)

    return strong + weak + weak_undef
def has_good_subordening(needles, haystack):
    """Check that `needles` appear in `haystack` in non-decreasing order.

    Both arguments are sequences of pairs whose first item is the key being
    compared. Needles that do not occur in the haystack are ignored.
    Returns False as soon as a needle is found at an earlier haystack
    position than a previously found one, True otherwise.
    """
    names = [entry[0] for entry in haystack]

    lowest_allowed = 0
    for name, _ in needles:
        if name not in names: # not in haystack --> eh, let's ignore
            continue

        position = names.index(name)
        if position < lowest_allowed:
            return False
        lowest_allowed = position

    return True
def add_with_ordening(haystack: List[Tuple[str, Dict[str, str]]], # [(libname, (symname -> reloctyp))]
                      needles: "List[Tuple[str, ExportSym]]",     # [(lib, syminfo)]
                      sym: str, reloc: str) \
        -> List[Tuple[str, Dict[str, str]]]:
    """Record a symbol under each of its libraries, in the given order.

    Walks `needles` in order. A library already present in `haystack` gets
    the symbol (named by the needle's syminfo) added to its dict with
    relocation type `reloc`; a library not present yet is inserted right
    after the position reached so far. `haystack` is modified in place and
    also returned. `sym` is currently not used.

    Asserts that libraries already present are met at non-decreasing
    positions, and that the symbol wasn't registered for the library yet.
    """
    order = [entry[0] for entry in haystack]
    cursor = 0

    for libname, info in needles:
        if libname in order:
            found = order.index(libname)
            assert found >= cursor, "???? (%d <= %d)" % (found, cursor)
            cursor = found

            relocs_of_lib = haystack[cursor][1]
            assert info.name not in relocs_of_lib, "?????"
            relocs_of_lib[info.name] = reloc
        else: # not in haystack --> add!
            cursor += 1
            haystack.insert(cursor, (libname, {info.name: reloc}))
            order.insert(cursor, libname)

    return haystack
def resolve_extern_symbols(needed: "Dict[str, str]",                       # symname -> reloctyp
                           available: "Dict[str, Dict[str, ExportSym]]",   # symname -> (lib -> syminfo)
                           args) \
        -> "OrderedDict[str, Dict[str, str]]":                             # libname -> (symname -> reloctyp)
    """Assign every needed symbol to its libraries and order those libraries.

    needed:    symbol name -> relocation type of the imported symbols.
    available: symbol name -> (library -> symbol info) of everything the
               libraries offer; it is only read, never modified.
    args:      parsed command-line options; `det`, `fuse_dnload_loader` and
               `fskip_zero_value` are consulted.

    Returns an ordered mapping: library name -> (symbol name -> relocation
    type). Symbols offered by several libraries are placed first, following
    their preferred library order; symbols offered by exactly one library
    are added afterwards. error() is called for a symbol no library offers.
    """
    # first of all, we're going to check which needed symbols are provided by
    # which libraries
    bound = {} # sym -> (reloc, (lib -> syminfo))
    for symname, reloc in needed.items():
        if symname not in available:
            error("E: symbol '%s' could not be found." % symname)
        bound[symname] = (reloc, available[symname])

    # default ordening
    ordered = list(bound.items())
    if args.det:
        ordered.sort(key=lambda kv: (len(kv[0]), kv[0]))

    liborder = [] # [(libname, (symname -> reloctyp))]
    for symname, (reloc, libs) in ordered:
        if len(libs) <= 1:
            continue

        # preferred: [(lib, syminfo)]
        preferred = build_preferred_lib_order(symname, libs)

        if not has_good_subordening(preferred, liborder):
            message = None
            if args.fuse_dnload_loader and not args.fskip_zero_value:
                message = "W: unreconcilable library ordenings '%s' and '%s' "+\
                    "for symbol '%s', you are STRONGLY advised to use `-fskip-zero-value'!"
            if not args.fuse_dnload_loader and not args.fskip_zero_value:
                message = "W: unreconcilable library ordenings '%s' and '%s' "+\
                    "for symbol '%s', you might want to enable `-fskip-zero-value'."
            if message is not None:
                # both orderings are lists of (libname, ...) pairs, not dicts
                eprintf(message % (', '.join(lib for lib, _ in liborder),
                                   ', '.join(lib for lib, _ in preferred),
                                   symname))

        # NOTE(review): add_with_ordening asserts on out-of-order libraries,
        # so a conflicting ordening probably still aborts after the warning
        # above -- confirm whether that is intended.
        liborder = add_with_ordening(liborder, preferred, symname, reloc)

    # add all those left without any possible preferred ordening
    for symname, (reloc, libs) in ordered:
        assert len(libs) != 0, ("??? (%s)" % symname)
        if len(libs) != 1:
            continue

        # peek at the single (lib, syminfo) entry without removing it, so
        # that the caller's `available` map stays intact
        provider = next(iter(libs.items()))
        liborder = add_with_ordening(liborder, [provider], symname, reloc)

    return OrderedDict(liborder)

222
smold.py
View File

@ -14,6 +14,124 @@ from smol.parse import *
from smol.emit import * from smol.emit import *
from smol.cnl import * from smol.cnl import *
def preproc_args(args):
    """Validate the parsed command-line options and derive dependent ones.

    Adds debug flags, forces the options implied by the chosen hash, turns
    the feature switches into assembler defines, checks that the external
    tool binaries exist, and determines the target architecture (from
    `args.target` if given, otherwise from the input files).

    Returns (args, arch); `args` is modified in place. Problems are
    reported through error().
    """
    if args.hash16 and args.crc32c: # shouldn't happen anymore
        error("Cannot combine --hash16 and --crc32c!")

    if args.debug:
        args.cflags.append('-g')
        args.ldflags.append('-g')
        args.asflags.append('-g')
    if args.hash16 or args.crc32c:
        args.fuse_dnload_loader = True

    args.fskip_zero_value = args.fskip_zero_value or args.fuse_dnload_loader

    # (option, assembler define) pairs. Each define is PREPENDED, so the
    # table order is the reverse of the order they end up in asflags.
    feature_defines = (
        ('fskip_zero_value'   , "-DSKIP_ZERO_VALUE"),
        ('fuse_nx'            , "-DUSE_NX"),
        ('fskip_entries'      , "-DSKIP_ENTRIES"),
        ('funsafe_dynamic'    , "-DUNSAFE_DYNAMIC"),
        ('fno_start_arg'      , "-DNO_START_ARG"),
        ('fuse_dl_fini'       , "-DUSE_DL_FINI"),
        ('fuse_dt_debug'      , "-DUSE_DT_DEBUG"),
        ('fuse_dnload_loader' , "-DUSE_DNLOAD_LOADER"),
        ('fuse_interp'        , "-DUSE_INTERP"),
        ('falign_stack'       , "-DALIGN_STACK"),
        ('fifunc_support'     , "-DIFUNC_SUPPORT"),
        ('fifunc_strict_cconv', "-DIFUNC_CORRECT_CCONV"),
        ('hang_on_startup'    , "-DHANG_ON_STARTUP"),
    )
    for option, define in feature_defines:
        if getattr(args, option):
            args.asflags.insert(0, define)

    for x in ['nasm','cc','readelf']:
        val = getattr(args, x)
        if val is None or not os.path.isfile(val):
            error("'%s' binary%s not found" %
                  (x, ("" if val is None else (" ('%s')" % val))))

    # str has no `tolower()`; `lower()` is the method that was meant here
    arch = args.target.lower() if len(args.target) != 0 else decide_arch(args.input)
    if arch not in archmagic:
        error("Unknown/unsupported architecture '%s'" % str(arch))

    if args.verbose:
        eprintf("arch: %s" % str(arch))

    if args.hash16 and arch not in ('i386', 3):
        error("Cannot use --hash16 for arch `%s' (not i386)" % (arch))

    return args, arch
def do_smol_run(args, arch):
    """Run the whole smol link pipeline for the given options.

    Steps: relink the inputs into one object if there are several (or an
    LTO object), collect the needed symbols, resolve them against the
    requested libraries, and write the symbol table assembly. Unless
    `args.gen_rt_only` is set, the table is then assembled and the final
    executable (plus the optional debug output) is linked.

    Temporary files are removed afterwards unless `args.keeptmp` is set.
    """
    objinput = None
    objinputistemp = False
    tmp_asm_fd, tmp_asm_file, tmp_elf_file = None, None, None

    # the temporary asm/object files are only needed for a full link
    if not args.gen_rt_only:
        tmp_asm_fd, tmp_asm_file = tempfile.mkstemp(prefix='smoltab',suffix='.asm',text=True)
        tmp_elf_fd, tmp_elf_file = tempfile.mkstemp(prefix='smolout',suffix='.o')
        os.close(tmp_elf_fd)

    try:
        # if >1 input OR input is LTO object:
        if len(args.input) > 1 or has_lto_object(args.readelf, args.input):
            fd, objinput = tempfile.mkstemp(prefix='smolin',suffix='.o')
            objinputistemp = True
            os.close(fd)
            cc_relink_objs(args.verbose, args.cc, arch, args.input, objinput, args.cflags)
        else:
            objinput = args.input[0]

        # generate smol hashtab
        cc_paths = get_cc_paths(args.cc)
        syms = get_needed_syms(args.readelf, objinput)
        spaths = args.libdir + cc_paths['libraries']
        libs = find_libs(spaths, args.library)
        if args.verbose:
            eprintf("libs = %s" % str(libs))
        libs_symbol_map = build_symbol_map(args.readelf, libs)

        symbols = resolve_extern_symbols(syms, libs_symbol_map, args)

        if args.gen_rt_only:
            symtab_path = args.output
            symtab_out = open(args.output, 'w')
        else:
            symtab_path = tmp_asm_file
            symtab_out = os.fdopen(tmp_asm_fd, mode='w')
            tmp_asm_fd = None # the file object owns (and closes) the descriptor now
        with symtab_out as taf:
            output(arch, symbols, args.nx, get_hash_id(args.hash16, args.crc32c), taf, args.det)
        if args.verbose:
            eprintf("wrote symtab to %s" % symtab_path)

        if not args.gen_rt_only:
            # assemble hash table/ELF header
            nasm_assemble_elfhdr(args.verbose, args.nasm, arch, args.smolrt,
                tmp_asm_file, tmp_elf_file, args.asflags)

            # link with LD into the final executable, w/ special linker script
            ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
                args.output, args.ldflags, False)
            if args.debugout is not None:
                ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
                    args.debugout, args.ldflags, True)
    finally:
        # an early failure leaves the asm descriptor unwrapped: close it here
        if tmp_asm_fd is not None:
            os.close(tmp_asm_fd)
        if not args.keeptmp:
            if objinputistemp:
                os.remove(objinput)
            # both are None in gen_rt_only mode, where nothing was created
            if tmp_asm_file is not None:
                os.remove(tmp_asm_file)
            if tmp_elf_file is not None:
                os.remove(tmp_elf_file)
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-m', '--target', default='', \ parser.add_argument('-m', '--target', default='', \
@ -146,109 +264,9 @@ def main():
args = parser.parse_args() args = parser.parse_args()
if args.hash16 and args.crc32c: # shouldn't happen anymore args, arch = preproc_args(args)
error("Cannot combine --hash16 and --crc32c!") do_smol_run(args, arch)
if args.debug:
args.cflags.append('-g')
args.ldflags.append('-g')
args.asflags.append('-g')
if args.hash16 or args.crc32c:
args.fuse_dnload_loader = True
args.fskip_zero_value = args.fskip_zero_value or args.fuse_dnload_loader
if args.fskip_zero_value: args.asflags.insert(0, "-DSKIP_ZERO_VALUE")
if args.fuse_nx: args.asflags.insert(0, "-DUSE_NX")
if args.fskip_entries: args.asflags.insert(0, "-DSKIP_ENTRIES")
if args.funsafe_dynamic: args.asflags.insert(0, "-DUNSAFE_DYNAMIC")
if args.fno_start_arg: args.asflags.insert(0, "-DNO_START_ARG")
if args.fuse_dl_fini: args.asflags.insert(0, "-DUSE_DL_FINI")
if args.fuse_dt_debug: args.asflags.insert(0, "-DUSE_DT_DEBUG")
if args.fuse_dnload_loader: args.asflags.insert(0, "-DUSE_DNLOAD_LOADER")
if args.fuse_interp: args.asflags.insert(0, "-DUSE_INTERP")
if args.falign_stack: args.asflags.insert(0, "-DALIGN_STACK")
if args.fifunc_support: args.asflags.insert(0, "-DIFUNC_SUPPORT")
if args.fifunc_strict_cconv: args.asflags.insert(0, "-DIFUNC_CORRECT_CCONV")
if args.hang_on_startup: args.asflags.insert(0, "-DHANG_ON_STARTUP")
for x in ['nasm','cc','readelf']:
val = args.__dict__[x]
if val is None or not os.path.isfile(val):
error("'%s' binary%s not found" %
(x, ("" if val is None else (" ('%s')" % val))))
arch = args.target.tolower() if len(args.target) != 0 else decide_arch(args.input)
if arch not in archmagic:
error("Unknown/unsupported architecture '%s'" % str(arch))
if args.verbose: eprintf("arch: %s" % str(arch))
if args.hash16 and arch not in ('i386', 3):
error("Cannot use --hash16 for arch `%s' (not i386)" % (arch))
objinput = None
objinputistemp = False
tmp_asm_file, tmp_elf_fd, tmp_elf_file = None, None, None
if not args.gen_rt_only:
tmp_asm_file = tempfile.mkstemp(prefix='smoltab',suffix='.asm',text=True)
tmp_asm_fd = tmp_asm_file[0]
tmp_asm_file = tmp_asm_file[1]
tmp_elf_file = tempfile.mkstemp(prefix='smolout',suffix='.o')
os.close(tmp_elf_file[0])
tmp_elf_file = tmp_elf_file[1]
try:
# if >1 input OR input is LTO object:
if len(args.input) > 1 or has_lto_object(args.readelf, args.input):
fd, objinput = tempfile.mkstemp(prefix='smolin',suffix='.o')
objinputistemp = True
os.close(fd)
cc_relink_objs(args.verbose, args.cc, arch, args.input, objinput, args.cflags)
else: objinput = args.input[0]
# generate smol hashtab
cc_paths = get_cc_paths(args.cc)
syms = get_needed_syms(args.readelf, objinput)
spaths = args.libdir + cc_paths['libraries']
libraries = cc_paths['libraries']
libs = find_libs(spaths, args.library)
if args.verbose: eprintf("libs = %s" % str(libs))
libs_symbol_map = build_symbol_map(args.readelf, libs)
symbols = {}
for symbol, reloc in syms:
if symbol not in libs_symbol_map:
error("could not find symbol: {}".format(symbol))
libs_for_symbol = libs_symbol_map[symbol]
if len(libs_for_symbol) > 1:
error("E: the symbol '%s' is provided by more than one library: %s"
% (symbol, str(libs_for_symbol)))
library = libs_for_symbol.pop()
symbols.setdefault(library, [])
symbols[library].append((symbol, reloc))
with (open(args.output,'w') if args.gen_rt_only
else os.fdopen(tmp_asm_fd, mode='w')) as taf:
output(arch, symbols, args.nx, get_hash_id(args.hash16, args.crc32c), taf, args.det)
if args.verbose:
eprintf("wrote symtab to %s" % tmp_asm_file)
if not args.gen_rt_only:
# assemble hash table/ELF header
nasm_assemble_elfhdr(args.verbose, args.nasm, arch, args.smolrt,
tmp_asm_file, tmp_elf_file, args.asflags)
# link with LD into the final executable, w/ special linker script
ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
args.output, args.ldflags, False)
if args.debugout is not None:
ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
args.debugout, args.ldflags, True)
finally:
if not args.keeptmp:
if objinputistemp: os.remove(objinput)
if not args.gen_rt_only: os.remove(tmp_asm_file)
os.remove(tmp_elf_file)
if __name__ == '__main__': if __name__ == '__main__':
rv = main() rv = main()

View File

@ -12,13 +12,15 @@ from smol.parse import *
def readbyte(blob, off):
    """Read an unsigned byte at `off`; return (value, offset after it)."""
    end = off + 1
    (value,) = struct.unpack('<B', blob[off:end])
    return value, end
def readshort(blob, off):
    """Read a little-endian unsigned 16-bit value at `off`; return (value, offset after it)."""
    end = off + 2
    (value,) = struct.unpack('<H', blob[off:end])
    return value, end
def readint(blob, off):
    """Read a little-endian unsigned 32-bit value at `off`; return (value, offset after it)."""
    end = off + 4
    (value,) = struct.unpack('<I', blob[off:end])
    return value, end
def readlong(blob, off):
    """Read a little-endian unsigned 64-bit value at `off`; return (value, offset after it)."""
    end = off + 8
    (value,) = struct.unpack('<Q', blob[off:end])
    return value, end
def readstr(blob, off): def readstr(blob, off):
text = bytearray() text = bytearray()
while True: while True:
char, off = readbyte(blob, off) char, off = readbyte(blob, off)
if char == 0: break if char == 0:
break
text.append(char) text.append(char)
@ -34,7 +36,8 @@ def find_libs(deflibs, libname):
dirs = os.environ.get('LD_LIBRARY_PATH','').split(':') + deflibs dirs = os.environ.get('LD_LIBRARY_PATH','').split(':') + deflibs
for d in dirs: for d in dirs:
for f in glob.glob(glob.escape(d + '/' + libname) + '*'): yield f for f in glob.glob(glob.escape("%s/%s" % (d, libname)) + '*'):
yield f
def build_hashtab(readelf_bin, lib, hashid): def build_hashtab(readelf_bin, lib, hashid):
symbols = list_symbols(readelf_bin, lib) symbols = list_symbols(readelf_bin, lib)
@ -79,7 +82,7 @@ def get_hashtbl(elf, blob, args):
txtoff = txtoff + 1 txtoff = txtoff + 1
#eprintf("Hash table offset: 0x%08x?" % txtoff) #eprintf("Hash table offset: 0x%08x?" % txtoff)
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0] htaddr, ___ = readint(blob, txtoff)
else: # 64-bit else: # 64-bit
txtoff = addr2off(elf, elf.entry) txtoff = addr2off(elf, elf.entry)
# scan for 'push IMM32' # scan for 'push IMM32'
@ -92,7 +95,7 @@ def get_hashtbl(elf, blob, args):
# except, this is actually the value we're looking for when the binary # except, this is actually the value we're looking for when the binary
# had been linked with -fuse-dnload-loader! so let's just check the # had been linked with -fuse-dnload-loader! so let's just check the
# value # value
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0] htaddr, ___ = readint(blob, txtoff)
#eprintf("ELF entry == 0x%08x" % elf.entry) #eprintf("ELF entry == 0x%08x" % elf.entry)
if htaddr == elf.entry: if htaddr == elf.entry:
@ -103,7 +106,7 @@ def get_hashtbl(elf, blob, args):
txtoff = txtoff + 1 txtoff = txtoff + 1
#eprintf("Hash table offset: 0x%08x?" % txtoff) #eprintf("Hash table offset: 0x%08x?" % txtoff)
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0] htaddr, ___ = readint(blob, txtoff)
else: else:
pass#eprintf("Hash table offset: 0x%08x?" % txtoff) pass#eprintf("Hash table offset: 0x%08x?" % txtoff)
@ -123,7 +126,7 @@ def get_hashtbl(elf, blob, args):
if len(blob) <= htoff and len(tbl) > 0: if len(blob) <= htoff and len(tbl) > 0:
break break
#if elf.is32bit: #if elf.is32bit:
if struct.unpack('<B', blob[htoff:htoff+1])[0] == 0: if readbyte(blob, htoff)[0] == 0:
break break
else: else:
assert False, "AAAAA rest is %s" % repr(blob[htoff:]) assert False, "AAAAA rest is %s" % repr(blob[htoff:])
@ -132,15 +135,52 @@ def get_hashtbl(elf, blob, args):
# break # break
# else: # else:
# assert False, "AAAAA rest is %s" % repr(blob[htoff:]) # assert False, "AAAAA rest is %s" % repr(blob[htoff:])
val = struct.unpack(('<I' if hashsz == 4 else '<H'), val, ___ = (readshort if hashsz == 2 else readint)(blob, htoff)
blob[htoff:htoff+hashsz])[0] if (val & 0xFFFF) == 0:
if (val & 0xFFFF) == 0: break break
tbl.append(val) tbl.append(val)
#eprintf("sym %08x" % val) #eprintf("sym %08x" % val)
htoff = htoff + (4 if elf.is32bit else 8) htoff = htoff + (4 if elf.is32bit else 8)
return tbl return tbl
def do_smoldd_run(args):
    """Print which library symbol each hash in the input binary resolves to.

    Reads the binary from `args.input`, extracts its hash table, hashes the
    symbols of every needed library, and prints the matches grouped by
    library, followed by the hashes no library matches. Returns 0.
    """
    blob = args.input.read()
    elf = hackyelf.parse(blob)

    deflibs = get_def_libpaths(args.cc, elf.is32bit)
    needed = get_needed_libs(elf, blob)
    # the first file found for a library name is the one that gets used
    neededpaths = {lib: list(find_libs(deflibs, lib))[0] for lib in needed}

    htbl = get_hashtbl(elf, blob, args)

    hashid = get_hash_id(args.hash16, args.crc32c)

    libhashes = {lib: build_hashtab(args.readelf, neededpaths[lib], hashid)
                 for lib in needed}

    hashresolves = {}
    noresolves = []
    for hashval in htbl:
        # the first library knowing the hash wins
        for lib, table in libhashes.items():
            if hashval in table:
                hashresolves.setdefault(lib, {})[hashval] = table[hashval]
                break
        else:
            noresolves.append(hashval)

    for lib, resolved in hashresolves.items():
        print("%s:" % lib)
        for hashval, symname in resolved.items():
            print("\t%08x -> %s" % (hashval, symname))

    if len(noresolves) > 0:
        print("UNRESOLVED:")
        for hashval in noresolves:
            print("\t%08x" % hashval)

    return 0
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('input', type=argparse.FileType('rb'), parser.add_argument('input', type=argparse.FileType('rb'),
@ -162,43 +202,7 @@ def main():
help="Use Intel's crc32 intrinsic for hashing. Conflicts with `--hash16'.") help="Use Intel's crc32 intrinsic for hashing. Conflicts with `--hash16'.")
args = parser.parse_args() args = parser.parse_args()
blob = args.input.read() return do_smoldd_run(args)
elf = hackyelf.parse(blob)
deflibs = get_def_libpaths(args.cc, elf.is32bit)
needed = get_needed_libs(elf, blob)
neededpaths = dict((l,list(find_libs(deflibs, l))[0]) for l in needed)
htbl = get_hashtbl(elf, blob, args)
hashid = get_hash_id(args.hash16, args.crc32c)
libhashes = dict((l, build_hashtab(args.readelf, neededpaths[l], hashid)) for l in needed)
hashresolves = dict({})
noresolves = []
# TODO: group by libs
for x in htbl:
done = False
for l in libhashes.keys():
v = libhashes[l]
if x in v:
if l not in hashresolves: hashresolves[l] = dict({})
hashresolves[l][x] = v[x]
done = True
break
if not done: noresolves.append(x)
for l in hashresolves.keys():
print("%s:" % l)
v = hashresolves[l]
for x in v.keys():
print("\t%08x -> %s" % (x, v[x]))
if len(noresolves) > 0:
print("UNRESOLVED:")
for x in noresolves: print("\t%08x" % x)
return 0
if __name__ == '__main__': if __name__ == '__main__':
rv = main() rv = main()