mirror of https://github.com/Shizmob/smol
fix everything breaking due to 2b2efa3
, and also fix library ordering when it would cause problems (closes #5)
This commit is contained in:
parent
93c83f12b9
commit
d3d3d1eb11
|
@ -18,8 +18,8 @@ enhancements and bugfixes by blackle.
|
|||
`.text.startup._start`! Otherwise, the linker script will fail silently, and
|
||||
the smol startup/symbol resolving code will jump to an undefined location.
|
||||
|
||||
***NOTE***: C++ exceptions, RTTI, global *external* variables, global
|
||||
constructors and destructors (the ELF `.ctors`/`.dtors`/
|
||||
***NOTE***: C++ exceptions, RTTI, global *external* variables, thread-local
|
||||
storage, global constructors and destructors (the ELF `.ctors`/`.dtors`/
|
||||
`attribute((con-/destructor))` things, not the C++ language constructs), ...
|
||||
aren't supported yet, and probably won't be anytime soon.
|
||||
|
||||
|
|
|
@ -54,6 +54,8 @@ SECTIONS {
|
|||
|
||||
_smol_dataandbss_size = _smol_bss_end - _smol_data_start;
|
||||
|
||||
_smol_total_memsize = . - _smol_origin;
|
||||
|
||||
/* Stabs debugging sections. */
|
||||
.stab 0 : { *(.stab) }
|
||||
.stabstr 0 : { *(.stabstr) }
|
||||
|
@ -99,7 +101,5 @@ SECTIONS {
|
|||
/DISCARD/ : {
|
||||
*(.*)
|
||||
}
|
||||
|
||||
_smol_total_memsize = . - _smol_origin;
|
||||
}
|
||||
|
||||
|
|
34
smol/emit.py
34
smol/emit.py
|
@ -24,18 +24,25 @@ def get_min_check_width(libraries, hashfn):
|
|||
def sort_imports(libraries, hashfn):
|
||||
#eprintf("in: " + str(libraries))
|
||||
|
||||
# sort libs by name length, then by name
|
||||
ll = sorted(libraries.items(), key=lambda ls: (len(ls[0]), ls[0]))
|
||||
# DON'T DO THIS: weak symbol stuff etc.
|
||||
## sort libs by name length, then by name
|
||||
#ll = sorted(libraries.items(), key=lambda ls: (len(ls[0]), ls[0]))
|
||||
|
||||
for i in range(len(ll)):
|
||||
# sort symbols by hash value
|
||||
ll[i] = (ll[i][0], sorted(ll[i][1], key=lambda sr: hashfn(sr[0])))
|
||||
#for i in range(len(ll)):
|
||||
# # sort symbols by hash value
|
||||
# ll[i] = (ll[i][0], sorted(ll[i][1], key=lambda sr: hashfn(sr[0])))
|
||||
|
||||
#eprintf("out:" + str(dict(ll)))
|
||||
|
||||
# insertion order only works with python >=3.6!
|
||||
if sys.version_info < (3, 6): return OrderedDict(ll)
|
||||
else: return dict(ll)
|
||||
#if sys.version_info < (3, 6): return OrderedDict(ll)
|
||||
#else: return dict(ll)
|
||||
|
||||
ll = libraries.items()
|
||||
for k, v in ll:
|
||||
libraries[k] = sorted(v, key=lambda sr: hashfn(sr[0]))
|
||||
|
||||
return libraries
|
||||
|
||||
def output_x86(libraries, nx, hashid, outf, det):
|
||||
outf.write('; vim: set ft=nasm:\n') # be friendly
|
||||
|
@ -52,9 +59,10 @@ def output_x86(libraries, nx, hashid, outf, det):
|
|||
outf.write('%%define HASH_END_TYP %s\n' %
|
||||
fetch_width_from_bits[get_min_check_width(libraries, hashfn)])
|
||||
|
||||
usedrelocs = set({})
|
||||
usedrelocs = set()
|
||||
for library, symrels in libraries.items():
|
||||
for sym, reloc in symrels: usedrelocs.add(reloc)
|
||||
for sym, reloc in symrels.items():
|
||||
usedrelocs.add(reloc)
|
||||
|
||||
if not(nx) and 'R_386_PC32' in usedrelocs and 'R_386_GOT32X' in usedrelocs:
|
||||
error("Using a mix of R_386_PC32 and R_386_GOT32X relocations! "+\
|
||||
|
@ -94,7 +102,7 @@ dynamic.end:
|
|||
outf.write('global _symbols\n')
|
||||
outf.write('_symbols:\n')
|
||||
for library, symrels in libraries.items():
|
||||
for sym, reloc in symrels:
|
||||
for sym, reloc in symrels.items():
|
||||
# meh
|
||||
if reloc != 'R_386_PC32' and reloc != 'R_386_GOT32X':
|
||||
eprintf('Relocation type %s of symbol %s unsupported!' % (reloc, sym))
|
||||
|
@ -117,7 +125,7 @@ dynamic.end:
|
|||
outf.write('global _smolplt\n')
|
||||
outf.write('_smolplt:\n')
|
||||
for library, symrels in libraries.items():
|
||||
for sym, reloc in symrels:
|
||||
for sym, reloc in symrels.items():
|
||||
outf.write("""\
|
||||
[section .text.smolplt.{name}]
|
||||
global {name}
|
||||
|
@ -179,7 +187,7 @@ dynamic.end:
|
|||
outf.write('global _symbols\n')
|
||||
outf.write('_symbols:\n')
|
||||
for library, symrels in libraries.items():
|
||||
for sym, reloc in symrels:
|
||||
for sym, reloc in symrels.items():
|
||||
if reloc not in ['R_X86_64_PLT32', 'R_X86_64_GOTPCRELX', \
|
||||
'R_X86_64_REX_GOTPCRELX', 'R_X86_64_GOTPCREL']:
|
||||
error('Relocation type %s of symbol %s unsupported!' % (reloc, sym))
|
||||
|
@ -200,7 +208,7 @@ global {name}
|
|||
outf.write('global _smolplt\n')
|
||||
outf.write('_smolplt:\n')
|
||||
for library, symrels in libraries.items():
|
||||
for sym, reloc in symrels:
|
||||
for sym, reloc in symrels.items():
|
||||
if reloc == 'R_X86_64_PLT32':
|
||||
outf.write("""\
|
||||
[section .text.smolplt.{name}]
|
||||
|
|
288
smol/parse.py
288
smol/parse.py
|
@ -1,19 +1,38 @@
|
|||
|
||||
import glob
|
||||
import os.path
|
||||
import re
|
||||
import subprocess
|
||||
import struct
|
||||
import sys
|
||||
import re
|
||||
from typing import NamedTuple, List, Dict, OrderedDict, Tuple, Set
|
||||
|
||||
from .shared import *
|
||||
|
||||
|
||||
implicit_syms = { '_GLOBAL_OFFSET_TABLE_' }
|
||||
unsupported_symtyp = { 'NOTYPE', 'TLS', 'OBJECT' } # TODO: support OBJECT, and maybe TLS too
|
||||
|
||||
|
||||
class ExportSym(NamedTuple):
    """Metadata of one symbol-table row of a library, as split from `readelf -sW` output."""
    # Symbol name; build_symbol_map strips a trailing "@@..." version suffix before storing it.
    name: str
    # 4th whitespace-separated field of the row (values such as 'NOTYPE', 'TLS', 'OBJECT'
    # are listed in unsupported_symtyp).
    typ: str
    # 5th field of the row; compared against "LOCAL", "GLOBAL" and "WEAK" elsewhere in this module.
    scope: str
    # 6th field of the row; build_symbol_map only keeps rows where this is "DEFAULT".
    vis: str
    # 7th field of the row; "UND" is treated as "not defined in this library".
    ndx: str
|
||||
|
||||
|
||||
def decide_arch(inpfiles):
|
||||
archs=set({})
|
||||
archs = set()
|
||||
|
||||
for fp in inpfiles:
|
||||
with open(fp, 'rb') as ff:
|
||||
_ = ff.read(16) # ei_ident
|
||||
magi = ff.read(4) # EI_MAGx of ei_ident
|
||||
|
||||
if magi != b'\x7fELF':
|
||||
error("Input file '%s' is not an ELF file!" % fp)
|
||||
|
||||
_ = ff.read(12) # rest of ei_ident
|
||||
_ = ff.read( 2) # ei_type
|
||||
machine = ff.read(2) # ei_machine
|
||||
|
||||
|
@ -23,31 +42,43 @@ def decide_arch(inpfiles):
|
|||
if len(archs) != 1:
|
||||
error("Input files have multiple architectures, can't link this...")
|
||||
|
||||
archn = list(archs)[0]
|
||||
archn = archs.pop()
|
||||
|
||||
if archn not in archmagic:
|
||||
eprintf("Unknown architecture number " + str(archn) + \
|
||||
". Consult elf.h and rebuild your object files.")
|
||||
eprintf("Unknown architecture number %d" + \
|
||||
". Consult elf.h and rebuild your object files." % archn)
|
||||
|
||||
return archmagic[archn]
|
||||
|
||||
def build_reloc_typ_table(reo):
|
||||
relocs = dict({})
|
||||
|
||||
def build_reloc_typ_table(reo) -> Dict[str, Set[str]]: # (symname, reloctyps) dict
|
||||
relocs = {}
|
||||
|
||||
for s in reo.decode('utf-8').splitlines():
|
||||
stuff = s.split()
|
||||
|
||||
# prolly a 'header' line
|
||||
if len(stuff) < 5:
|
||||
if len(stuff) != 7 and len(stuff) != 5:
|
||||
continue
|
||||
|
||||
# yes, we're assuming every reference to the same symbol will use the
|
||||
# same relocation type. if this isn't the case, your compiler flags are
|
||||
# stupid
|
||||
relocs[stuff[4]] = stuff[2]
|
||||
symname, reloctyp = stuff[4], stuff[2]
|
||||
|
||||
if symname[0] == '.': # bleh
|
||||
continue
|
||||
|
||||
relocs.setdefault(symname, set()).add(reloctyp)
|
||||
# don't do that here, only check for import/external symbols (in get_needed_syms)
|
||||
#if symname in relocs:
|
||||
# rlc = relocs[symname]
|
||||
# if rlc != reloctyp:
|
||||
# error("E: symbol '%s' used with multiple relocation types! (%s <-> %s)"
|
||||
# % (symname, reloctyp, rlc))
|
||||
#else:
|
||||
# relocs[symname] = reloctyp
|
||||
|
||||
return relocs
|
||||
|
||||
|
||||
def has_lto_object(readelf_bin, files):
|
||||
for x in files:
|
||||
with open(x,'rb') as f:
|
||||
|
@ -60,32 +91,55 @@ def has_lto_object(readelf_bin, files):
|
|||
curfile = files[0]
|
||||
for entry in output.decode('utf-8').splitlines():
|
||||
stuff = entry.split()
|
||||
if len(stuff)<2: continue
|
||||
if stuff[0] == "File:": curfile = stuff[1]
|
||||
if "__gnu_lto_" in entry or ".gnu.lto" in entry: # assuming nobody uses a symbol called "__gnu_lto_" ...
|
||||
if len(stuff) < 2:
|
||||
continue
|
||||
if stuff[0] == "File:":
|
||||
curfile = stuff[1]
|
||||
|
||||
# assuming nobody uses a symbol called "__gnu_lto_"...
|
||||
if "__gnu_lto_" in entry or ".gnu.lto" in entry:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def get_needed_syms(readelf_bin, inpfile):
|
||||
|
||||
def get_needed_syms(readelf_bin, inpfile) -> Dict[str, str]: # (symname, reloctyp) dict
|
||||
output = subprocess.check_output([readelf_bin, '-s', '-W',inpfile],
|
||||
stderr=subprocess.DEVNULL)
|
||||
outrel = subprocess.check_output([readelf_bin, '-r', '-W',inpfile],
|
||||
stderr=subprocess.DEVNULL)
|
||||
#eprintf(output.decode('utf-8'))
|
||||
#eprintf(outrel.decode('utf-8'))
|
||||
|
||||
relocs = build_reloc_typ_table(outrel)
|
||||
|
||||
curfile = inpfile
|
||||
syms=set({})
|
||||
syms = {}
|
||||
for entry in output.decode('utf-8').splitlines():
|
||||
stuff = entry.split()
|
||||
if len(stuff)<2: continue
|
||||
if stuff[0] == "File:": curfile = stuff[1]
|
||||
if len(stuff)<8: continue
|
||||
#if stuff[7].startswith("__gnu_lto_"): # yikes, an LTO object
|
||||
# error("{} is an LTO object file, can't use this!".format(curfile))
|
||||
if stuff[4] == "GLOBAL" and stuff[6] == "UND" and len(stuff[7])>0 \
|
||||
and stuff[7] in relocs:
|
||||
syms.add((stuff[7], relocs[stuff[7]]))
|
||||
if len(stuff) < 2:
|
||||
continue
|
||||
if stuff[0] == "File:":
|
||||
curfile = stuff[1]
|
||||
if len(stuff) < 8:
|
||||
continue
|
||||
|
||||
scope, ndx, name = stuff[4], stuff[6], stuff[7]
|
||||
|
||||
if name.startswith("__gnu_lto_"): # yikes, an LTO object
|
||||
error("E: {} is an LTO object file, can't use this!".format(curfile))
|
||||
if scope == "GLOBAL" and ndx == "UND" and len(name) > 0:
|
||||
if name in relocs:
|
||||
rlt = relocs[name]
|
||||
if len(rlt) > 1:
|
||||
error("E: symbol '%s' has multiple relocations types?! (%s)"
|
||||
% (name, ', '.join(rlt)))
|
||||
#syms.add((name, rlt.pop()))
|
||||
if name in syms:
|
||||
assert False, ("??? %s" % name)
|
||||
syms[name] = rlt.pop()
|
||||
elif name not in implicit_syms:
|
||||
error("E: symbol '%s' has no relocation type?!" % name)
|
||||
|
||||
#needgot = False
|
||||
#if "_GLOBAL_OFFSET_TABLE_" in syms:
|
||||
|
@ -94,12 +148,14 @@ def get_needed_syms(readelf_bin, inpfile):
|
|||
|
||||
return syms#, needgot
|
||||
|
||||
|
||||
def format_cc_path_line(entry):
    """Split one ``category: =dir1:dir2:...`` line into ``(category, dirs)``.

    The text before the first ``': '`` is the category. The remainder has any
    leading ``=`` characters stripped and is split on ``:``. Entries that are
    not existing directories are dropped; the rest are canonicalised with
    ``os.path.realpath``.

    Duplicates are removed while keeping the order of first appearance. The
    order matters because find_lib() returns the first match while walking
    the search paths; going through a ``set`` would shuffle that priority.

    Returns:
        A ``(category, list_of_real_directories)`` tuple.

    Raises:
        ValueError: if `entry` contains no ``': '`` separator.
    """
    category, path = entry.split(': ', 1)
    path = path.lstrip('=')

    dirs = []
    seen = set()
    for p in path.split(':'):
        if not os.path.isdir(p):
            continue
        real = os.path.realpath(p)
        if real in seen:
            continue
        seen.add(real)
        dirs.append(real)

    return (category, dirs)
|
||||
|
||||
|
||||
def get_cc_paths(cc_bin):
|
||||
bak = os.environ.copy()
|
||||
os.environ['LANG'] = "C" # DON'T output localized search dirs!
|
||||
|
@ -122,6 +178,7 @@ def get_cc_paths(cc_bin):
|
|||
|
||||
return paths
|
||||
|
||||
|
||||
def get_cc_version(cc_bin):
|
||||
bak = os.environ.copy()
|
||||
os.environ['LANG'] = "C" # DON'T output localized search dirs!
|
||||
|
@ -137,23 +194,30 @@ def get_cc_version(cc_bin):
|
|||
verstr = lines[0].split()[-1]
|
||||
return ("clang", tuple(map(int, verstr.split('.'))))
|
||||
|
||||
|
||||
def is_valid_elf(f): # Good Enough(tm)
|
||||
with open(f, 'rb') as ff: return ff.read(4) == b'\x7FELF'
|
||||
with open(f, 'rb') as ff:
|
||||
return ff.read(4) == b'\x7FELF'
|
||||
|
||||
|
||||
def find_lib(spaths, wanted):
|
||||
for p in spaths:
|
||||
for f in glob.glob(glob.escape(p + '/lib' + wanted) + '.so*'):
|
||||
if os.path.isfile(f) and is_valid_elf(f): return f
|
||||
for f in glob.glob(glob.escape(p + '/' + wanted) + '.so*'):
|
||||
if os.path.isfile(f) and is_valid_elf(f): return f
|
||||
for f in glob.glob(glob.escape('%s/lib%s' % (p, wanted)) + '.so*'):
|
||||
if os.path.isfile(f) and is_valid_elf(f):
|
||||
return f
|
||||
for f in glob.glob(glob.escape('%s/%s' % (p, wanted)) + '.so*'):
|
||||
if os.path.isfile(f) and is_valid_elf(f):
|
||||
return f
|
||||
#for f in glob.glob(glob.escape(p) + '/lib' + wanted + '.a' ): return f
|
||||
#for f in glob.glob(glob.escape(p) + '/' + wanted + '.a' ): return f
|
||||
|
||||
error("E: couldn't find library '" + wanted + "'.")
|
||||
error("E: couldn't find library '%s'." % wanted)
|
||||
|
||||
|
||||
def find_libs(spaths, wanted):
    """Look up every library name in `wanted` with find_lib(), preserving order.

    Returns a list with one find_lib() result per entry of `wanted`.
    """
    located = []
    for libname in wanted:
        located.append(find_lib(spaths, libname))
    return located
|
||||
|
||||
|
||||
def list_symbols(readelf_bin, lib):
|
||||
out = subprocess.check_output([readelf_bin, '-sW', lib], stderr=subprocess.DEVNULL)
|
||||
|
||||
|
@ -175,14 +239,154 @@ def list_symbols(readelf_bin, lib):
|
|||
|
||||
return symbols
|
||||
|
||||
def build_symbol_map(readelf_bin, libraries):
|
||||
# create dictionary that maps symbols to libraries that provide them
|
||||
symbol_map = {}
|
||||
for lib in libraries:
|
||||
symbols = list_symbols(readelf_bin, lib)
|
||||
for symbol in symbols:
|
||||
if symbol not in symbol_map:
|
||||
symbol_map[symbol] = set()
|
||||
soname = lib.split("/")[-1]
|
||||
symbol_map[symbol].add(soname)
|
||||
|
||||
def build_symbol_map(readelf_bin, libraries) -> Dict[str, Dict[str, "ExportSym"]]:
    """Map each symbol name to the libraries that list it, with its metadata.

    Runs ``readelf -sW`` once over all `libraries` and parses the text output.

    Args:
        readelf_bin: path of the readelf executable to run.
        libraries: sequence of library file paths.

    Returns:
        ``{symname: {soname: ExportSym}}`` where ``soname`` is the last path
        component of the library file. An empty dict when `libraries` is empty.

    Raises:
        subprocess.CalledProcessError: if readelf exits with an error.
    """
    symbol_map = {}  # symname -> (soname -> ExportSym)

    # Without input files there is nothing to scan (and no libraries[0] to
    # fall back on), so do not even start readelf.
    if not libraries:
        return symbol_map

    out = subprocess.check_output([readelf_bin, '-sW', *libraries], stderr=subprocess.DEVNULL)

    # Until a "File:" header says otherwise, rows are attributed to the first
    # library (presumably readelf omits the header when given a single file).
    soname = libraries[0].split("/")[-1]
    for line in out.decode('utf-8').splitlines():
        fields = line.split()
        if len(fields) < 2:
            continue
        if fields[0] == "File:":
            soname = fields[1].split("/")[-1]

        # only rows with exactly 8 columns are treated as symbol rows
        if len(fields) != 8:
            continue

        typ, scope, vis, ndx, name = fields[3:8]
        # Undefined symbols and unsupported symbol types are deliberately NOT
        # filtered here: they are dealt with further down the line, where
        # proper errors can be emitted.
        if vis != "DEFAULT" or scope == "LOCAL":
            continue

        # strip away GLIBC versions
        name = re.sub(r"@@.*$", "", name)

        symbol_map.setdefault(name, {})[soname] = ExportSym(name, typ, scope, vis, ndx)

    return symbol_map
|
||||
|
||||
|
||||
# this ordening is specific to ONE symbol!
|
||||
# this ordering is specific to ONE symbol!
def build_preferred_lib_order(sym, libs: Dict[str, "ExportSym"]) -> List[Tuple[str, "ExportSym"]]:
    """Order the libraries that mention `sym` from most to least authoritative.

    Args:
        sym: symbol name (only used in error messages).
        libs: ``lib -> syminfo`` for every library that lists the symbol.

    Returns:
        ``[(lib, syminfo)]`` sorted as: non-weak definitions, weak
        definitions, weak undefined references, non-weak undefined references.

    A library whose entry has ``ndx == "UND"`` merely imports the symbol, so
    it must not be counted as a definition even when the binding is not WEAK;
    otherwise a lone real definition plus one importer is misreported as a
    duplicate definition.

    Calls error() (which presumably aborts) when the definition is ambiguous
    or when no library actually defines the symbol.
    """
    realdefs, weakdefs, weakunddefs, unddefs = [], [], [], []
    for lib, info in libs.items():
        is_weak = info.scope == "WEAK"
        if info.ndx == "UND":
            bucket = weakunddefs if is_weak else unddefs
        else:
            bucket = weakdefs if is_weak else realdefs
        bucket.append((lib, info))

    if len(realdefs) > 1 or (len(realdefs) == 0 and len(weakdefs) > 1):
        conflicting = realdefs if len(realdefs) > 1 else weakdefs
        error("E: symbol '%s' defined non-weakly in multiple libraries! (%s)"
              % (sym, ', '.join(lib for lib, _ in conflicting)))
    if len(realdefs) == 0 and len(weakdefs) == 0:  # only undefined references left
        error("E: no default weak implementation found for symbol '%s'" % sym)

    return realdefs + weakdefs + weakunddefs + unddefs
|
||||
|
||||
def has_good_subordening(needles, haystack):
    """Tell whether `needles` appear in `haystack` in a compatible order.

    Both arguments are sequences of pairs whose first element is the key.
    Keys of `needles` that do not occur in `haystack` are ignored; for the
    remaining ones, the position of their first occurrence in `haystack`
    must never decrease while walking `needles`.
    """
    known = [entry[0] for entry in haystack]
    last_pos = 0
    for key, _ in needles:
        if key not in known:
            # not in haystack --> eh, let's ignore
            continue
        pos = known.index(key)
        if pos < last_pos:
            return False
        last_pos = pos
    return True
|
||||
|
||||
def add_with_ordening(haystack: List[Tuple[str, Dict[str, str]]],  # [(libname, (symname -> reloctyp))]
                      needles: List[Tuple[str, "ExportSym"]],      # [(lib, syminfo)]
                      sym: str, reloc: str) \
        -> List[Tuple[str, Dict[str, str]]]:
    """Register a symbol with every library in `needles`, keeping their order.

    `haystack` is modified in place and also returned. For each
    ``(lib, syminfo)`` of `needles`, in order:

    * if `lib` is already in `haystack`, ``syminfo.name -> reloc`` is added
      to its symbol dict;
    * otherwise a new ``(lib, {syminfo.name: reloc})`` entry is inserted
      directly behind the previously handled needle (at the very front when
      no needle has been handled yet), so a later needle that already exists
      in `haystack` still ends up after it.

    When an existing library sits *before* an already handled needle the two
    orders cannot be reconciled; the library is then left where it is and
    only the symbol is recorded, instead of aborting.

    `sym` is accepted for interface compatibility; the recorded name is
    taken from each needle's ``name`` attribute.

    Raises:
        AssertionError: if a library already has an entry for the symbol.
    """
    haylist = [x[0] for x in haystack]
    cursor = 0  # index at which the next missing library gets inserted
    for lib, info in needles:
        if lib in haylist:
            pos = haylist.index(lib)
            symrelocdict = haystack[pos][1]
            if info.name in symrelocdict:
                raise AssertionError("symbol '%s' registered twice for library '%s'"
                                     % (info.name, lib))
            symrelocdict[info.name] = reloc
            # never move the cursor backwards on an unreconcilable order
            cursor = max(cursor, pos + 1)
        else:  # not in haystack --> add!
            haystack.insert(cursor, (lib, {info.name: reloc}))
            haylist.insert(cursor, lib)
            cursor += 1

    return haystack
|
||||
|
||||
def resolve_extern_symbols(needed: Dict[str, str],  # symname -> reloctyp
                           available: Dict[str, Dict[str, "ExportSym"]],  # symname -> (lib -> syminfo)
                           args) \
        -> OrderedDict[str, Dict[str, str]]:  # libname -> (symname -> reloctyp)
    """Assign every needed symbol to libraries and derive a library order.

    Args:
        needed: ``symname -> reloctyp`` for the symbols the input requires.
        available: ``symname -> (lib -> syminfo)`` as built from the libraries.
        args: parsed options; ``det``, ``fuse_dnload_loader`` and
            ``fskip_zero_value`` are read.

    Returns:
        An ordered mapping ``libname -> (symname -> reloctyp)``.

    Symbols listed by several libraries are handled first because they
    constrain the library order; symbols with a single provider are added
    afterwards. `available` is not modified.
    """
    # first of all, we're going to check which needed symbols are provided by
    # which libraries
    bound = {}  # sym -> (reloc, (lib -> syminfo))
    for k, v in needed.items():
        if k not in available:
            error("E: symbol '%s' could not be found." % k)

        bound[k] = (v, available[k])

    # default ordering; sorted by (length, name) for deterministic output
    bound = bound.items()
    if args.det:
        bound = sorted(bound, key=lambda kv: (len(kv[0]), kv[0]))

    liborder = []  # [(libname, (symname -> reloctyp))]
    for k, (reloc, libs) in bound:
        if len(libs) <= 1:
            continue

        # preferred: [(lib, syminfo)]
        preferred = build_preferred_lib_order(k, libs)
        if not has_good_subordening(preferred, liborder):
            message = None
            if args.fuse_dnload_loader and not args.fskip_zero_value:
                message = "W: unreconcilable library ordenings '%s' and '%s' "+\
                    "for symbol '%s', you are STRONGLY advised to use `-fskip-zero-value'!"
            if not args.fuse_dnload_loader and not args.fskip_zero_value:
                message = "W: unreconcilable library ordenings '%s' and '%s' "+\
                    "for symbol '%s', you might want to enable `-fskip-zero-value'."
            if message is not None:
                # both orders are lists of (lib, ...) pairs, not dicts
                eprintf(message % (', '.join(lib for lib, _ in liborder),
                                   ', '.join(lib for lib, _ in preferred), k))

        liborder = add_with_ordening(liborder, preferred, k, reloc)

    # add all those left without any possible preferred ordering
    for k, (reloc, libs) in bound:
        if len(libs) == 0:
            raise AssertionError("??? (%s)" % k)
        if len(libs) != 1:
            continue

        # peek at the single (lib, syminfo) pair without emptying the
        # caller's `available` map
        lib = next(iter(libs.items()))
        liborder = add_with_ordening(liborder, [lib], k, reloc)

    return OrderedDict(liborder)
|
||||
|
||||
|
|
222
smold.py
222
smold.py
|
@ -14,6 +14,124 @@ from smol.parse import *
|
|||
from smol.emit import *
|
||||
from smol.cnl import *
|
||||
|
||||
def preproc_args(args):
    """Validate the parsed command line and derive dependent settings.

    Mutates `args` in place: ``-g`` is appended to the cflags/ldflags/asflags
    in debug mode, ``fuse_dnload_loader`` is forced on when a non-default hash
    is selected, ``fskip_zero_value`` is forced on when the dnload loader is
    used, and one ``-D...`` define per enabled feature is prepended to
    ``args.asflags``.

    Returns:
        ``(args, arch)``, where `arch` is the lower-cased ``--target`` value
        or, if none was given, the result of decide_arch() on the inputs.

    Calls error() on conflicting options, missing tool binaries or an
    unknown/unsupported architecture.
    """
    if args.hash16 and args.crc32c:  # shouldn't happen anymore
        error("Cannot combine --hash16 and --crc32c!")

    if args.debug:
        args.cflags.append('-g')
        args.ldflags.append('-g')
        args.asflags.append('-g')

    if args.hash16 or args.crc32c:
        args.fuse_dnload_loader = True

    args.fskip_zero_value = args.fskip_zero_value or args.fuse_dnload_loader

    # Each enabled define is inserted at the front, in this order, so the
    # resulting flag order stays the same as with one `if` per option.
    defines = [
        (args.fskip_zero_value, "-DSKIP_ZERO_VALUE"),
        (args.fuse_nx, "-DUSE_NX"),
        (args.fskip_entries, "-DSKIP_ENTRIES"),
        (args.funsafe_dynamic, "-DUNSAFE_DYNAMIC"),
        (args.fno_start_arg, "-DNO_START_ARG"),
        (args.fuse_dl_fini, "-DUSE_DL_FINI"),
        (args.fuse_dt_debug, "-DUSE_DT_DEBUG"),
        (args.fuse_dnload_loader, "-DUSE_DNLOAD_LOADER"),
        (args.fuse_interp, "-DUSE_INTERP"),
        (args.falign_stack, "-DALIGN_STACK"),
        (args.fifunc_support, "-DIFUNC_SUPPORT"),
        (args.fifunc_strict_cconv, "-DIFUNC_CORRECT_CCONV"),
        (args.hang_on_startup, "-DHANG_ON_STARTUP"),
    ]
    for enabled, define in defines:
        if enabled:
            args.asflags.insert(0, define)

    for x in ['nasm', 'cc', 'readelf']:
        val = args.__dict__[x]
        if val is None or not os.path.isfile(val):
            error("'%s' binary%s not found" %
                  (x, ("" if val is None else (" ('%s')" % val))))

    # str has no tolower(); lower() is the method that exists
    arch = args.target.lower() if len(args.target) != 0 else decide_arch(args.input)
    if arch not in archmagic:
        error("Unknown/unsupported architecture '%s'" % str(arch))
    if args.verbose:
        eprintf("arch: %s" % str(arch))

    if args.hash16 and arch not in ('i386', 3):
        error("Cannot use --hash16 for arch `%s' (not i386)" % (arch))

    return args, arch
|
||||
|
||||
|
||||
def do_smol_run(args, arch):
    """Run the whole pipeline: symbol table generation, assembly, final link.

    Steps, as far as visible here:
      1. relink the inputs into one temporary object when there are several
         inputs or an LTO object is detected, otherwise use the single input;
      2. collect the needed symbols, locate the requested libraries and
         resolve each symbol to its libraries;
      3. write the generated assembly either to ``args.output`` (when
         ``args.gen_rt_only`` is set) or to a temporary file;
      4. unless ``gen_rt_only``: assemble it and link the final executable,
         plus a second link to ``args.debugout`` when that is given.

    Temporary files are removed afterwards unless ``args.keeptmp`` is set.
    """
    objinput = None
    objinputistemp = False
    tmp_asm_fd, tmp_asm_file, tmp_elf_file = None, None, None
    if not args.gen_rt_only:
        tmp_asm_fd, tmp_asm_file = tempfile.mkstemp(prefix='smoltab', suffix='.asm', text=True)
        tmp_elf_fd, tmp_elf_file = tempfile.mkstemp(prefix='smolout', suffix='.o')
        # only the path is needed; the tools reopen the file themselves
        os.close(tmp_elf_fd)

    try:
        # if >1 input OR input is LTO object:
        if len(args.input) > 1 or has_lto_object(args.readelf, args.input):
            fd, objinput = tempfile.mkstemp(prefix='smolin', suffix='.o')
            objinputistemp = True
            os.close(fd)
            cc_relink_objs(args.verbose, args.cc, arch, args.input, objinput, args.cflags)
        else:
            objinput = args.input[0]

        # generate smol hashtab
        cc_paths = get_cc_paths(args.cc)
        syms = get_needed_syms(args.readelf, objinput)
        spaths = args.libdir + cc_paths['libraries']
        libs = find_libs(spaths, args.library)
        if args.verbose:
            eprintf("libs = %s" % str(libs))

        libs_symbol_map = build_symbol_map(args.readelf, libs)
        symbols = resolve_extern_symbols(syms, libs_symbol_map, args)

        if args.gen_rt_only:
            taf = open(args.output, 'w')
        else:
            taf = os.fdopen(tmp_asm_fd, mode='w')
            tmp_asm_fd = None  # the file object owns (and closes) the descriptor now
        with taf:
            output(arch, symbols, args.nx, get_hash_id(args.hash16, args.crc32c), taf, args.det)
        if args.verbose:
            # with gen_rt_only there is no temporary asm file; the table went to args.output
            eprintf("wrote symtab to %s" % (args.output if args.gen_rt_only else tmp_asm_file))

        if not args.gen_rt_only:
            # assemble hash table/ELF header
            nasm_assemble_elfhdr(args.verbose, args.nasm, arch, args.smolrt,
                                 tmp_asm_file, tmp_elf_file, args.asflags)

            # link with LD into the final executable, w/ special linker script
            ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
                          args.output, args.ldflags, False)
            if args.debugout is not None:
                ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
                              args.debugout, args.ldflags, True)
    finally:
        # an early failure leaves the asm descriptor unwrapped: close it here
        if tmp_asm_fd is not None:
            os.close(tmp_asm_fd)
        if not args.keeptmp:
            if objinputistemp:
                os.remove(objinput)
            # both paths stay None in gen_rt_only mode; os.remove(None) would raise
            for tmp_path in (tmp_asm_file, tmp_elf_file):
                if tmp_path is not None:
                    os.remove(tmp_path)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-m', '--target', default='', \
|
||||
|
@ -146,109 +264,9 @@ def main():
|
|||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.hash16 and args.crc32c: # shouldn't happen anymore
|
||||
error("Cannot combine --hash16 and --crc32c!")
|
||||
args, arch = preproc_args(args)
|
||||
do_smol_run(args, arch)
|
||||
|
||||
if args.debug:
|
||||
args.cflags.append('-g')
|
||||
args.ldflags.append('-g')
|
||||
args.asflags.append('-g')
|
||||
|
||||
if args.hash16 or args.crc32c:
|
||||
args.fuse_dnload_loader = True
|
||||
|
||||
args.fskip_zero_value = args.fskip_zero_value or args.fuse_dnload_loader
|
||||
|
||||
if args.fskip_zero_value: args.asflags.insert(0, "-DSKIP_ZERO_VALUE")
|
||||
if args.fuse_nx: args.asflags.insert(0, "-DUSE_NX")
|
||||
if args.fskip_entries: args.asflags.insert(0, "-DSKIP_ENTRIES")
|
||||
if args.funsafe_dynamic: args.asflags.insert(0, "-DUNSAFE_DYNAMIC")
|
||||
if args.fno_start_arg: args.asflags.insert(0, "-DNO_START_ARG")
|
||||
if args.fuse_dl_fini: args.asflags.insert(0, "-DUSE_DL_FINI")
|
||||
if args.fuse_dt_debug: args.asflags.insert(0, "-DUSE_DT_DEBUG")
|
||||
if args.fuse_dnload_loader: args.asflags.insert(0, "-DUSE_DNLOAD_LOADER")
|
||||
if args.fuse_interp: args.asflags.insert(0, "-DUSE_INTERP")
|
||||
if args.falign_stack: args.asflags.insert(0, "-DALIGN_STACK")
|
||||
if args.fifunc_support: args.asflags.insert(0, "-DIFUNC_SUPPORT")
|
||||
if args.fifunc_strict_cconv: args.asflags.insert(0, "-DIFUNC_CORRECT_CCONV")
|
||||
if args.hang_on_startup: args.asflags.insert(0, "-DHANG_ON_STARTUP")
|
||||
|
||||
for x in ['nasm','cc','readelf']:
|
||||
val = args.__dict__[x]
|
||||
if val is None or not os.path.isfile(val):
|
||||
error("'%s' binary%s not found" %
|
||||
(x, ("" if val is None else (" ('%s')" % val))))
|
||||
|
||||
arch = args.target.tolower() if len(args.target) != 0 else decide_arch(args.input)
|
||||
if arch not in archmagic:
|
||||
error("Unknown/unsupported architecture '%s'" % str(arch))
|
||||
if args.verbose: eprintf("arch: %s" % str(arch))
|
||||
|
||||
if args.hash16 and arch not in ('i386', 3):
|
||||
error("Cannot use --hash16 for arch `%s' (not i386)" % (arch))
|
||||
|
||||
objinput = None
|
||||
objinputistemp = False
|
||||
tmp_asm_file, tmp_elf_fd, tmp_elf_file = None, None, None
|
||||
if not args.gen_rt_only:
|
||||
tmp_asm_file = tempfile.mkstemp(prefix='smoltab',suffix='.asm',text=True)
|
||||
tmp_asm_fd = tmp_asm_file[0]
|
||||
tmp_asm_file = tmp_asm_file[1]
|
||||
tmp_elf_file = tempfile.mkstemp(prefix='smolout',suffix='.o')
|
||||
os.close(tmp_elf_file[0])
|
||||
tmp_elf_file = tmp_elf_file[1]
|
||||
|
||||
try:
|
||||
# if >1 input OR input is LTO object:
|
||||
if len(args.input) > 1 or has_lto_object(args.readelf, args.input):
|
||||
fd, objinput = tempfile.mkstemp(prefix='smolin',suffix='.o')
|
||||
objinputistemp = True
|
||||
os.close(fd)
|
||||
cc_relink_objs(args.verbose, args.cc, arch, args.input, objinput, args.cflags)
|
||||
else: objinput = args.input[0]
|
||||
|
||||
# generate smol hashtab
|
||||
cc_paths = get_cc_paths(args.cc)
|
||||
syms = get_needed_syms(args.readelf, objinput)
|
||||
spaths = args.libdir + cc_paths['libraries']
|
||||
libraries = cc_paths['libraries']
|
||||
libs = find_libs(spaths, args.library)
|
||||
if args.verbose: eprintf("libs = %s" % str(libs))
|
||||
libs_symbol_map = build_symbol_map(args.readelf, libs)
|
||||
symbols = {}
|
||||
for symbol, reloc in syms:
|
||||
if symbol not in libs_symbol_map:
|
||||
error("could not find symbol: {}".format(symbol))
|
||||
libs_for_symbol = libs_symbol_map[symbol]
|
||||
if len(libs_for_symbol) > 1:
|
||||
error("E: the symbol '%s' is provided by more than one library: %s"
|
||||
% (symbol, str(libs_for_symbol)))
|
||||
library = libs_for_symbol.pop()
|
||||
symbols.setdefault(library, [])
|
||||
symbols[library].append((symbol, reloc))
|
||||
|
||||
with (open(args.output,'w') if args.gen_rt_only
|
||||
else os.fdopen(tmp_asm_fd, mode='w')) as taf:
|
||||
output(arch, symbols, args.nx, get_hash_id(args.hash16, args.crc32c), taf, args.det)
|
||||
if args.verbose:
|
||||
eprintf("wrote symtab to %s" % tmp_asm_file)
|
||||
|
||||
if not args.gen_rt_only:
|
||||
# assemble hash table/ELF header
|
||||
nasm_assemble_elfhdr(args.verbose, args.nasm, arch, args.smolrt,
|
||||
tmp_asm_file, tmp_elf_file, args.asflags)
|
||||
|
||||
# link with LD into the final executable, w/ special linker script
|
||||
ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
|
||||
args.output, args.ldflags, False)
|
||||
if args.debugout is not None:
|
||||
ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
|
||||
args.debugout, args.ldflags, True)
|
||||
finally:
|
||||
if not args.keeptmp:
|
||||
if objinputistemp: os.remove(objinput)
|
||||
if not args.gen_rt_only: os.remove(tmp_asm_file)
|
||||
os.remove(tmp_elf_file)
|
||||
|
||||
if __name__ == '__main__':
|
||||
rv = main()
|
||||
|
|
96
smoldd.py
96
smoldd.py
|
@ -12,13 +12,15 @@ from smol.parse import *
|
|||
|
||||
|
||||
def readbyte(blob, off):
    """Decode the unsigned 8-bit value at `off`; return (value, offset past it)."""
    end = off + 1
    (value,) = struct.unpack('<B', blob[off:end])
    return value, end
|
||||
def readshort(blob, off):
    """Decode the little-endian unsigned 16-bit value at `off`; return (value, offset past it)."""
    end = off + 2
    (value,) = struct.unpack('<H', blob[off:end])
    return value, end
|
||||
def readint(blob, off):
    """Decode the little-endian unsigned 32-bit value at `off`; return (value, offset past it)."""
    end = off + 4
    (value,) = struct.unpack('<I', blob[off:end])
    return value, end
|
||||
def readlong(blob, off):
    """Decode the little-endian unsigned 64-bit value at `off`; return (value, offset past it)."""
    end = off + 8
    (value,) = struct.unpack('<Q', blob[off:end])
    return value, end
|
||||
def readstr(blob, off):
|
||||
text = bytearray()
|
||||
while True:
|
||||
char, off = readbyte(blob, off)
|
||||
if char == 0: break
|
||||
if char == 0:
|
||||
break
|
||||
|
||||
text.append(char)
|
||||
|
||||
|
@ -34,7 +36,8 @@ def find_libs(deflibs, libname):
|
|||
dirs = os.environ.get('LD_LIBRARY_PATH','').split(':') + deflibs
|
||||
|
||||
for d in dirs:
|
||||
for f in glob.glob(glob.escape(d + '/' + libname) + '*'): yield f
|
||||
for f in glob.glob(glob.escape("%s/%s" % (d, libname)) + '*'):
|
||||
yield f
|
||||
|
||||
def build_hashtab(readelf_bin, lib, hashid):
|
||||
symbols = list_symbols(readelf_bin, lib)
|
||||
|
@ -79,7 +82,7 @@ def get_hashtbl(elf, blob, args):
|
|||
txtoff = txtoff + 1
|
||||
|
||||
#eprintf("Hash table offset: 0x%08x?" % txtoff)
|
||||
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0]
|
||||
htaddr, ___ = readint(blob, txtoff)
|
||||
else: # 64-bit
|
||||
txtoff = addr2off(elf, elf.entry)
|
||||
# scan for 'push IMM32'
|
||||
|
@ -92,7 +95,7 @@ def get_hashtbl(elf, blob, args):
|
|||
# except, this is actually the value we're looking for when the binary
|
||||
# had been linked with -fuse-dnload-loader! so let's just check the
|
||||
# value
|
||||
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0]
|
||||
htaddr, ___ = readint(blob, txtoff)
|
||||
|
||||
#eprintf("ELF entry == 0x%08x" % elf.entry)
|
||||
if htaddr == elf.entry:
|
||||
|
@ -103,7 +106,7 @@ def get_hashtbl(elf, blob, args):
|
|||
txtoff = txtoff + 1
|
||||
|
||||
#eprintf("Hash table offset: 0x%08x?" % txtoff)
|
||||
htaddr = struct.unpack('<I', blob[txtoff:txtoff+4])[0]
|
||||
htaddr, ___ = readint(blob, txtoff)
|
||||
else:
|
||||
pass#eprintf("Hash table offset: 0x%08x?" % txtoff)
|
||||
|
||||
|
@ -123,7 +126,7 @@ def get_hashtbl(elf, blob, args):
|
|||
if len(blob) <= htoff and len(tbl) > 0:
|
||||
break
|
||||
#if elf.is32bit:
|
||||
if struct.unpack('<B', blob[htoff:htoff+1])[0] == 0:
|
||||
if readbyte(blob, htoff)[0] == 0:
|
||||
break
|
||||
else:
|
||||
assert False, "AAAAA rest is %s" % repr(blob[htoff:])
|
||||
|
@ -132,15 +135,52 @@ def get_hashtbl(elf, blob, args):
|
|||
# break
|
||||
# else:
|
||||
# assert False, "AAAAA rest is %s" % repr(blob[htoff:])
|
||||
val = struct.unpack(('<I' if hashsz == 4 else '<H'),
|
||||
blob[htoff:htoff+hashsz])[0]
|
||||
if (val & 0xFFFF) == 0: break
|
||||
val, ___ = (readshort if hashsz == 2 else readint)(blob, htoff)
|
||||
if (val & 0xFFFF) == 0:
|
||||
break
|
||||
tbl.append(val)
|
||||
#eprintf("sym %08x" % val)
|
||||
htoff = htoff + (4 if elf.is32bit else 8)
|
||||
|
||||
return tbl
|
||||
|
||||
def do_smoldd_run(args):
    """Print, per library, which symbol each hash in the input's hash table resolves to.

    Reads the whole input, parses it, builds a hash -> symbol table for every
    needed library and looks each hash of the binary's table up in them (the
    first library that knows the hash wins). Hashes no library knows are
    listed under "UNRESOLVED:". Always returns 0.
    """
    blob = args.input.read()
    elf = hackyelf.parse(blob)

    deflibs = get_def_libpaths(args.cc, elf.is32bit)
    needed = get_needed_libs(elf, blob)
    # first candidate path found for each needed library
    neededpaths = dict((libname, list(find_libs(deflibs, libname))[0]) for libname in needed)

    htbl = get_hashtbl(elf, blob, args)

    hashid = get_hash_id(args.hash16, args.crc32c)
    libhashes = dict((libname, build_hashtab(args.readelf, neededpaths[libname], hashid))
                     for libname in needed)

    hashresolves = {}
    noresolves = []
    for hashval in htbl:
        for libname, table in libhashes.items():
            if hashval in table:
                hashresolves.setdefault(libname, {})[hashval] = table[hashval]
                break
        else:
            # no library knows this hash
            noresolves.append(hashval)

    for libname, resolved in hashresolves.items():
        print("%s:" % libname)
        for hashval in resolved:
            print("\t%08x -> %s" % (hashval, resolved[hashval]))

    if len(noresolves) > 0:
        print("UNRESOLVED:")
        for hashval in noresolves:
            print("\t%08x" % hashval)

    return 0
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('input', type=argparse.FileType('rb'),
|
||||
|
@ -162,43 +202,7 @@ def main():
|
|||
help="Use Intel's crc32 intrinsic for hashing. Conflicts with `--hash16'.")
|
||||
args = parser.parse_args()
|
||||
|
||||
blob = args.input.read()
|
||||
elf = hackyelf.parse(blob)
|
||||
|
||||
deflibs = get_def_libpaths(args.cc, elf.is32bit)
|
||||
needed = get_needed_libs(elf, blob)
|
||||
neededpaths = dict((l,list(find_libs(deflibs, l))[0]) for l in needed)
|
||||
|
||||
htbl = get_hashtbl(elf, blob, args)
|
||||
|
||||
hashid = get_hash_id(args.hash16, args.crc32c)
|
||||
libhashes = dict((l, build_hashtab(args.readelf, neededpaths[l], hashid)) for l in needed)
|
||||
|
||||
hashresolves = dict({})
|
||||
noresolves = []
|
||||
# TODO: group by libs
|
||||
for x in htbl:
|
||||
done = False
|
||||
for l in libhashes.keys():
|
||||
v = libhashes[l]
|
||||
if x in v:
|
||||
if l not in hashresolves: hashresolves[l] = dict({})
|
||||
hashresolves[l][x] = v[x]
|
||||
done = True
|
||||
break
|
||||
if not done: noresolves.append(x)
|
||||
|
||||
for l in hashresolves.keys():
|
||||
print("%s:" % l)
|
||||
v = hashresolves[l]
|
||||
for x in v.keys():
|
||||
print("\t%08x -> %s" % (x, v[x]))
|
||||
|
||||
if len(noresolves) > 0:
|
||||
print("UNRESOLVED:")
|
||||
for x in noresolves: print("\t%08x" % x)
|
||||
|
||||
return 0
|
||||
return do_smoldd_run(args)
|
||||
|
||||
if __name__ == '__main__':
|
||||
rv = main()
|
||||
|
|
Loading…
Reference in New Issue