import glob
import os.path
import re
import subprocess
import struct
import sys
from typing import NamedTuple, List, Dict, OrderedDict, Tuple, Set
from .shared import *
# Undefined symbols that get_needed_syms() accepts even when no relocation
# entry refers to them.
implicit_syms = { '_GLOBAL_OFFSET_TABLE_' }
# Symbol types considered unsupported. NOTE(review): no code visible in this
# part of the file reads this set -- confirm where it is consumed.
unsupported_symtyp = { 'NOTYPE', 'TLS', 'OBJECT' } # TODO: support OBJECT, and maybe TLS too
class ExportSym(NamedTuple):
    """One symbol-table row, stored as the raw text columns printed by readelf.

    Instances are built in build_symbol_map() from columns 3..7 of a
    whitespace-split ``readelf -sW`` line.
    """
    name: str   # symbol name (build_symbol_map strips a trailing "@@version")
    typ: str    # symbol type column
    scope: str  # binding column; compared against "LOCAL" and "WEAK" in this file
    vis: str    # visibility column; compared against "DEFAULT"
    ndx: str    # section index column; "UND" marks an undefined symbol
def decide_arch(inpfiles):
    """Return the ``archmagic`` entry for the machine type shared by *inpfiles*.

    Each input file's ELF header is inspected; every file must carry the ELF
    magic and all files must agree on ``e_machine``. Problems are reported
    through ``error()``.

    ``archmagic`` and ``error`` are not defined in this block (presumably they
    come from the ``.shared`` star import).
    """
    archs = set()

    for fp in inpfiles:
        with open(fp, 'rb') as ff:
            # e_ident (16 bytes) + e_type (2 bytes) + e_machine (2 bytes)
            header = ff.read(20)

        if header[:4] != b'\x7fELF':  # EI_MAGx of e_ident
            error("Input file '%s' is not an ELF file!" % fp)
        if len(header) < 20:
            error("Input file '%s' has a truncated ELF header!" % fp)

        # e_machine sits right after e_ident and e_type, i.e. at offset 18.
        # NOTE(review): decoded as little-endian regardless of EI_DATA.
        archs.add(struct.unpack_from('<H', header, 18)[0])

    if len(archs) != 1:
        error("Input files have multiple architectures, can't link this...")

    archn = archs.pop()

    if archn not in archmagic:
        # The whole message must be parenthesised before applying '%':
        # '%' binds tighter than '+', so formatting only the second half of a
        # concatenation raises TypeError instead of printing the message.
        error(("Unknown architecture number %d"
               ". Consult elf.h and rebuild your object files.") % archn)

    return archmagic[archn]
def build_reloc_typ_table(reo) -> Dict[str, Set[str]]: # (symname, reloctyps) dict
    """Collect, per symbol, the relocation types used against it.

    reo: raw bytes printed by ``readelf -r -W``.

    Returns a dict mapping each symbol name to the set of relocation type
    names seen for it. Only lines that split into exactly 5 or 7 fields are
    treated as relocation entries; symbols whose name starts with '.' are
    skipped.
    """
    relocs = {}
    for line in reo.decode('utf-8').splitlines():
        stuff = line.split()

        # prolly a 'header' (or blank) line
        if len(stuff) not in (5, 7):
            continue

        symname, reloctyp = stuff[4], stuff[2]

        if symname.startswith('.'): # bleh
            continue

        # Conflicting relocation types for one symbol are deliberately not
        # diagnosed here; that check is only done for import/external
        # symbols (in get_needed_syms).
        relocs.setdefault(symname, set()).add(reloctyp)

    return relocs
def has_lto_object(readelf_bin, files):
    """Tell whether any of *files* looks like an LTO object.

    A file counts as LTO when it starts with the bytes ``BC`` (LLVM bitcode,
    i.e. clang -flto) or when its ``readelf -s -W`` listing mentions
    ``__gnu_lto_`` or ``.gnu.lto``.

    readelf_bin: path of the readelf executable to run.
    files: paths of the object files to inspect.

    Returns True/False. An empty *files* yields False without running readelf.
    """
    files = list(files)
    if not files:
        return False

    for path in files:
        with open(path, 'rb') as f:
            if f.read(2) == b'BC': # LLVM bitcode! --> clang -flto
                return True

    output = subprocess.check_output([readelf_bin, '-s', '-W', *files],
                                     stderr=subprocess.DEVNULL)

    for entry in output.decode('utf-8').splitlines():
        # lines with fewer than two fields carry no symbol information
        if len(entry.split()) < 2:
            continue
        # assuming nobody uses a symbol called "__gnu_lto_"...
        if "__gnu_lto_" in entry or ".gnu.lto" in entry:
            return True

    return False
def get_needed_syms(readelf_bin, inpfile) -> Dict[str, str]: # (symname, reloctyp) dict
    """Return the undefined global symbols of *inpfile* with their relocation type.

    Runs ``readelf -s -W`` and ``readelf -r -W`` on *inpfile*. Every symbol
    whose binding is GLOBAL and whose section index is UND is looked up in the
    relocation table built by build_reloc_typ_table().

    Reported through ``error()``:
      * a symbol name starting with ``__gnu_lto_`` (LTO object file),
      * a needed symbol used with more than one relocation type,
      * a needed symbol without any relocation, unless it is listed in
        ``implicit_syms``.
    """
    output = subprocess.check_output([readelf_bin, '-s', '-W', inpfile],
                                     stderr=subprocess.DEVNULL)
    outrel = subprocess.check_output([readelf_bin, '-r', '-W', inpfile],
                                     stderr=subprocess.DEVNULL)

    relocs = build_reloc_typ_table(outrel)

    curfile = inpfile
    syms = {}
    for entry in output.decode('utf-8').splitlines():
        stuff = entry.split()
        if len(stuff) < 2:
            continue
        if stuff[0] == "File:":
            curfile = stuff[1]
        if len(stuff) < 8:
            continue

        scope, ndx, name = stuff[4], stuff[6], stuff[7]

        if name.startswith("__gnu_lto_"): # yikes, an LTO object
            error("E: {} is an LTO object file, can't use this!".format(curfile))

        if scope != "GLOBAL" or ndx != "UND":
            continue

        if name in relocs:
            rlt = relocs[name]
            if len(rlt) > 1:
                error("E: symbol '%s' has multiple relocations types?! (%s)"
                      % (name, ', '.join(rlt)))
            assert name not in syms, ("??? %s" % name)
            # read the (single) relocation type without emptying the table
            syms[name] = next(iter(rlt))
        elif name not in implicit_syms:
            error("E: symbol '%s' has no relocation type?!" % name)

    return syms
def format_cc_path_line(entry):
    """Parse one ``category: =dir1:dir2:...`` line into ``(category, dirs)``.

    entry: a line of text containing ``': '``; a line without it raises
        ValueError from the tuple unpacking.

    Returns the category and the list of directories from the line that exist,
    resolved with os.path.realpath(). Duplicates are dropped, and the
    directories keep the order in which the line lists them, because callers
    that search them take the first hit.
    """
    category, path = entry.split(': ', 1)
    path = path.lstrip('=')
    resolved = (os.path.realpath(p) for p in path.split(':') if os.path.isdir(p))
    # dict.fromkeys de-duplicates while keeping first-seen order (a set would
    # scramble the search order from run to run).
    return (category, list(dict.fromkeys(resolved)))
def get_cc_paths(cc_bin):
    """Query the search directories of the compiler *cc_bin*.

    Runs ``cc_bin -print-search-dirs`` and parses each output line with
    format_cc_path_line().

    Returns a dict mapping category names to directory lists. If no
    'libraries' category is present, the first three lines are assumed to be
    'install', 'programs' and 'libraries', in that order.
    """
    # DON'T output localized search dirs! The locale is overridden only for
    # the child process, so this process's environment is left untouched.
    # LC_ALL is set too because it takes precedence over LANG.
    env = dict(os.environ, LANG="C", LC_ALL="C")
    output = subprocess.check_output([cc_bin, '-print-search-dirs'],
                                     stderr=subprocess.DEVNULL, env=env)

    outputpairs = [format_cc_path_line(line)
                   for line in output.decode('utf-8').splitlines()
                   if ': ' in line]
    paths = dict(outputpairs)

    if 'libraries' not in paths: # probably localized... sigh
        # monkeypatch, assuming order...
        paths = {
            'install': outputpairs[0][1],
            'programs': outputpairs[1][1],
            'libraries': outputpairs[2][1],
        }

    return paths
def get_cc_version(cc_bin):
    """Return ``(family, version)`` for the compiler *cc_bin*.

    Runs ``cc_bin --version``. *family* is "gcc" when the second output line
    mentions "Free Software Foundation", otherwise "clang" is assumed.
    *version* is a tuple of ints taken from the first dotted number on the
    first line, ignoring anything inside parentheses (vendor/package strings)
    and anything glued to a program name such as ``gcc-4.8``.

    Raises ValueError when no version number can be found.
    """
    # Force untranslated output for the child only; this process's
    # environment is not modified.
    env = dict(os.environ, LANG="C", LC_ALL="C")
    output = subprocess.check_output([cc_bin, '--version'],
                                     stderr=subprocess.DEVNULL, env=env)

    lines = output.decode('utf-8').splitlines()
    if not lines:
        raise ValueError("'%s --version' printed nothing" % cc_bin)

    # Drop "(Ubuntu 9.4.0-1ubuntu1)"-style groups so their numbers are not
    # mistaken for the compiler version.
    banner = re.sub(r"\([^)]*\)", " ", lines[0])
    match = re.search(r"(?:^|\s)(\d+(?:\.\d+)+)", banner)
    if match is None:
        raise ValueError("can't find a version number in '%s'" % lines[0])
    version = tuple(map(int, match.group(1).split('.')))

    is_gcc = len(lines) > 1 and "Free Software Foundation" in lines[1]
    return ("gcc" if is_gcc else "clang", version) # not GCC: assume clang
def is_valid_elf(f): # Good Enough(tm)
    """Return True when the file at path *f* starts with the 4-byte ELF magic."""
    with open(f, 'rb') as ff:
        return ff.read(4) == b'\x7FELF'
def find_lib(spaths, wanted):
    """Locate the shared object for library *wanted* in the directories *spaths*.

    Directories are tried in the given order. In each one, files matching
    ``lib<wanted>.so*`` are tried before ``<wanted>.so*``; the first regular
    file that passes is_valid_elf() is returned. Static archives (``.a``) are
    not searched.

    Calls ``error()`` when nothing is found.
    """
    for p in spaths:
        for prefix in ('lib', ''):
            pattern = glob.escape('%s/%s%s' % (p, prefix, wanted)) + '.so*'
            # glob returns matches in arbitrary order; sort them so the same
            # file is picked on every run.
            for f in sorted(glob.glob(pattern)):
                if os.path.isfile(f) and is_valid_elf(f):
                    return f

    error("E: couldn't find library '%s'." % wanted)
def find_libs(spaths, wanted):
    """Resolve every library name in *wanted* via find_lib(), keeping order."""
    found = []
    for libname in wanted:
        found.append(find_lib(spaths, libname))
    return found
def list_symbols(readelf_bin, lib):
    """List the defined, default-visibility symbols of *lib*.

    Runs ``readelf -sW`` on *lib*. A line is used when splitting it on
    whitespace yields exactly 9 fields (an empty leading field from the
    indentation plus 8 columns), its visibility column is "DEFAULT" and its
    section index column is not "UND". A trailing ``@@version`` is removed
    from the name.

    Returns the symbol names in the order readelf printed them; identical
    output lines contribute only once.
    """
    out = subprocess.check_output([readelf_bin, '-sW', lib], stderr=subprocess.DEVNULL)

    # De-duplicate identical lines but keep readelf's order, so the result is
    # the same on every run (a plain set would scramble it).
    lines = dict.fromkeys(out.decode('utf-8').split('\n'))

    symbols = []
    for line in lines:
        fields = re.split(r"\s+", line)
        if len(fields) != 9:
            continue
        vis, ndx, symbol = fields[6:9]
        if vis != "DEFAULT" or ndx == "UND":
            continue

        # strip away GLIBC versions
        symbols.append(re.sub(r"@@.*$", "", symbol))

    return symbols
def build_symbol_map(readelf_bin, libraries) -> Dict[str, Dict[str, ExportSym]]:
    """Map each exported symbol to the libraries providing it, with metadata.

    Runs ``readelf -sW`` once over all *libraries*. ``File:`` lines in the
    output switch the library that subsequent rows are attributed to; a
    library is identified by the last '/'-separated component of its path.
    Rows must split into exactly 8 fields; rows whose visibility is not
    "DEFAULT" or whose binding is "LOCAL" are dropped. Undefined and
    unsupported-type symbols are kept on purpose so that proper errors can be
    emitted later on. A trailing ``@@version`` is removed from names.

    Returns ``{symname: {libname: ExportSym}}``; an empty *libraries* yields
    an empty dict without running readelf.
    """
    libraries = list(libraries)
    symbol_map = {} # symname -> (lib -> exportsym)
    if not libraries:
        return symbol_map

    out = subprocess.check_output([readelf_bin, '-sW', *libraries], stderr=subprocess.DEVNULL)

    # Rows are attributed to the first library until a "File:" line says
    # otherwise.
    curfile = libraries[0]
    soname = curfile.split("/")[-1]

    for line in out.decode('utf-8').splitlines():
        fields = line.split()
        if len(fields) < 2:
            continue

        if fields[0] == "File:":
            curfile = fields[1]
            soname = curfile.split("/")[-1]

        if len(fields) != 8:
            continue

        typ, scope, vis, ndx, name = fields[3:8]
        if vis != "DEFAULT" or scope == "LOCAL":
            continue

        # strip away GLIBC versions
        name = re.sub(r"@@.*$", "", name)

        symbol_map.setdefault(name, {})[soname] = ExportSym(name, typ, scope, vis, ndx)

    return symbol_map
# this ordering is specific to ONE symbol!
def build_preferred_lib_order(sym, libs: Dict[str, "ExportSym"]) -> List[Tuple[str, "ExportSym"]]:
    """Order the libraries providing *sym* from most to least preferred.

    sym: symbol name, used in diagnostics only.
    libs: lib -> syminfo for every library that lists the symbol.

    Returns ``[(lib, syminfo)]``: entries whose binding is not WEAK first,
    then WEAK entries that are defined, then WEAK entries that are undefined.

    Reported through ``error()``:
      * more than one non-weak entry,
      * no non-weak entry and more than one weak definition,
      * no non-weak entry and no weak definition at all.
    """
    # libs: lib -> syminfo
    realdefs    = [(k, v) for k, v in libs.items() if v.scope != "WEAK"]
    weakdefs    = [(k, v) for k, v in libs.items() if v.scope == "WEAK" and v.ndx != "UND"]
    weakunddefs = [(k, v) for k, v in libs.items() if v.scope == "WEAK" and v.ndx == "UND"]

    if len(realdefs) > 1:
        error("E: symbol '%s' defined non-weakly in multiple libraries! (%s)"
              % (sym, ', '.join(k for k, _ in realdefs)))
    if not realdefs and len(weakdefs) > 1:
        # Separate message: here the clashing definitions are the weak ones.
        error("E: symbol '%s' only has weak definitions, in multiple libraries! (%s)"
              % (sym, ', '.join(k for k, _ in weakdefs)))

    if not realdefs and not weakdefs: # must be in weakunddefs
        error("E: no default weak implementation found for symbol '%s'" % sym)

    return realdefs + weakdefs + weakunddefs
def has_good_subordening(needles, haystack):
    """Check that *needles* occur in *haystack* in non-decreasing position.

    needles: sequence of ``(key, anything)`` pairs.
    haystack: sequence whose items have the key as their first element.

    Needles whose key does not occur in *haystack* are ignored. Returns False
    as soon as a needle sits before the previously found one, True otherwise.
    """
    # First position of every key, computed once instead of scanning the
    # haystack for each needle.
    position = {}
    for idx, entry in enumerate(haystack):
        position.setdefault(entry[0], idx)

    prevind = 0
    for k, _ in needles:
        curind = position.get(k)
        if curind is None: # not in haystack --> eh, let's ignore
            continue

        if curind < prevind:
            return False
        prevind = curind
    return True
def add_with_ordening(haystack: List[Tuple[str, Dict[str, str]]], # [(libname, (symname -> reloctyp))]
                      needles: List[Tuple[str, "ExportSym"]], # [(lib, syminfo)]
                      sym: str, reloc: str) \
        -> List[Tuple[str, Dict[str, str]]]:
    """Record ``sym -> reloc`` for every library in *needles*, inside *haystack*.

    *haystack* is modified in place and also returned. A library already
    present gets the symbol added to its dict; a missing library is inserted
    right after the position reached so far, with the symbol already
    recorded in its dict.

    Asserts that libraries already present are met at non-decreasing
    positions and that *sym* is not yet recorded for them.
    """
    haylist = [x[0] for x in haystack]
    startind = 0
    for k, _ in needles:
        try:
            newind = haylist.index(k)
        except ValueError: # not in haystack --> add!
            startind = startind + 1
            # Store the symbol in the new entry as well; starting from an
            # empty dict would drop the symbol that caused the insertion.
            haystack.insert(startind, (k, {sym: reloc}))
            haylist.insert(startind, k)
            continue

        assert newind >= startind, "???? (%d <= %d)" % (newind, startind)
        startind = newind

        symrelocdict = haystack[startind][1]
        assert sym not in symrelocdict, "?????"
        symrelocdict[sym] = reloc
    return haystack
def resolve_extern_symbols(needed: Dict[str, str], # symname -> reloctyp
                           available: Dict[str, Dict[str, ExportSym]], # symname -> (lib -> syminfo)
                           args) \
        -> OrderedDict[str, Dict[str, str]]: # libname -> (symname -> reloctyp)
    """Assign every needed symbol to the libraries providing it, in link order.

    needed: symname -> reloctyp of the symbols to resolve.
    available: symname -> (lib -> syminfo) of everything the libraries offer;
        it is not modified.
    args: options object; ``det``, ``fuse_dnload_loader`` and
        ``fskip_zero_value`` are read.

    Symbols offered by several libraries are placed first, since they
    constrain the library order; symbols offered by exactly one library are
    added afterwards. A symbol missing from *available* is reported through
    ``error()``; an ordering conflict only produces a warning via
    ``eprintf()``.

    Returns an ordered mapping libname -> (symname -> reloctyp).
    """
    # first of all, we're going to check which needed symbols are provided by
    # which libraries
    bound = {} # sym -> (reloc, (lib -> syminfo))
    for k, v in needed.items():
        if k not in available:
            error("E: symbol '%s' could not be found." % k)
        bound[k] = (v, available[k])

    # default ordering
    bound = list(bound.items())
    if args.det:
        # deterministic mode: shorter names first, then alphabetical
        bound.sort(key=lambda kv: (len(kv[0]), kv[0]))

    liborder = [] # [(libname, (symname -> reloctyp))]
    for k, (reloc, libs) in bound: # k: sym, reloc: str, libs: lib -> syminfo
        if len(libs) <= 1:
            continue
        # preferred: [(lib, syminfo)]
        preferred = build_preferred_lib_order(k, libs)
        if not has_good_subordening(preferred, liborder) \
                and not args.fskip_zero_value:
            if args.fuse_dnload_loader:
                message = "W: unreconcilable library ordenings '%s' and '%s' "+\
                    "for symbol '%s', you are STRONGLY advised to use `-fskip-zero-value'!"
            else:
                message = "W: unreconcilable library ordenings '%s' and '%s' "+\
                    "for symbol '%s', you might want to enable `-fskip-zero-value'."
            # both orderings are lists of (libname, ...) pairs, not dicts
            eprintf(message % (', '.join(name for name, _ in liborder),
                               ', '.join(name for name, _ in preferred), k))
        liborder = add_with_ordening(liborder, preferred, k, reloc)

    # add all those left without any possible preferred ordering
    for k, (reloc, libs) in bound:
        assert libs, ("??? (%s)" % k)
        if len(libs) != 1:
            continue
        # read the single (lib, syminfo) pair without emptying the caller's dict
        lib = next(iter(libs.items()))
        liborder = add_with_ordening(liborder, [lib], k, reloc)

    return OrderedDict(liborder)