{simple,shoddy,smart} minsize-oriented linker
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

424 lines
14 KiB

2 years ago
1 year ago
  1. import glob
  2. import os.path
  3. import re
  4. import subprocess
  5. import struct
  6. import sys
  7. from typing import NamedTuple, List, Dict, OrderedDict, Tuple, Set
  8. from .shared import *
  9. implicit_syms = { '_GLOBAL_OFFSET_TABLE_' }
  10. unsupported_symtyp = { 'NOTYPE', 'TLS', 'OBJECT' } # TODO: support OBJECT, and maybe TLS too
  11. class ExportSym(NamedTuple):
  12. name: str
  13. typ: str
  14. scope: str
  15. vis: str
  16. ndx: str
  17. def decide_arch(inpfiles):
  18. archs = set()
  19. for fp in inpfiles:
  20. with open(fp, 'rb') as ff:
  21. magi = ff.read(4) # EI_MAGx of ei_ident
  22. if magi != b'\x7fELF':
  23. error("Input file '%s' is not an ELF file!" % fp)
  24. _ = ff.read(12) # rest of ei_ident
  25. _ = ff.read( 2) # ei_type
  26. machine = ff.read(2) # ei_machine
  27. machnum = struct.unpack('<H', machine)[0]
  28. archs.add(machnum)
  29. if len(archs) != 1:
  30. error("Input files have multiple architectures, can't link this...")
  31. archn = archs.pop()
  32. if archn not in archmagic:
  33. eprintf("Unknown architecture number %d" + \
  34. ". Consult elf.h and rebuild your object files." % archn)
  35. return archmagic[archn]
  36. def build_reloc_typ_table(reo) -> Dict[str, Set[str]]: # (symname, reloctyps) dict
  37. relocs = {}
  38. for s in reo.decode('utf-8').splitlines():
  39. stuff = s.split()
  40. # prolly a 'header' line
  41. if len(stuff) != 7 and len(stuff) != 5:
  42. continue
  43. symname, reloctyp = stuff[4], stuff[2]
  44. if symname[0] == '.': # bleh
  45. continue
  46. relocs.setdefault(symname, set()).add(reloctyp)
  47. # don't do that here, only check for import/external symbols (in get_needed_syms)
  48. #if symname in relocs:
  49. # rlc = relocs[symname]
  50. # if rlc != reloctyp:
  51. # error("E: symbol '%s' used with multiple relocation types! (%s <-> %s)"
  52. # % (symname, reloctyp, rlc))
  53. #else:
  54. # relocs[symname] = reloctyp
  55. return relocs
  56. def has_lto_object(readelf_bin, files):
  57. for x in files:
  58. with open(x,'rb') as f:
  59. if f.read(2) == b'BC': # LLVM bitcode! --> clang -flto
  60. return True
  61. output = subprocess.check_output([readelf_bin, '-s', '-W'] + files,
  62. stderr=subprocess.DEVNULL)
  63. curfile = files[0]
  64. for entry in output.decode('utf-8').splitlines():
  65. stuff = entry.split()
  66. if len(stuff) < 2:
  67. continue
  68. if stuff[0] == "File:":
  69. curfile = stuff[1]
  70. # assuming nobody uses a symbol called "__gnu_lto_"...
  71. if "__gnu_lto_" in entry or ".gnu.lto" in entry:
  72. return True
  73. return False
  74. def get_needed_syms(readelf_bin, inpfile) -> Dict[str, str]: # (symname, reloctyp) dict
  75. output = subprocess.check_output([readelf_bin, '-s', '-W',inpfile],
  76. stderr=subprocess.DEVNULL)
  77. outrel = subprocess.check_output([readelf_bin, '-r', '-W',inpfile],
  78. stderr=subprocess.DEVNULL)
  79. #eprintf(output.decode('utf-8'))
  80. #eprintf(outrel.decode('utf-8'))
  81. relocs = build_reloc_typ_table(outrel)
  82. curfile = inpfile
  83. syms = {}
  84. for entry in output.decode('utf-8').splitlines():
  85. stuff = entry.split()
  86. if len(stuff) < 2:
  87. continue
  88. if stuff[0] == "File:":
  89. curfile = stuff[1]
  90. if len(stuff) < 8:
  91. continue
  92. scope, ndx, name = stuff[4], stuff[6], stuff[7]
  93. if name.startswith("__gnu_lto_"): # yikes, an LTO object
  94. error("E: {} is an LTO object file, can't use this!".format(curfile))
  95. if scope == "GLOBAL" and ndx == "UND" and len(name) > 0:
  96. if name in relocs:
  97. rlt = relocs[name]
  98. if len(rlt) > 1:
  99. error("E: symbol '%s' has multiple relocations types?! (%s)"
  100. % (name, ', '.join(rlt)))
  101. #syms.add((name, rlt.pop()))
  102. if name in syms:
  103. assert False, ("??? %s" % name)
  104. syms[name] = rlt.pop()
  105. elif name not in implicit_syms:
  106. error("E: symbol '%s' has no relocation type?!" % name)
  107. #needgot = False
  108. #if "_GLOBAL_OFFSET_TABLE_" in syms:
  109. # needgot = True
  110. # syms.remove("_GLOBAL_OFFSET_TABLE_")
  111. return syms#, needgot
  112. def uniq_list(l):
  113. od = OrderedDict()
  114. for x in l: od[x] = x
  115. return list(od.keys())
  116. def format_cc_path_line(entry):
  117. category, path = entry.split(': ', 1)
  118. path = path.lstrip('=')
  119. return (category, uniq_list(os.path.realpath(p) \
  120. for p in path.split(':') if os.path.isdir(p))[::-1])
  121. def get_cc_paths(cc_bin):
  122. bak = os.environ.copy()
  123. os.environ['LANG'] = "C" # DON'T output localized search dirs!
  124. output = subprocess.check_output([cc_bin, '-print-search-dirs'],
  125. stderr=subprocess.DEVNULL)
  126. os.environ = bak
  127. outputpairs = list(map(format_cc_path_line,
  128. output.decode('utf-8').splitlines()))
  129. paths = {}
  130. for category, path in outputpairs: paths[category] = path
  131. if 'libraries' not in paths: # probably localized... sigh
  132. # monkeypatch, assuming order...
  133. paths = {}
  134. paths['install' ] = outputpairs[0][1]
  135. paths['programs' ] = outputpairs[1][1]
  136. paths['libraries'] = outputpairs[2][1]
  137. return paths
  138. def get_cc_version(cc_bin):
  139. bak = os.environ.copy()
  140. os.environ['LANG'] = "C" # DON'T output localized search dirs!
  141. output = subprocess.check_output([cc_bin, '--version'],
  142. stderr=subprocess.DEVNULL)
  143. os.environ = bak
  144. lines = output.decode('utf-8').splitlines()
  145. if "Free Software Foundation" in lines[1]: # GCC
  146. verstr = lines[0].split()[-1]
  147. return ("gcc", tuple(map(int, verstr.split('.'))))
  148. else: # assume clang
  149. verstr = lines[0].split()[-1]
  150. return ("clang", tuple(map(int, verstr.split('.'))))
  151. def is_valid_elf(f): # Good Enough(tm)
  152. with open(f, 'rb') as ff:
  153. return ff.read(4) == b'\x7FELF'
  154. def find_lib(spaths, wanted):
  155. for p in spaths:
  156. for f in glob.glob(glob.escape('%s/lib%s' % (p, wanted)) + '.so*'):
  157. if os.path.isfile(f) and is_valid_elf(f):
  158. return f
  159. for f in glob.glob(glob.escape('%s/%s' % (p, wanted)) + '.so*'):
  160. if os.path.isfile(f) and is_valid_elf(f):
  161. return f
  162. #for f in glob.glob(glob.escape(p) + '/lib' + wanted + '.a' ): return f
  163. #for f in glob.glob(glob.escape(p) + '/' + wanted + '.a' ): return f
  164. error("E: couldn't find library '%s'." % wanted)
  165. def find_libs(spaths, wanted):
  166. return [find_lib(spaths, l) for l in wanted]
  167. def list_symbols(readelf_bin, lib):
  168. out = subprocess.check_output([readelf_bin, '-sW', lib], stderr=subprocess.DEVNULL)
  169. lines = set(out.decode('utf-8').split('\n'))
  170. symbols = []
  171. for line in lines:
  172. fields = re.split(r"\s+", line)
  173. if len(fields) != 9:
  174. continue
  175. vis, ndx, symbol = fields[6:9]
  176. if vis != "DEFAULT" or ndx == "UND":
  177. continue
  178. # strip away GLIBC versions
  179. symbol = re.sub(r"@@.*$", "", symbol)
  180. symbols.append(symbol)
  181. return symbols
  182. def build_symbol_map(readelf_bin, libraries) -> Dict[str, Dict[str, ExportSym]]:
  183. # create dictionary that maps symbols to libraries that provide them, and their metadata
  184. symbol_map = {} # symname -> (lib, exportsym)
  185. out = subprocess.check_output([readelf_bin, '-sW', *libraries], stderr=subprocess.DEVNULL)
  186. lines = out.decode('utf-8').splitlines()
  187. curfile = libraries[0]
  188. soname = curfile.split("/")[-1]
  189. for line in lines:
  190. fields = line.split()
  191. if len(fields) < 2:
  192. continue
  193. if fields[0] == "File:":
  194. curfile = fields[1]
  195. soname = curfile.split("/")[-1]
  196. if len(fields) != 8:
  197. continue
  198. typ, scope, vis, ndx, name = fields[3:8]
  199. if vis != "DEFAULT" \
  200. or scope == "LOCAL": #\
  201. #or (ndx == "UND" and scope != "WEAK"):# \ # nah, that one's done further down the line as well
  202. #or typ in unsupported_symtym:
  203. # ^ except, for the last case, we're going to emit proper errors later on
  204. continue
  205. # strip away GLIBC versions
  206. name = re.sub(r"@@.*$", "", name)
  207. symbol_map.setdefault(name, {})[soname] = ExportSym(name, typ, scope, vis, ndx)
  208. return symbol_map
  209. # this ordening is specific to ONE symbol!
  210. def build_preferred_lib_order(sym, libs: Dict[str, ExportSym]) -> List[str]:
  211. # libs: lib -> syminfo
  212. realdefs = [lib for lib, v in libs.items() if v.scope != "WEAK" and v.ndx != "UND"]
  213. weakdefs = [lib for lib, v in libs.items() if v.scope == "WEAK" and v.ndx != "UND"]
  214. weakunddefs = [lib for lib, v in libs.items() if v.scope == "WEAK" and v.ndx == "UND"]
  215. unddefs = [lib for lib, v in libs.items() if v.scope != "WEAK" and v.ndx == "UND"]
  216. #ks = [v.name for k, v in libs.items()]
  217. #print("k=",ks)
  218. #assert all(k == ks[0] for k in ks)
  219. if len(realdefs) > 1: #or (len(realdefs) == 0 and len(weakdefs) > 1):
  220. error("E: symbol '%s' defined non-weakly in multiple libraries! (%s)"
  221. % (sym, ', '.join(realdefs)))
  222. if len(realdefs) == 0 and len(weakdefs) > 1:
  223. eprintf("W: symbol '%s' defined amibguously weakly in multiple libraries! Will pick a random one... (%s)"
  224. % (sym, ', '.join(weakdefs)))
  225. if len(realdefs) == 0 and len(weakdefs) == 0: # must be in weakunddefs or unddefs
  226. error("E: no default weak implementation found for symbol '%s'" % sym)
  227. return realdefs + weakdefs + weakunddefs + unddefs
  228. def has_good_subordening(needles, haystack):
  229. haylist = [x[0] for x in haystack]
  230. prevind = 0
  231. for lib in needles:
  232. curind = None
  233. try:
  234. curind = haylist.index(lib)
  235. except ValueError: # not in haystack --> eh, let's ignore
  236. continue
  237. if curind < prevind:
  238. return False
  239. prevind = curind
  240. return True
  241. def add_with_ordening(haystack: List[Tuple[str, Dict[str, str]]], # [(libname, (symname -> reloctyp))]
  242. needles: List[str], # [lib]
  243. sym: str, reloc: str, last=False) \
  244. -> List[Tuple[str, Dict[str, str]]]:
  245. haylist = [x[0] for x in haystack]
  246. startind = None if last else 0
  247. ii = 0
  248. for lib in needles:
  249. #eprintf("k=",k,"v=",v)
  250. try:
  251. newind = haylist.index(lib)
  252. #eprintf("lib=%s newind=%d" % (lib, newind))
  253. #assert newind >= startind, "???? (%d <= %d)" % (newind, startind)
  254. startind = newind
  255. if ii == 0:
  256. symrelocdict = haystack[startind][1]
  257. assert not(sym in symrelocdict), "?????"
  258. haystack[startind][1][sym] = reloc
  259. except ValueError: # not in haystack --> add!
  260. if startind is None:
  261. startind = len(haystack)
  262. if not last:
  263. startind = startind + 1
  264. #eprintf("lib=%s NEWind=%d" % (lib, startind))
  265. dv = {sym: reloc} if ii == 0 else {}
  266. haystack.insert(startind, (lib, dv))
  267. haylist.insert(startind, lib)
  268. if last:
  269. startind = startind + 1
  270. ii = ii + 1
  271. return haystack
  272. def visable(ll):
  273. rr = []
  274. for k, v in ll:
  275. if isinstance(v, ExportSym):
  276. rr.append((k, v)) # v.name
  277. else:
  278. rr.append((k, v.keys()))
  279. return rr
  280. def resolve_extern_symbols(needed: Dict[str, List[str]], # symname -> reloctyps
  281. available: Dict[str, Dict[str, ExportSym]], # symname -> (lib -> syminfo)
  282. args) \
  283. -> OrderedDict[str, Dict[str, str]]: # libname -> (symname -> reloctyp)
  284. # first of all, we're going to check which needed symbols are provided by
  285. # which libraries
  286. bound = {} # sym -> (reloc, (lib -> syminfo))
  287. for k, v in needed.items():
  288. if k not in available:
  289. error("E: symbol '%s' could not be found." % k)
  290. bound[k] = (v, available[k])
  291. # default ordening
  292. bound = bound.items()
  293. if args.det:
  294. bound = sorted(bound, key=lambda kv: (len(kv[0]), kv[0]))
  295. #eprintf("bound", bound,"\n")
  296. liborder = [] # [(libname, (symname -> reloctyp))]
  297. for k, v in bound: # k: sym (str)
  298. # reloc: str
  299. # libs: lib -> syminfo
  300. reloc, libs = v[0], v[1]
  301. if len(libs) <= 1:
  302. continue
  303. # preferred: [lib]
  304. #eprintf("libs",visable(libs.items()))
  305. preferred = build_preferred_lib_order(k, libs)
  306. #eprintf("preferred",preferred)
  307. if not has_good_subordening(preferred, liborder):
  308. message = None
  309. if args.fuse_dnload_loader and not args.fskip_zero_value:
  310. message = "W: unreconcilable library ordenings '%s' and '%s' "+\
  311. "for symbol '%s', you are STRONGLY advised to use `-fskip-zero-value'!"
  312. if not args.fuse_dnload_loader and not args.fskip_zero_value:
  313. message = "W: unreconcilable library ordenings '%s' and '%s' "+\
  314. "for symbol '%s', you might want to enable `-fskip-zero-value'."
  315. if message is not None:
  316. eprintf(message % (', '.join(liborder.keys()), ', '.join(preferred.keys()), k))
  317. liborder = add_with_ordening(liborder, preferred, k, reloc)
  318. #eprintf("new order",visable(liborder),"\n")
  319. # add all those left without any possible preferred ordening
  320. for k, v in bound:
  321. reloc, libs = v[0], v[1]
  322. if len(libs) == 0:
  323. assert False, ("??? (%s)" % sym)
  324. if len(libs) != 1:
  325. continue
  326. lib = libs.popitem() # (lib, syminfo)
  327. #eprintf("lib",lib)
  328. liborder = add_with_ordening(liborder, [lib[0]], k, reloc, True)
  329. #eprintf("new order (no preference)",visable(liborder),"\n")
  330. #eprintf("ordered", visable(liborder))
  331. return OrderedDict(liborder)