{simple,shoddy,smart} minsize-oriented linker
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

213 lines
7.4 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. #!/usr/bin/env python3
  2. import os.path, struct, sys
  3. import argparse, glob, shutil, subprocess
  4. import smol.hackyelf as hackyelf
  5. import smol.linkmap as linkmap
  6. from smol.shared import *
  7. from smol.parse import *
  8. # TODO: support for hashes that aren't djb2
  9. def readbyte(blob, off): return struct.unpack('<B', blob[off:off+1])[0], (off+1)
  10. def readshort(blob, off):return struct.unpack('<H', blob[off:off+2])[0], (off+2)
  11. def readint(blob, off): return struct.unpack('<I', blob[off:off+4])[0], (off+4)
  12. def readlong(blob, off): return struct.unpack('<Q', blob[off:off+8])[0], (off+8)
  13. def readstr(blob, off):
  14. text = bytearray()
  15. while True:
  16. char, off = readbyte(blob, off)
  17. if char == 0:
  18. break
  19. text.append(char)
  20. return text.decode('utf-8'), off
  21. def get_def_libpaths(cc_bin, is32bit):
  22. # FIXME: HACK
  23. if is32bit: return ['/usr/lib32/','/lib32/']
  24. return get_cc_paths(cc_bin)['libraries']
  25. def find_libs(deflibs, libname):
  26. dirs = os.environ.get('LD_LIBRARY_PATH','').split(':') + deflibs
  27. for d in dirs:
  28. for f in glob.glob(glob.escape("%s/%s" % (d, libname)) + '*'):
  29. yield f
  30. def build_hashtab(readelf_bin, lib, hashid):
  31. symbols = list_symbols(readelf_bin, lib)
  32. hashfn = get_hash_fn(hashid)
  33. return { hashfn(symbol):symbol for symbol in symbols }
  34. def addr2off(elf, addr):
  35. for x in elf.phdrs:
  36. if x.ptype != hackyelf.PT_LOAD: continue
  37. if addr >= x.vaddr and addr < x.vaddr + x.memsz:
  38. aoff = addr - x.vaddr
  39. assert aoff < x.filesz, ".bss address!"
  40. return aoff + x.off
  41. error("E: Address %08x not in the static address range!" % addr)
  42. def get_needed_libs(elf, blob):
  43. assert elf.dyn is not None, "No DYNAMIC table present in the ELF file!"
  44. strtabs = [x.val for x in elf.dyn if x.tag == hackyelf.DT_STRTAB]
  45. assert len(strtabs) == 1, "Only one DT_STRTAB may be present in an ELF file."
  46. strtab = strtabs[0]
  47. return [readstr(blob,addr2off(elf, strtab+x.val))[0]
  48. for x in elf.dyn if x.tag == hackyelf.DT_NEEDED]
  49. def get_hashtbl(elf, blob, args):
  50. htaddr = None
  51. if args.map is not None:
  52. lmap = linkmap.parse(args.map.read())
  53. tabs = [x for x in lmap.mmap if x.sym == '_symbols']
  54. assert len(tabs) == 1, "One '_symbols' symbol must be present."
  55. htaddr = tabs[0].org
  56. elif elf.is32bit:
  57. txtoff = addr2off(elf, elf.entry)
  58. # scan for 'push IMM32'
  59. while blob[txtoff] != 0x68:
  60. txtoff = txtoff + 1
  61. assert txtoff < len(blob), "wtf??? (can't find a push IMM32 instruction which pushes the hashtable address)"
  62. txtoff = txtoff + 1
  63. #eprintf("Hash table offset: 0x%08x?" % txtoff)
  64. htaddr, ___ = readint(blob, txtoff)
  65. else: # 64-bit
  66. txtoff = addr2off(elf, elf.entry)
  67. # scan for 'push IMM32'
  68. # but the first one we'll encounter pushes the entrypoint addr!
  69. while blob[txtoff] != 0x68:
  70. txtoff = txtoff + 1
  71. assert txtoff < len(blob), "wtf??? (can't find a push IMM32 instruction which pushes the hashtable or entrypoint address)"
  72. txtoff = txtoff + 1
  73. # except, this is actually the value we're looking for when the binary
  74. # had been linked with -fuse-dnload-loader! so let's just check the
  75. # value
  76. htaddr, ___ = readint(blob, txtoff)
  77. #eprintf("ELF entry == 0x%08x" % elf.entry)
  78. if htaddr == elf.entry:
  79. # now we can look for the interesting address
  80. while blob[txtoff] != 0x68:
  81. txtoff = txtoff + 1
  82. assert txtoff < len(blob), "wtf??? (can't find a push IMM32 instruction which pushes the hashtable address)"
  83. txtoff = txtoff + 1
  84. #eprintf("Hash table offset: 0x%08x?" % txtoff)
  85. htaddr, ___ = readint(blob, txtoff)
  86. else:
  87. pass#eprintf("Hash table offset: 0x%08x?" % txtoff)
  88. assert htaddr is not None, "wtf? (no hashtable address)"
  89. #eprintf("Hash table address: 0x%08x" % htaddr)
  90. htoff = addr2off(elf, htaddr)
  91. #eprintf("Hash table offset: 0x%08x" % htoff)
  92. tbl = []
  93. while True:
  94. hashsz = 2 if elf.is32bit and args.hash16 else 4
  95. #eprintf("sym from 0x%08x" % htoff)
  96. #eprintf("sym end at 0x%08x, blob end at 0x%08x" % (htoff+hashsz, len(blob)))
  97. if htoff+hashsz > len(blob):
  98. #eprintf("htoff = 0x%08x, len=%08x" % (htoff, len(blob)))
  99. if len(blob) <= htoff and len(tbl) > 0:
  100. break
  101. #if elf.is32bit:
  102. if readbyte(blob, htoff)[0] == 0:
  103. break
  104. else:
  105. assert False, "AAAAA rest is %s" % repr(blob[htoff:])
  106. #else:
  107. # if struct.unpack('<H', blob[htoff:htoff+2])[0] == 0:
  108. # break
  109. # else:
  110. # assert False, "AAAAA rest is %s" % repr(blob[htoff:])
  111. val, ___ = (readshort if hashsz == 2 else readint)(blob, htoff)
  112. if (val & 0xFFFF) == 0:
  113. break
  114. tbl.append(val)
  115. #eprintf("sym %08x" % val)
  116. htoff = htoff + (4 if elf.is32bit else 8)
  117. return tbl
  118. def do_smoldd_run(args):
  119. blob = args.input.read()
  120. elf = hackyelf.parse(blob)
  121. deflibs = get_def_libpaths(args.cc, elf.is32bit)
  122. needed = get_needed_libs(elf, blob)
  123. neededpaths = dict((l,list(find_libs(deflibs, l))[0]) for l in needed)
  124. htbl = get_hashtbl(elf, blob, args)
  125. hashid = get_hash_id(args.hash16, args.crc32c)
  126. libhashes = dict((l, build_hashtab(args.readelf, neededpaths[l], hashid)) for l in needed)
  127. hashresolves = dict({})
  128. noresolves = []
  129. for x in htbl:
  130. done = False
  131. for l, v in libhashes.items():
  132. if x in v:
  133. hashresolves.setdefault(l, {})[x] = v[x]
  134. done = True
  135. break
  136. if not done:
  137. noresolves.append(x)
  138. for l, v in hashresolves.items():
  139. print("%s:" % l)
  140. for x in v.keys():
  141. print("\t%08x -> %s" % (x, v[x]))
  142. if len(noresolves) > 0:
  143. print("UNRESOLVED:")
  144. for x in noresolves:
  145. print("\t%08x" % x)
  146. return 0
  147. def main():
  148. parser = argparse.ArgumentParser()
  149. parser.add_argument('input', type=argparse.FileType('rb'),
  150. default=sys.stdin.buffer, help="input file")
  151. parser.add_argument('--cc',
  152. default=shutil.which('cc'), help="C compiler binary")
  153. parser.add_argument('--readelf',
  154. default=shutil.which('readelf'), help="readelf binary")
  155. parser.add_argument('--map', type=argparse.FileType('r'), help=\
  156. "Get the address of the symbol hash table from the "+\
  157. "linker map output instead of attempting to parse the"+\
  158. " binary.")
  159. hashgrp = parser.add_mutually_exclusive_group()
  160. hashgrp.add_argument('-s', '--hash16', default=False, action='store_true', \
  161. help="Use 16-bit (BSD2) hashes instead of 32-bit djb2 hashes. "+\
  162. "Only usable for 32-bit output.")
  163. hashgrp.add_argument('-c', '--crc32c', default=False, action='store_true', \
  164. help="Use Intel's crc32 intrinsic for hashing. Conflicts with `--hash16'.")
  165. args = parser.parse_args()
  166. return do_smoldd_run(args)
  167. if __name__ == '__main__':
  168. rv = main()
  169. if rv is None: pass
  170. else:
  171. try: sys.exit(int(rv))
  172. except Exception: sys.exit(1)