{simple,shoddy,smart} minsize-oriented linker
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

188 lines
9.5 KiB

  1. #!/usr/bin/env python3
  2. import argparse
  3. import glob
  4. import itertools
  5. import os, os.path
  6. import shutil
  7. import subprocess
  8. import sys
  9. import tempfile
  10. from smol.shared import *
  11. from smol.parse import *
  12. from smol.emit import *
  13. from smol.cnl import *
  14. def main():
  15. parser = argparse.ArgumentParser()
  16. parser.add_argument('-m', '--target', default='', \
  17. help='architecture to generate asm code for (default: auto)')
  18. parser.add_argument('-l', '--library', default=[], metavar='LIB', action='append', \
  19. help='libraries to link against')
  20. parser.add_argument('-L', '--libdir', default=[], metavar='DIR', action='append', \
  21. help="directories to search libraries in")
  22. parser.add_argument('-s', '--hash16', default=False, action='store_true', \
  23. help="Use 16-bit (BSD) hashes instead of 32-bit djb2 hashes. "+\
  24. "Implies -fuse-dnload-loader")
  25. parser.add_argument('-n', '--nx', default=False, action='store_true', \
  26. help="Use NX (i.e. don't use RWE pages). Costs the size of one phdr, "+\
  27. "plus some extra bytes on i386.")
  28. parser.add_argument('-d', '--det', default=False, action='store_true', \
  29. help="Make the order of imports deterministic (default: just use " + \
  30. "whatever binutils throws at us)")
  31. parser.add_argument('-fuse-interp', default=False, action='store_true', \
  32. help="Include a program interpreter header (PT_INTERP). If not " +\
  33. "enabled, ld.so has to be invoked manually by the end user.")
  34. parser.add_argument('-falign-stack', default=False, action='store_true', \
  35. help="Align the stack before running user code (_start). If not " + \
  36. "enabled, this has to be done manually. Costs 1 byte.")
  37. parser.add_argument('-fuse-nx', default=False, action='store_true', \
  38. help="Don't use one big RWE segment, but use separate RW and RE ones."+\
  39. " Use this to keep strict kernels (PaX/grsec) happy. Costs at "+\
  40. "least the size of one program header entry.")
  41. parser.add_argument('-fuse-dnload-loader', default=False, action='store_true', \
  42. help="Use a dnload-style loader for resolving symbols, which doesn't "+\
  43. "depend on nonstandard/undocumented ELF and ld.so features, but "+\
  44. "is slightly larger. If not enabled, a smaller custom loader is "+\
  45. "used which assumes glibc.")
  46. parser.add_argument('-fskip-zero-value', default=False, action='store_true', \
  47. help="Skip an ELF symbol with a zero address (a weak symbol) when "+\
  48. "parsing libraries at runtime. Try enabling this if you're "+\
  49. "experiencing sudden breakage. However, many libraries don't use "+\
  50. "weak symbols, so this doesn't often pose a problem. Costs ~5 bytes.")
  51. parser.add_argument('-fuse-dt-debug', default=False, action='store_true', \
  52. help="Use the DT_DEBUG Dyn header to access the link_map, which doesn't"+\
  53. " depend on nonstandard/undocumented ELF and ld.so features. If "+\
  54. "not enabled, the link_map is accessed using data leaked to the "+\
  55. "entrypoint by ld.so, which assumes glibc. Costs ~10 bytes.")
  56. parser.add_argument('-fuse-dl-fini', default=False, action='store_true', \
  57. help="Pass _dl_fini to the user entrypoint, which should be done to "+\
  58. "properly comply with all standards, but is very often not "+\
  59. "needed at all. Costs 2 bytes.")
  60. parser.add_argument('-fskip-entries', default=False, action='store_true', \
  61. help="Skip the first two entries in the link map (resp. ld.so and "+\
  62. "the vDSO). Speeds up symbol resolving, but costs ~5 bytes.")
  63. parser.add_argument('-fno-start-arg', default=False, action='store_true', \
  64. help="Don't pass a pointer to argc/argv/envp to the entrypoint using "+\
  65. "the standard calling convention. This means you need to read "+\
  66. "these yourself in assembly if you want to use them! (envp is "+\
  67. "a preprequisite for X11, because it needs $DISPLAY.) Frees 3 bytes.")
  68. parser.add_argument('-funsafe-dynamic', default=False, action='store_true', \
  69. help="Don't end the ELF Dyn table with a DT_NULL entry. This might "+\
  70. "cause ld.so to interpret the entire binary as the Dyn table, "+\
  71. "so only enable this if you're sure this won't break things!")
  72. parser.add_argument('--nasm', default=os.getenv('NASM') or shutil.which('nasm'), \
  73. help="which nasm binary to use")
  74. parser.add_argument('--cc', default=os.getenv('CC') or shutil.which('cc'), \
  75. help="which cc binary to use (MUST BE GCC!)")
  76. parser.add_argument('--scanelf', default=os.getenv('SCANELF') or shutil.which('scanelf'), \
  77. help="which scanelf binary to use")
  78. parser.add_argument('--readelf', default=os.getenv('READELF') or shutil.which('readelf'), \
  79. help="which readelf binary to use")
  80. parser.add_argument('--cflags', default=[], metavar='CFLAGS', action='append',
  81. help="Flags to pass to the C compiler for the relinking step")
  82. parser.add_argument('--asflags', default=[], metavar='ASFLAGS', action='append',
  83. help="Flags to pass to the assembler when creating the ELF header and runtime startup code")
  84. parser.add_argument('--ldflags', default=[], metavar='LDFLAGS', action='append',
  85. help="Flags to pass to the linker for the final linking step")
  86. parser.add_argument('--smolrt', default=os.getcwd()+"/rt",
  87. help="Directory containing the smol runtime sources")
  88. parser.add_argument('--smolld', default=os.getcwd()+"/ld",
  89. help="Directory containing the smol linker scripts")
  90. parser.add_argument('--verbose', default=False, action='store_true', \
  91. help="Be verbose about what happens and which subcommands are invoked")
  92. parser.add_argument('--keeptmp', default=False, action='store_true', \
  93. help="Keep temp files (only useful for debugging)")
  94. parser.add_argument('input', nargs='+', help="input object file")
  95. parser.add_argument('output', type=str, help="output binary")
  96. args = parser.parse_args()
  97. if args.hash16:
  98. args.fuse_dnload_loader = True
  99. if args.fskip_zero_value: args.asflags.insert(0, "-DSKIP_ZERO_VALUE")
  100. if args.fuse_nx: args.asflags.insert(0, "-DUSE_NX")
  101. if args.fskip_entries: args.asflags.insert(0, "-DSKIP_ENTRIES")
  102. if args.funsafe_dynamic: args.asflags.insert(0, "-DUNSAFE_DYNAMIC")
  103. if args.fno_start_arg: args.asflags.insert(0, "-DNO_START_ARG")
  104. if args.fuse_dl_fini: args.asflags.insert(0, "-DUSE_DL_FINI")
  105. if args.fuse_dt_debug: args.asflags.insert(0, "-DUSE_DT_DEBUG")
  106. if args.fuse_dnload_loader: args.asflags.insert(0, "-DUSE_DNLOAD_LOADER")
  107. if args.fuse_interp: args.asflags.insert(0, "-DUSE_INTERP")
  108. if args.falign_stack: args.asflags.insert(0, "-DALIGN_STACK")
  109. for x in ['nasm','cc','scanelf','readelf']:
  110. val = args.__dict__[x]
  111. if val is None or not os.path.isfile(val):
  112. error("'" + x + "' binary" + (" " if val is None
  113. else " ('" + val + "')") + " not found")
  114. arch = args.target.tolower() if len(args.target) != 0 else decide_arch(args.input)
  115. if arch not in archmagic:
  116. error("Unknown/unsupported architecture '" + str(arch) + "'")
  117. if args.verbose: eprintf("arch: %s" % arch)
  118. objinput = None
  119. objinputistemp = False
  120. tmp_asm_file = tempfile.mkstemp(prefix='smoltab',suffix='.asm',text=True)
  121. tmp_asm_fd = tmp_asm_file[0]
  122. tmp_asm_file = tmp_asm_file[1]
  123. tmp_elf_file = tempfile.mkstemp(prefix='smolout',suffix='.o')
  124. os.close(tmp_elf_file[0])
  125. tmp_elf_file = tmp_elf_file[1]
  126. try:
  127. # if >1 input OR input is LTO object:
  128. if len(args.input) > 1 or has_lto_object(args.readelf, args.input):
  129. fd, objinput = tempfile.mkstemp(prefix='smolin',suffix='.o')
  130. os.close(fd)
  131. cc_relink_objs(args.verbose, args.cc, arch, args.input, objinput, args.cflags)
  132. else: objinput = args.input[0]
  133. # generate smol hashtab
  134. cc_paths = get_cc_paths(args.cc)
  135. syms = get_needed_syms(args.readelf, objinput)
  136. spaths = args.libdir + cc_paths['libraries']
  137. libraries = cc_paths['libraries']
  138. libs = list(find_libs(spaths, args.library))
  139. if args.verbose: eprintf("libs = " + str(libs))
  140. symbols = {}
  141. for symbol, reloc in syms:
  142. library = find_symbol(args.scanelf, libs, args.library, symbol)
  143. if not library:
  144. error("could not find symbol: {}".format(symbol))
  145. symbols.setdefault(library, [])
  146. symbols[library].append((symbol, reloc))
  147. with os.fdopen(tmp_asm_fd, mode='w') as taf:
  148. output(arch, symbols, args.nx, args.hash16, taf, args.det)
  149. if args.verbose:
  150. eprintf("wrote symtab to %s" % tmp_asm_file)
  151. # assemble hash table/ELF header
  152. nasm_assemble_elfhdr(args.verbose, args.nasm, arch, args.smolrt,
  153. tmp_asm_file, tmp_elf_file, args.asflags)
  154. # link with LD into the final executable, w/ special linker script
  155. ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file],
  156. args.output, args.ldflags)
  157. finally:
  158. if not args.keeptmp:
  159. if objinputistemp: os.remove(objinput)
  160. os.remove(tmp_asm_file)
  161. os.remove(tmp_elf_file)
  162. if __name__ == '__main__':
  163. rv = main()
  164. if rv is None: pass
  165. else:
  166. try: sys.exit(int(rv))
  167. except: sys.exit(1)