#!/usr/bin/env python3
"""Command-line driver: builds the import hash table / ELF header for the
input object(s) and links everything into the final binary."""

import argparse
import glob
import itertools
import os
import os.path
import shutil
import subprocess
import sys
import tempfile

from smol.shared import *
from smol.parse import *
from smol.emit import *
from smol.cnl import *


# (argparse attribute, assembler define) pairs.  Each enabled define is
# *prepended* to args.asflags, so the resulting flag order is the reverse of
# this table -- keep the order stable.
_ASFLAG_DEFINES = (
    ('fskip_zero_value', "-DSKIP_ZERO_VALUE"),
    ('fuse_nx', "-DUSE_NX"),
    ('fskip_entries', "-DSKIP_ENTRIES"),
    ('funsafe_dynamic', "-DUNSAFE_DYNAMIC"),
    ('fno_start_arg', "-DNO_START_ARG"),
    ('fuse_dl_fini', "-DUSE_DL_FINI"),
    ('fuse_dt_debug', "-DUSE_DT_DEBUG"),
    ('fuse_dnload_loader', "-DUSE_DNLOAD_LOADER"),
    ('fuse_interp', "-DUSE_INTERP"),
    ('falign_stack', "-DALIGN_STACK"),
    ('fifunc_support', "-DIFUNC_SUPPORT"),
    ('fifunc_strict_cconv', "-DIFUNC_CORRECT_CCONV"),
    ('hang_on_startup', "-DHANG_ON_STARTUP"),
)


def preproc_args(args):
    """Validate the parsed options and derive everything implied by them.

    Mutates ``args`` in place: appends ``-g`` to the compiler, linker and
    assembler flags when ``--debug`` is set, forces the dnload loader on when
    a non-default hash is selected, resolves the default of
    ``fskip_zero_value`` and prepends one ``-D...`` define to ``args.asflags``
    per enabled feature flag.

    Problems (conflicting hashes, missing tool binaries, unknown
    architecture, ``--hash16`` on a non-i386 target) are reported through
    ``error`` from the smol helper modules.
    NOTE(review): ``error`` is presumably fatal -- confirm in smol.shared.

    Returns:
        The tuple ``(args, arch)``, where ``arch`` is the lower-cased
        ``--target`` value, or the result of ``decide_arch(args.input)`` when
        no target was given.
    """
    if args.hash16 and args.crc32c:  # shouldn't happen anymore
        error("Cannot combine --hash16 and --crc32c!")

    if args.debug:
        args.cflags.append('-g')
        args.ldflags.append('-g')
        args.asflags.append('-g')

    if args.hash16 or args.crc32c:
        args.fuse_dnload_loader = True

    # Tri-state option: None means "not given on the command line", in which
    # case it follows -fuse-dnload-loader.  An explicit -fno-skip-zero-value
    # (False) must be honoured rather than overridden by the derived default.
    if args.fskip_zero_value is None:
        args.fskip_zero_value = args.fuse_dnload_loader

    for attr, define in _ASFLAG_DEFINES:
        if getattr(args, attr):
            args.asflags.insert(0, define)

    for tool in ('nasm', 'cc', 'readelf'):
        val = getattr(args, tool)
        if val is None or not os.path.isfile(val):
            error("'%s' binary%s not found"
                  % (tool, ("" if val is None else (" ('%s')" % val))))

    # str has no .tolower(); the original call crashed on any explicit target.
    arch = args.target.lower() if args.target else decide_arch(args.input)
    if arch not in archmagic:
        error("Unknown/unsupported architecture '%s'" % str(arch))

    if args.verbose:
        eprintf("arch: %s" % str(arch))

    if args.hash16 and arch not in ('i386', 3):
        error("Cannot use --hash16 for arch `%s' (not i386)" % (arch))

    return args, arch


def do_smol_run(args, arch):
    """Run the whole pipeline for already-preprocessed options.

    Steps, in order:
      1. If there is more than one input, or ``has_lto_object`` reports an LTO
         object, relink the inputs into a single temporary object file;
         otherwise use the sole input directly.
      2. Collect the needed symbols and the symbol map of the requested
         libraries, and resolve the former against the latter.
      3. Write the generated assembly: to ``args.output`` when
         ``--gen-rt-only`` is set, otherwise to a temporary ``.asm`` file.
      4. Unless ``--gen-rt-only`` is set, assemble that file and link the
         final binary, plus a second link to ``args.debugout`` if requested.

    Temporary files are removed afterwards unless ``--keeptmp`` is set.
    """
    objinput = None
    objinputistemp = False
    tmp_asm_fd, tmp_asm_file, tmp_elf_file = None, None, None

    try:
        # Created inside the try block so that a failure half-way through
        # still cleans up whatever was already created.
        if not args.gen_rt_only:
            tmp_asm_fd, tmp_asm_file = tempfile.mkstemp(
                prefix='smoltab', suffix='.asm', text=True)
            elf_fd, tmp_elf_file = tempfile.mkstemp(
                prefix='smolout', suffix='.o')
            os.close(elf_fd)

        #for inp in args.input:
        #    if not is_valid_elf(inp):
        #        error("Input file '%s' is not a valid ELF file!" % inp)

        # if >1 input OR input is LTO object:
        if len(args.input) > 1 or has_lto_object(args.readelf, args.input):
            fd, objinput = tempfile.mkstemp(prefix='smolin', suffix='.o')
            objinputistemp = True
            os.close(fd)
            cc_relink_objs(args.verbose, args.cc, arch, args.input, objinput,
                           args.cflags)
        else:
            objinput = args.input[0]

        # generate smol hashtab
        cc_paths = get_cc_paths(args.cc)
        syms = get_needed_syms(args.readelf, objinput)
        spaths = args.libdir + cc_paths['libraries']
        libs = find_libs(spaths, args.library)
        if args.verbose:
            eprintf("libs = %s" % str(libs))
        libs_symbol_map = build_symbol_map(args.readelf, libs)

        #symbols = {}
        #for symbol, reloc in syms:
        #    if symbol not in libs_symbol_map:
        #        error("could not find symbol: {}".format(symbol))
        #    libs_for_symbol = libs_symbol_map[symbol]
        #    if len(libs_for_symbol) > 1:
        #        error("E: the symbol '%s' is provided by more than one library: %s"
        #              % (symbol, str(libs_for_symbol)))
        #    library = libs_for_symbol.pop()
        #    symbols.setdefault(library, [])
        #    symbols[library].append((symbol, reloc))
        symbols = resolve_extern_symbols(syms, libs_symbol_map, args)

        if args.gen_rt_only:
            taf = open(args.output, 'w')
        else:
            taf = os.fdopen(tmp_asm_fd, mode='w')
            # The file object owns the descriptor from here on.
            tmp_asm_fd = None
        with taf:
            output(arch, symbols, args.nx,
                   get_hash_id(args.hash16, args.crc32c), taf, args.det)

        if args.verbose:
            # With --gen-rt-only there is no temp file; report the real target.
            eprintf("wrote symtab to %s"
                    % (args.output if args.gen_rt_only else tmp_asm_file))

        if not args.gen_rt_only:
            # assemble hash table/ELF header
            nasm_assemble_elfhdr(args.verbose, args.nasm, arch, args.smolrt,
                                 tmp_asm_file, tmp_elf_file, args.asflags)

            # link with LD into the final executable, w/ special linker script
            ld_link_final(args.verbose, args.cc, arch, args.smolld,
                          [objinput, tmp_elf_file], args.output,
                          args.ldflags, False)
            if args.debugout is not None:
                ld_link_final(args.verbose, args.cc, arch, args.smolld,
                              [objinput, tmp_elf_file], args.debugout,
                              args.ldflags, True)
    finally:
        # Still set only if we failed before os.fdopen() took ownership.
        if tmp_asm_fd is not None:
            os.close(tmp_asm_fd)
        if not args.keeptmp:
            if objinputistemp:
                os.remove(objinput)
            # Both paths stay None under --gen-rt-only; never remove None.
            for tmp_path in (tmp_asm_file, tmp_elf_file):
                if tmp_path is not None:
                    os.remove(tmp_path)


def main():
    """Parse the command line, preprocess the options and run the pipeline.

    Returns ``None``; failures surface as exceptions or through the helpers
    called by ``preproc_args`` / ``do_smol_run``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-m', '--target', default='',
        help='architecture to generate asm code for (default: auto)')
    parser.add_argument('-l', '--library', default=[], metavar='LIB',
        action='append',
        help='libraries to link against')
    parser.add_argument('-L', '--libdir', default=[], metavar='DIR',
        action='append',
        help="directories to search libraries in")

    hashgrp = parser.add_mutually_exclusive_group()
    hashgrp.add_argument('-s', '--hash16', default=False, action='store_true',
        help="Use 16-bit (BSD2) hashes instead of 32-bit djb2 hashes. "
             "Implies -fuse-dnload-loader. Only usable for 32-bit output.")
    hashgrp.add_argument('-c', '--crc32c', default=False, action='store_true',
        help="Use Intel's crc32 intrinsic for hashing. "
             "Implies -fuse-dnload-loader. Conflicts with `--hash16'.")

    parser.add_argument('-n', '--nx', default=False, action='store_true',
        help="Use NX (i.e. don't use RWE pages). Costs the size of one phdr, "
             "plus some extra bytes on i386.")
    parser.add_argument('-d', '--det', default=False, action='store_true',
        help="Make the order of imports deterministic (default: just use "
             "whatever binutils throws at us)")
    parser.add_argument('-g', '--debug', default=False, action='store_true',
        help="Pass `-g' to the C compiler, assembler and linker. Only useful "
             "when `--debugout' is specified.")

    parser.add_argument('-fuse-interp', default=True, action='store_true',
        help="[Default ON] Include a program interpreter header (PT_INTERP). If not "
             "enabled, ld.so has to be invoked manually by the end user. "
             "Disable with `-fno-use-interp'.",
        dest="fuse_interp")
    parser.add_argument('-fno-use-interp', action='store_false',
        dest="fuse_interp", help=argparse.SUPPRESS)

    parser.add_argument('-falign-stack', default=True, action='store_true',
        help="[Default ON] Align the stack before running user code (_start). If not "
             "enabled, this has to be done manually. Costs 1 byte. Disable "
             "with `-fno-align-stack'.",
        dest="falign_stack")
    parser.add_argument('-fno-align-stack', action='store_false',
        dest="falign_stack", help=argparse.SUPPRESS)

    # default=None (rather than False) lets preproc_args tell "not given"
    # apart from an explicit -fno-skip-zero-value.
    parser.add_argument('-fskip-zero-value', default=None, action='store_true',
        help="[Default: ON if `-fuse-dnload-loader' supplied, OFF otherwise] "
             "Skip an ELF symbol with a zero address (a weak symbol) when "
             "parsing libraries at runtime. Try enabling this if you're "
             "experiencing sudden breakage. However, many libraries don't use "
             "weak symbols, so this doesn't often pose a problem. Costs ~5 bytes."
             "Disable with `-fno-skip-zero-value'.",
        dest="fskip_zero_value")
    parser.add_argument('-fno-skip-zero-value', default=None,
        action='store_false',
        dest="fskip_zero_value", help=argparse.SUPPRESS)

    # The help text used to advertise `-fno-fifunc-support', which is not a
    # registered option; the real disabling flag is `-fno-ifunc-support'.
    parser.add_argument('-fifunc-support', default=True, action='store_true',
        help="[Default ON] Support linking to IFUNCs. Probably needed on x86_64, but costs "
             "~16 bytes. Ignored on platforms without IFUNC support. Disable "
             "with `-fno-ifunc-support'.",
        dest="fifunc_support")
    parser.add_argument('-fno-ifunc-support', action='store_false',
        dest="fifunc_support", help=argparse.SUPPRESS)

    parser.add_argument('-fuse-dnload-loader', default=False,
        action='store_true',
        help="Use a dnload-style loader for resolving symbols, which doesn't "
             "depend on nonstandard/undocumented ELF and ld.so features, but "
             "is slightly larger. If not enabled, a smaller custom loader is "
             "used which assumes glibc. `-fskip-zero-value' defaults to ON if "
             "this flag is supplied.")
    parser.add_argument('-fuse-nx', default=False, action='store_true',
        help="Don't use one big RWE segment, but use separate RW and RE ones."
             " Use this to keep strict kernels (PaX/grsec) happy. Costs at "
             "least the size of one program header entry.")
    parser.add_argument('-fuse-dt-debug', default=False, action='store_true',
        help="Use the DT_DEBUG Dyn header to access the link_map, which doesn't"
             " depend on nonstandard/undocumented ELF and ld.so features. If "
             "not enabled, the link_map is accessed using data leaked to the "
             "entrypoint by ld.so, which assumes glibc. Costs ~10 bytes.")
    parser.add_argument('-fuse-dl-fini', default=False, action='store_true',
        help="Pass _dl_fini to the user entrypoint, which should be done to "
             "properly comply with all standards, but is very often not "
             "needed at all. Costs 2 bytes.")
    parser.add_argument('-fskip-entries', default=False, action='store_true',
        help="Skip the first two entries in the link map (resp. ld.so and "
             "the vDSO). Speeds up symbol resolving, but costs ~5 bytes.")
    parser.add_argument('-fno-start-arg', default=False, action='store_true',
        help="Don't pass a pointer to argc/argv/envp to the entrypoint using "
             "the standard calling convention. This means you need to read "
             "these yourself in assembly if you want to use them! (envp is "
             "a preprequisite for X11, because it needs $DISPLAY.) Frees 3 bytes.")
    parser.add_argument('-funsafe-dynamic', default=False, action='store_true',
        help="Don't end the ELF Dyn table with a DT_NULL entry. This might "
             "cause ld.so to interpret the entire binary as the Dyn table, "
             "so only enable this if you're sure this won't break things!")
    parser.add_argument('-fifunc-strict-cconv', default=False,
        action='store_true',
        help="On i386, if -fifunc-support is specified, strictly follow the "
             "calling convention rules. Probably not needed, but you never know.")

    # Tool locations: environment variable first, then a PATH lookup.
    parser.add_argument('--nasm',
        default=os.getenv('NASM') or shutil.which('nasm'),
        help="which nasm binary to use")
    parser.add_argument('--cc',
        default=os.getenv('CC') or shutil.which('cc'),
        help="which cc binary to use (MUST BE GCC!)")
    parser.add_argument('--readelf',
        default=os.getenv('READELF') or shutil.which('readelf'),
        help="which readelf binary to use")

    parser.add_argument('-Wc', '--cflags', default=[], metavar='CFLAGS',
        action='append',
        help="Flags to pass to the C compiler for the relinking step")
    parser.add_argument('-Wa', '--asflags', default=[], metavar='ASFLAGS',
        action='append',
        help="Flags to pass to the assembler when creating the ELF header and runtime startup code")
    parser.add_argument('-Wl', '--ldflags', default=[], metavar='LDFLAGS',
        action='append',
        help="Flags to pass to the linker for the final linking step")

    parser.add_argument('--smolrt', default=os.getcwd()+"/rt",
        help="Directory containing the smol runtime sources")
    parser.add_argument('--smolld', default=os.getcwd()+"/ld",
        help="Directory containing the smol linker scripts")

    parser.add_argument('--gen-rt-only', default=False, action='store_true',
        help="Only generate the headers/runtime assembly source file, instead"
             " of doing a full link. (I.e. fall back to pre-release behavior.)")
    parser.add_argument('--verbose', default=False, action='store_true',
        help="Be verbose about what happens and which subcommands are invoked")
    parser.add_argument('--keeptmp', default=False, action='store_true',
        help="Keep temp files (only useful for debugging)")
    parser.add_argument('--debugout', type=str, default=None,
        help="Write out an additional, unrunnable debug ELF file with symbol "
             "information. (Useful for debugging with gdb, cannot be ran due "
             "to broken relocations.)")
    parser.add_argument('--hang-on-startup', default=False,
        action='store_true',
        help="Hang on startup until a debugger breaks the code out of the "
             "loop. Only useful for debugging.")

    parser.add_argument('input', nargs='+', help="input object file")
    parser.add_argument('output', type=str, help="output binary")

    args = parser.parse_args()
    args, arch = preproc_args(args)
    do_smol_run(args, arch)


if __name__ == '__main__':
    rv = main()
    if rv is not None:
        # Convert first, exit afterwards: sys.exit() raises SystemExit, which
        # a bare `except:` around it would swallow and turn into status 1.
        try:
            exit_code = int(rv)
        except (TypeError, ValueError):
            exit_code = 1
        sys.exit(exit_code)