diff --git a/.gitignore b/.gitignore index 6787451..70ce409 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /bin /obj __pycache__ +smol-*-*-*/ +*.tar.xz diff --git a/Makefile b/Makefile index 309c7b0..8e58c9a 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ SRCDIR := rt TESTDIR:= test NASM ?= nasm +OBJCOPY ?= objcopy BITS ?= $(shell getconf LONG_BIT) @@ -35,12 +36,12 @@ else CFLAGS += -march=nocona endif -LIBS = $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 -lc #-lGL +LIBS = $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 -lm -lc #-lGL PWD ?= . SMOLFLAGS = --smolrt "$(PWD)/rt" --smolld "$(PWD)/ld" \ - -falign-stack -fuse-interp \ + -falign-stack -fuse-interp -fifunc-support \ --verbose #--keeptmp # -fuse-dnload-loader -fskip-zero-value -fuse-nx -fskip-entries -fuse-dt-debug # -fuse-dl-fini -fno-start-arg -funsafe-dynamic @@ -67,11 +68,14 @@ $(OBJDIR)/%.o: $(SRCDIR)/%.c $(OBJDIR)/ $(OBJDIR)/%.o: $(TESTDIR)/%.c $(OBJDIR)/ $(CC) $(CFLAGS) -c "$<" -o "$@" -$(BINDIR)/%: $(OBJDIR)/%.o $(BINDIR)/ - $(PYTHON3) ./smold.py $(SMOLFLAGS) $(LIBS) "$<" "$@" - $(PYTHON3) ./smoltrunc.py "$@" "$(OBJDIR)/$(notdir $@)" && mv "$(OBJDIR)/$(notdir $@)" "$@" && chmod +x "$@" +# One recipe run builds both targets; $@ is whichever target triggered it, so outputs are named via the stem $* instead. +$(BINDIR)/%.dbg $(BINDIR)/%: $(OBJDIR)/%.o $(BINDIR)/ + $(PYTHON3) ./smold.py --debugout $(SMOLFLAGS) --ldflags=-Wl,-Map=$(BINDIR)/$*.map $(LIBS) "$<" "$(BINDIR)/$*.dbg" + $(PYTHON3) ./smold.py $(SMOLFLAGS) --ldflags=-Wl,-Map=$(BINDIR)/$*.map $(LIBS) "$<" "$(BINDIR)/$*" + $(PYTHON3) ./smoltrunc.py "$(BINDIR)/$*" "$(OBJDIR)/$(notdir $*)" && mv "$(OBJDIR)/$(notdir $*)" "$(BINDIR)/$*" && chmod +x "$(BINDIR)/$*" -$(BINDIR)/%-crt: $(OBJDIR)/%.lto.o $(OBJDIR)/crt1.lto.o $(BINDIR)/ - $(PYTHON3) ./smold.py $(SMOLFLAGS) --ldflags=-Wl,-Map=$(BINDIR)/$*-crt.map $(LIBS) "$<" $(OBJDIR)/crt1.lto.o "$@" - $(PYTHON3) ./smoltrunc.py "$@" "$(OBJDIR)/$(notdir $@)" && mv "$(OBJDIR)/$(notdir $@)" "$@" && chmod +x "$@" +$(BINDIR)/%-crt.dbg $(BINDIR)/%-crt: $(OBJDIR)/%.lto.o $(OBJDIR)/crt1.lto.o $(BINDIR)/ + $(PYTHON3) ./smold.py --debugout $(SMOLFLAGS) --ldflags=-Wl,-Map=$(BINDIR)/$*-crt.map $(LIBS) "$<" $(OBJDIR)/crt1.lto.o "$(BINDIR)/$*-crt.dbg" + $(PYTHON3) ./smold.py $(SMOLFLAGS) --ldflags=-Wl,-Map=$(BINDIR)/$*-crt.map $(LIBS) "$<" $(OBJDIR)/crt1.lto.o "$(BINDIR)/$*-crt" + $(PYTHON3) ./smoltrunc.py "$(BINDIR)/$*-crt" "$(OBJDIR)/$(notdir $*)-crt" && mv "$(OBJDIR)/$(notdir $*)-crt" "$(BINDIR)/$*-crt" && chmod +x "$(BINDIR)/$*-crt" diff --git a/ld/link.ld 
b/ld/link.ld index fa5f7ef..5f85c96 100644 --- a/ld/link.ld +++ b/ld/link.ld @@ -1,10 +1,19 @@ -OUTPUT_FORMAT(binary) +OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") +OUTPUT_ARCH(i386:x86-64) +ENTRY(_smol_start) +SEARCH_DIR("/usr/x86_64-pc-linux-gnu/lib64"); SEARCH_DIR("/usr/lib"); SEARCH_DIR("/usr/local/lib"); SEARCH_DIR("/usr/x86_64-pc-linux-gnu/lib"); +/*OUTPUT_FORMAT(binary)*/ + +PHDRS { + rodttxt PT_LOAD FLAGS(5); /* r-x */ + rwdt PT_LOAD FLAGS(6); /* rw- */ +} SECTIONS { . = 0x10000; _smol_origin = .; - .header : { KEEP(*(.header)) } + .header : { KEEP(*(.header)) } :rodttxt _smol_text_start = .; _smol_text_off = _smol_text_start - _smol_origin; @@ -15,7 +24,7 @@ SECTIONS { KEEP(*(.text.startup.smol)) KEEP(*(.text.startup._start)) *(.text .text.*) - } + } :rodttxt _smol_text_end = .; _smol_text_size = _smol_text_end - _smol_text_start; @@ -26,10 +35,10 @@ SECTIONS { .data : { KEEP(*(.data.smolgot)) *(.data .data.* .tdata .tdata.*) - } + } :rwdt - .dynamic : { *(.dynamic) } :all :dyn - .dynstuff : { *(.symtab .strtab .shstrtab .rel.text .got.plt .gnu.linkonce.* .plt .plt.got .interp) } :all + .dynamic : { *(.dynamic) } :rodttxt :dyn + .dynstuff : { *(.symtab .strtab .shstrtab .rel.text .got.plt .gnu.linkonce.* .plt .plt.got .interp) } :rodttxt _smol_data_end = .; _smol_data_size = _smol_data_end - _smol_data_start; @@ -39,7 +48,7 @@ SECTIONS { _smol_bss_off = _smol_bss_start - _smol_origin; .bss : { *(.bss .bss.* .tbss .tbss.* .sbss .sbss.*) - } + } :rwdt _smol_bss_end = .; _smol_bss_size = _smol_bss_end - _smol_bss_start; diff --git a/rt/elf.inc b/rt/elf.inc index 8bc708d..5064271 100644 --- a/rt/elf.inc +++ b/rt/elf.inc @@ -34,17 +34,22 @@ %define DT_SYMTAB ( 6) %define DT_DEBUG (21) -%define ST_NAME_OFF ( 0) +%define ST_NAME_OFF ( 0) +%define ST_INFO__STT_MASK (15) +%define STT_GNU_IFUNC (10) + ; ,---- not 16? ; what's this comment??! 
; v %if __BITS__ == 32 %define D_UN_PTR_OFF ( 4) %define ST_VALUE_OFF ( 4) +%define ST_INFO_OFF (12) %define SYMTAB_SIZE (16) %define ELF_DYN_SZ ( 8) %else %define D_UN_PTR_OFF ( 8) %define ST_VALUE_OFF ( 8) +%define ST_INFO_OFF ( 4) %define SYMTAB_SIZE (24) %define ELF_DYN_SZ (16) %endif diff --git a/rt/loader64.asm b/rt/loader64.asm index 9a093bd..89c2798 100644 --- a/rt/loader64.asm +++ b/rt/loader64.asm @@ -127,16 +127,51 @@ _smol_start: jmp short .next_link .hasheq: +%ifdef IFUNC_SUPPORT + mov cl , [rdx + ST_INFO_OFF] +%endif mov rax, [rdx + ST_VALUE_OFF] %ifdef SKIP_ZERO_VALUE or rax, rax jz short .next_link %endif add rax, [r12 + L_ADDR_OFF] +%ifdef IFUNC_SUPPORT + and cl, ST_INFO__STT_MASK + cmp cl, STT_GNU_IFUNC + je .ifunc + .no_ifunc: +%endif stosq cmp word [rdi], 0 jne short .next_hash +%ifdef IFUNC_SUPPORT + jmp short .break_loop + .ifunc: + ;;int3 ; in this call, we lose rax rcx rdx rsi rdi r8 r9 r10 r11 + ; we only need persistence for rdi and r11 tho + ;push rcx + ;push rdx + ;push rsi + push rdi + ;push r8 + ;push r9 + ;push r10 + push r11 + call rax + pop r11 + ;pop r10 + ;pop r9 + ;pop r8 + pop rdi + ;pop rsi + ;pop rdx + ;pop rcx + jmp short .no_ifunc + .break_loop: +%endif + ; if USE_DNLOAD_LOADER %else push _smol_start @@ -219,6 +254,12 @@ repne scasd ; technically, scasq should be used, but meh. this is 1 byte smaller mov rax, [rax + D_UN_PTR_OFF] ; ElfW(Addr) symoff(rax) = symtab[bucket].st_value lea rdx, [rcx + rcx * 2] + +%ifdef IFUNC_SUPPORT + ; large opcode, but, ~almost the same as the next one, so, + ; should compress well + mov cl, [rax + rdx * 8 + ST_INFO_OFF] ; TODO: actually mov cl, ... +%endif mov rax, [rax + rdx * 8 + ST_VALUE_OFF] %ifdef SKIP_ZERO_VALUE or rax, rax ; zero value => weak symbol or sth @@ -227,6 +268,20 @@ repne scasd ; technically, scasq should be used, but meh. this is 1 byte smaller ; void* finaladdr(rax) = symoff + entry->l_addr add rax, [r12 + L_ADDR_OFF] +%ifdef IFUNC_SUPPORT + ; is this an ifunc? 
+ and cl, ST_INFO__STT_MASK + cmp cl, STT_GNU_IFUNC + jne .no_ifunc + ; if so: call the resolver + push rdi + push r11 + call rax + pop r11 + pop rdi + .no_ifunc: +%endif + ; *phash = finaladdr stosq cmp word [rdi], 0 diff --git a/smol/cnl.py b/smol/cnl.py index 9d962c8..6879ce2 100644 --- a/smol/cnl.py +++ b/smol/cnl.py @@ -28,12 +28,14 @@ def nasm_assemble_elfhdr(verbose, nasm_bin, arch, rtdir, intbl, output, asflags) if verbose: eprintf("nasm: %s" % repr(args)) subprocess.check_call(args, stdout=subprocess.DEVNULL) -def ld_link_final(verbose, cc_bin, arch, lddir, inobjs, output, ldflags): +def ld_link_final(verbose, cc_bin, arch, lddir, inobjs, output, ldflags, debug=False): + """Link inobjs into output with cc_bin and lddir/link.ld; passes -Wl,--oformat=binary unless debug is true.""" archflag = '-m64' if arch == "x86_64" else '-m32' - args = [cc_bin, archflag, '-T', lddir+'/link.ld', \ - '-Wl,--oformat=binary', '-nostartfiles', '-nostdlib', \ - '-o', output] + inobjs + ldflags + args = [cc_bin, archflag, '-T', lddir+'/link.ld', '-no-pie'] + if not debug: + args.append('-Wl,--oformat=binary') + args += ['-nostartfiles', '-nostdlib', '-o', output, *inobjs, *ldflags] if verbose: eprintf("ld: %s" % repr(args)) subprocess.check_call(args, stdout=subprocess.DEVNULL) diff --git a/smold.py b/smold.py index adb706f..71e262b 100755 --- a/smold.py +++ b/smold.py @@ -74,6 +74,9 @@ def main(): help="Don't end the ELF Dyn table with a DT_NULL entry. This might "+\ "cause ld.so to interpret the entire binary as the Dyn table, "+\ "so only enable this if you're sure this won't break things!") + parser.add_argument('-fifunc-support', default=False, action='store_true', \ + help="Support linking to IFUNCs. Probably needed on x86_64, but costs "+\ + "~16 bytes. 
Ignored on platforms other than x86_64 or aarch64.") parser.add_argument('--nasm', default=os.getenv('NASM') or shutil.which('nasm'), \ help="which nasm binary to use") @@ -99,6 +102,10 @@ def main(): help="Be verbose about what happens and which subcommands are invoked") parser.add_argument('--keeptmp', default=False, action='store_true', \ help="Keep temp files (only useful for debugging)") + parser.add_argument('--debugout', default=False, action='store_true', \ + help="Output an unrunnable debug ELF file with symbol information. "+\ + "(Useful for debugging with gdb, cannot be ran due to broken "+\ + "relocations.)") parser.add_argument('input', nargs='+', help="input object file") parser.add_argument('output', type=str, help="output binary") @@ -118,6 +125,7 @@ def main(): if args.fuse_dnload_loader: args.asflags.insert(0, "-DUSE_DNLOAD_LOADER") if args.fuse_interp: args.asflags.insert(0, "-DUSE_INTERP") if args.falign_stack: args.asflags.insert(0, "-DALIGN_STACK") + if args.fifunc_support: args.asflags.insert(0, "-DIFUNC_SUPPORT") for x in ['nasm','cc','scanelf','readelf']: val = args.__dict__[x] @@ -172,7 +180,7 @@ def main(): # link with LD into the final executable, w/ special linker script ld_link_final(args.verbose, args.cc, arch, args.smolld, [objinput, tmp_elf_file], - args.output, args.ldflags) + args.output, args.ldflags, args.debugout) finally: if not args.keeptmp: if objinputistemp: os.remove(objinput) diff --git a/test/hello.c b/test/hello.c index ff8b7b2..abea42e 100644 --- a/test/hello.c +++ b/test/hello.c @@ -1,6 +1,7 @@ #include <stdio.h> #include <stdlib.h> +#include <math.h> const char *f = "foo"; @@ -10,6 +11,7 @@ int main(int argc, char* argv[]) { printf("argv=%p\n", (void*)argv); for (int i = 0; i < argc; ++i) printf("argv[%d](%p)=%s\n", i, (void*)argv[i], argv[i]); + printf("sin(%d)=%f\n", argc, sinf(argc)); exit(42); }