From 237ba99104bd7931f79a9689e331d169f837a6e3 Mon Sep 17 00:00:00 2001 From: PoroCYon Date: Wed, 6 Mar 2019 01:32:39 +0100 Subject: [PATCH] more size crunching, but not completely perfect --- Makefile | 9 +-- ld/link.ld | 2 +- smol.py | 6 +- smolemit.py | 47 ++++-------- src/crt1.c | 1 + src/header64.asm | 4 +- src/loader64.asm | 177 +++++++++++--------------------------------- test/flag.c | 6 +- test/hello-_start.c | 8 +- 9 files changed, 80 insertions(+), 180 deletions(-) diff --git a/Makefile b/Makefile index 90cbdb4..5f53afe 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,8 @@ TESTDIR:= test BITS ?= $(shell getconf LONG_BIT) # -mpreferred-stack-boundary=3 messes up the stack and kills SSE! -COPTFLAGS=-Os -fvisibility=hidden -fwhole-program \ +# -fno-plt +COPTFLAGS=-Os -fvisibility=hidden -fwhole-program -fno-plt \ -ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer \ -fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables CXXOPTFLAGS=$(COPTFLAGS) \ @@ -27,19 +28,17 @@ ASFLAGS += -f elf64 endif LDFLAGS_=$(LDFLAGS) -T $(LDDIR)/link.ld --oformat=binary -SMOLFLAGS ?= #--libsep - CFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2) CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2) LIBS=-lc -ASFLAGS += -DUSE_INTERP -DALIGN_STACK +ASFLAGS += -DUSE_INTERP -DALIGN_STACK -DUSE_DT_DEBUG -DNO_START_ARG NASM ?= nasm PYTHON3 ?= python3 -all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag-crt $(BINDIR)/hello-_start +all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag $(BINDIR)/hello-_start LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL diff --git a/ld/link.ld b/ld/link.ld index 5241486..fa5f7ef 100644 --- a/ld/link.ld +++ b/ld/link.ld @@ -1,7 +1,7 @@ OUTPUT_FORMAT(binary) SECTIONS { - . = 0x400000; + . = 0x10000; _smol_origin = .; .header : { KEEP(*(.header)) } diff --git a/smol.py b/smol.py index 08daa73..6a3cf1c 100755 --- a/smol.py +++ b/smol.py @@ -30,8 +30,8 @@ def main(): parser.add_argument('--readelf', default=shutil.which('readelf'), \ help="which readelf binary to use") - parser.add_argument('--libsep', default=False, action='store_true', \ - help="Separete import symbols per library, instead of looking at every library when resolving a symbol.") +# parser.add_argument('--libsep', default=False, action='store_true', \ +# help="Separete import symbols per library, instead of looking at every library when resolving a symbol.") parser.add_argument('input', nargs='+', help="input object file") parser.add_argument('output', type=argparse.FileType('w'), \ @@ -64,7 +64,7 @@ def main(): symbols.setdefault(library, []) symbols[library].append((symbol, reloc)) - output(arch, symbols, args.libsep, args.output) + output(arch, symbols, args.output) if __name__ == '__main__': main() diff --git a/smolemit.py b/smolemit.py index a5a5ff0..0e7a6b8 100644 --- a/smolemit.py +++ b/smolemit.py @@ -3,11 +3,9 @@ import sys from smolshared import * -def output_x86(libraries, libsep, outf): +def output_x86(libraries, outf): outf.write('; vim: set ft=nasm:\n') # be friendly outf.write('bits 32\n') - if libsep: - outf.write('%define LIBSEP\n') shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries } @@ -23,14 +21,14 @@ def output_x86(libraries, libsep, outf): # outf.write('_GLOBAL_OFFSET_TABLE_:\n') # outf.write('dd dynamic\n') outf.write('_strtab:\n') - if not libsep: - for library, symrels in libraries.items(): - outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) +# if not libsep: +# for library, symrels in libraries.items(): +# outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) outf.write('_symbols:\n') for library, symrels in libraries.items(): - if libsep: - outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) +# if libsep: + outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) for sym, reloc in symrels: # meh @@ -53,11 +51,9 @@ def output_x86(libraries, libsep, outf): # end output_x86 -def output_amd64(libraries, libsep, outf): +def output_amd64(libraries, outf): outf.write('; vim: set ft=nasm:\n') outf.write('bits 64\n') - if libsep: - outf.write('%define LIBSEP\n') shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries } @@ -68,28 +64,16 @@ def output_amd64(libraries, libsep, outf): outf.write('dq (_symbols.{} - _strtab)\n'.format(shorts[library])) outf.write('dynamic.end:\n') - if libsep: - outf.write('[section .data.smolgot]\n') - else: - outf.write('[section .rodata.neededlibs]\n') -# if needgot: -# outf.write('global _GLOBAL_OFFSET_TABLE_\n') -# outf.write('_GLOBAL_OFFSET_TABLE_:\n') -# outf.write('dq dynamic\n') + outf.write('[section .rodata.neededlibs]\n') outf.write('_strtab:\n') - if not libsep: - for library, symrels in libraries.items(): - outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) + for library, symrels in libraries.items(): + outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) - if not libsep: - outf.write('[section .data.smolgot]\n') + outf.write('[section .data.smolgot]\n') outf.write('_symbols:\n') for library, symrels in libraries.items(): - if libsep: - outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library)) - for sym, reloc in symrels: if reloc != 'R_X86_64_PLT32' and reloc != 'R_X86_64_GOTPCRELX': eprintf('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!') @@ -105,9 +89,6 @@ global {name} outf.write('\t\t_symbols.{lib}.{name}: dq 0x{hash:x}\n'\ .format(lib=shorts[library],name=sym,hash=hash)) - if libsep: - outf.write('\tdq 0\n') - outf.write('db 0\n') outf.write('_symbols.end:\n') @@ -127,9 +108,9 @@ global {name} # end output_amd64 -def output(arch, libraries, libsep, outf): - if arch == 'i386': output_x86(libraries, libsep, outf) - elif arch == 'x86_64': output_amd64(libraries, libsep, outf) +def output(arch, libraries, outf): + if arch == 'i386': output_x86(libraries, outf) + elif arch == 'x86_64': output_amd64(libraries, outf) else: eprintf("E: cannot emit for arch '" + str(arch) + "'") sys.exit(1) diff --git a/src/crt1.c b/src/crt1.c index 767ee5a..aa5c423 100644 --- a/src/crt1.c +++ b/src/crt1.c @@ -15,6 +15,7 @@ int _start(void* stack) { int argc=*(size_t*)stack; char** argv=(void*)(&((size_t*)stack)[1]); + // TODO: fix this. it borks with -fno-plt __libc_start_main(main, argc, argv, NULL, NULL, NULL, (void*)stack); __builtin_unreachable(); diff --git a/src/header64.asm b/src/header64.asm index 976d50e..541243b 100644 --- a/src/header64.asm +++ b/src/header64.asm @@ -9,8 +9,8 @@ ehdr: ; e_ident db 0x7F, "ELF" - db EI_CLASS, EI_DATA, EI_VERSION, EI_OSABI - db EI_OSABIVERSION + db EI_CLASS, EI_DATA, EI_VERSION, 0;EI_OSABI + db 0;EI_OSABIVERSION times 7 db 0 dw ELF_TYPE ; e_type dw ELF_MACHINE ; e_machine diff --git a/src/loader64.asm b/src/loader64.asm index 5bca552..60a9752 100644 --- a/src/loader64.asm +++ b/src/loader64.asm @@ -1,6 +1,7 @@ ; vim: set ft=nasm: -%define R10_BIAS (0x2B8) +;%define R10_BIAS (0x2B4) +%define R10_BIAS (0x2B4+0x40) %include "rtld.inc" @@ -42,132 +43,41 @@ _smol_start: ; the second one isn't needed anymore, see code below (.next_link) %endif - mov rdi, r12 + push _smol_start + push r12 push -1 pop rcx - lea rax, [rel _smol_start] ; TODO: make offset positive! -repne scasq + pop rdi + pop rax +repne scasd ; technically, scasq should be used, but ehhhh sub rdi, r12 - sub rdi, LF_ENTRY_OFF+8 - xchg r9 , rdi + sub rdi, LF_ENTRY_OFF+4 + xchg r9, rdi - ;mov edi, _symbols - lea edi, [rel _symbols] - -%ifdef LIBSEP - ; for (rdi = (uint8_t*)_symbols; *rdi; ++rdi) { - .next_needed: - cmp byte [rdi], 0 - je .needed_end - - ; do { // iter over the link_map - .next_link: - ; entry = entry->l_next; - mov r12, [r12 + L_NEXT_OFF] ; skip the first one (this is our main - ; binary, it has no symbols) - lea r10, [r12 + r9 + R10_BIAS] - - ; keep the current symbol in a backup reg - push rdi - pop rdx - - ; r11 = basename(rsi = entry->l_name) - mov rsi, [r12 + L_NAME_OFF] - .basename: - push rsi - pop r11 - .basename.next: - lodsb - cmp al, '/' - cmove r11, rsi - or al, al - jnz short .basename.next - .basename.done: - - ; and place it back - push rdx - push rdx - pop rdi ; rdi == _symbol - pop rsi - - ; strcmp(rsi, r11) -> flags; rsi == first hash if matches - .strcmp: - lodsb - or al, al - jz short .strcmp.done - sub al, byte [r11] - cmovnz rsi, rdx - jnz short .next_link;.strcmp.done - inc r11 - jmp short .strcmp - .strcmp.done: - xchg rsi, rdi - - ; if (strcmp(...)) goto next_link; - ;cmovnz r12, [r12 + L_NEXT_OFF] ; this is guaranteed to be nonzero - ;jnz short .next_link ; because otherwise ld.so would have complained - - ; now we have the right link_map of the library, so all we have - ; to do now is to find the right symbol addresses corresponding - ; to the hashes. - - ; do { - .next_hash: - ; if (!*phash) break; - mov eax, dword [rdi] - or eax, eax - jz short .next_needed ; done the last hash, so move to the next lib - -;link_symbol(struct link_map* entry = r12, size_t* phash = rsi, uint32_t hash = eax) - - push rax - pop r11 - ; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets - xor edx, edx - mov ecx, dword [r10 + LF_NBUCKETS_OFF - R10_BIAS] - div ecx - - ; shift left because we don't want to compare the lowest bit - shr r11, 1 - - ; uint32_t bucket(edx) = entry->l_gnu_buckets[bkt_ind] - mov r8, [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS] - mov edx, dword [r8 + rdx * 4] - - ; do { - .next_chain: - ; uint32_t luhash(ecx) = entry->l_gnu_chain_zero[bucket] >> 1 - mov rcx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS] - mov ecx, dword [rcx + rdx * 4] - shr ecx, 1 - - ; if (luhash == hash) break; - cmp ecx, r11d - je short .chain_break - ; ++bucket; } while (LIBSEP || (luhash & 1)) - inc edx - jne short .next_chain -%else -; !LIBSEP + push _symbols + ; back up link_map root push r12 - pop r11 ; back up link_map root + pop r11 + pop rdi + +;.loopme: jmp short .loopme ; debugging .next_hash: - mov eax, dword [rdi] - or al, al - jz short .needed_end - push r11 - push rax - push rax + mov r14d, dword [rdi] + ; assume we need at least one function +; or al, al +; jz short .needed_end + mov r12, r11 +; push r11 + push r14 pop rbx - pop r14 - pop r12 +; pop r12 ; shift left because we don't want to compare the lowest bit shr ebx, 1 .next_link: mov r12, [r12 + L_NEXT_OFF] - lea r10, [r12 + r9 + R10_BIAS] + lea r10, [r12 + r9 + R10_BIAS] ; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets xor edx, edx push r14 @@ -177,31 +87,30 @@ repne scasq ; uint32_t bucket(edx) = entry->l_gnu_buckets[bkt_ind] mov r8 , [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS] - mov edx, dword [r8 + rdx * 4] + mov ecx, dword [r8 + rdx * 4] - or edx, edx - jz short .next_link + ; can be ignored apparently? +; jecxz .next_link .next_chain: ; uint32_t luhash(ecx) = entry->l_gnu_chain_zero[bucket] >> 1 - mov rcx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS] - mov ecx, dword [rcx + rdx * 4] + mov rdx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS] + mov edx, dword [rdx + rcx * 4] - ; if (!(luhash & 1)) goto next_link; // nothing to be found in this lib. - mov al, cl - - shr ecx, 1 + ; TODO: make this not suck. (maybe using bt*?) + mov al, dl + shr edx, 1 ; if (luhash == hash) break; - cmp ecx, ebx + cmp edx, ebx je short .chain_break ; ++bucket; } while (luhash & 1); and al, 1 jnz short .next_link - inc edx + + inc ecx jmp short .next_chain -%endif .chain_break: ; ElfW(Sym)* symtab = entry->l_info[DT_SYMTAB]->d_un.d_ptr @@ -213,21 +122,25 @@ repne scasq ; ElfW(Sym)* symtab(rax) = dyn->d_un.d_ptr mov rax, [rax + D_UN_PTR_OFF] ; ElfW(Addr) symoff(rax) = symtab[bucket].st_value - lea rdx, [rdx + rdx * 2] + lea rdx, [rcx + rcx * 2] mov rax, [rax + rdx * 8 + ST_VALUE_OFF] ; void* finaladdr(rax) = symoff + entry->l_addr - mov rcx, [r12 + L_ADDR_OFF] - add rax, rcx + add rax, [r12 + L_ADDR_OFF] ; *phash = finaladdr stosq - + cmp byte [rdi], 0 + jne short .next_hash ; } while (1) - jmp short .next_hash +; jmp short .next_hash .needed_end: - ;xor rbp, rbp ; still 0 from _dl_start_user +; int3 ; debugging +; xor rbp, rbp ; still 0 from _dl_start_user +%ifndef NO_START_ARG + ; arg for _start mov rdi, rsp +%endif %ifdef ALIGN_STACK push rax %endif diff --git a/test/flag.c b/test/flag.c index f085324..53e060d 100644 --- a/test/flag.c +++ b/test/flag.c @@ -10,9 +10,9 @@ #define COLOR(r, g, b) ((r << 16) + (g << 8) + b) -/*__attribute__((__used__)) -void _start() {*/ -int main() { +__attribute__((__used__,__externally_visible__)) +void _start() { +/*int main() {*/ #ifdef MAKE_ESC_WORK Atom wmDeleteMessage; #endif diff --git a/test/hello-_start.c b/test/hello-_start.c index b7f14d0..ee16526 100644 --- a/test/hello-_start.c +++ b/test/hello-_start.c @@ -3,9 +3,15 @@ const char *f = "foo"; -__attribute__((__externally_visible__, __section__(".text.startup._start"), __noreturn__)) +__attribute__((__externally_visible__, __section__(".text.startup._start"), + __noreturn__ +#ifndef __clang__ + , __naked__ +#endif +)) int _start(void) { puts("Hello World!");//printf("hello world %s\n", f); asm volatile("int3");//exit(42); __builtin_unreachable(); } +