From 75c1cfe6036123849c554e3c50a690fd6a304591 Mon Sep 17 00:00:00 2001 From: PoroCYon Date: Sat, 2 Feb 2019 18:14:28 +0100 Subject: [PATCH] add docs etc --- README.md | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ src/header64.asm | 32 +++++++++++++---- src/loader32.asm | 10 ++++++ src/loader64.asm | 23 +++++------- 4 files changed, 138 insertions(+), 21 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..33d1963 --- /dev/null +++ b/README.md @@ -0,0 +1,94 @@ +# smol + +Shoddy minsize-oriented linker + +PoC by Shiz, bugfixing and 64-bit version by PoroCYon. + +## Usage + +```sh +./smol.py -lfoo -lbar input.o... smol-output.asm +nasm -I src/ [-DUSE_NX] [-DUSE_DL_FINI] -o nasm-output.o smol-output.asm +ld -T ld/link.ld -o binary nasm-output.o input.o... +``` + +``` +usage: smol.py [-h] [-m TARGET] [-l LIB] [-L DIR] [--nasm NASM] [--cc CC] + [--scanelf SCANELF] [--readelf READELF] + input [input ...] output + +positional arguments: + input input object file + output output nasm file + +optional arguments: + -h, --help show this help message and exit + -m TARGET, --target TARGET + architecture to generate asm code for (default: auto) + -l LIB, --library LIB + libraries to link against + -L DIR, --libdir DIR directories to search libraries in + --nasm NASM which nasm binary to use + --cc CC which cc binary to use + --scanelf SCANELF which scanelf binary to use + --readelf READELF which readelf binary to use +``` + +A minimal crt (and `_start` function) is provided in case you want to use `main`. + +## Internal workings + +`smol.py` inspects the input object files for needed library files and symbols. +It then outputs the list of needed libraries, hashes of the needed symbols and +provides stubs for the external functions. This is then combined with a +custom-made, small ELF header and 'runtime linker' which resolves the symbols +(from the hashes) so that the function stubs are usable. 
+ +The runtime linker uses an unorthodox way of resolving the symbols (which only +works for glibc): on both i386 and x86_64, the linker startup code +(`_dl_start_user`) leaks the global `struct link_map` to the user code: +on i386, a pointer to it is passed directly through `eax`: + +```s +# (eax, edx, ecx, esi) = (_dl_loaded, argc, argv, envp) +movl _rtld_local@GOTOFF(%ebx), %eax +## [ boring stuff... ] +pushl %eax +# Call the function to run the initializers. +call _dl_init +## eax still lives thanks to the ABI and calling convention +## [ boring stuff... ] +# Jump to the user's entry point. +jmp *%edi +## eax contains the pointer to the link_map! +``` + +On x86_64, it's a bit more convoluted: the contents of `_rtld_local` are loaded +into `rdi`, but the x86_64 ABI doesn't require that register to be preserved +across the call to `_dl_init`. However, due to the `call` instruction, a pointer to the +instruction after the call will be placed on the stack; at `_start`, it's at +`rsp - 8`. Then, the offset to the "load from `_rtld_local`"-instruction can be +calculated, and the part of the instruction which contains the offset to +`_rtld_local`, from the instruction after the load (of which the address is now +also known), can be read, and thus the contents of that global variable are +available as well. + +Now the code continues with walking the "import tables" for the needed +libraries (which have already been automatically parsed by `ld.so`), looks +through their hash tables for the hashes of the imported symbols, gets their +addresses, and replaces the hashes in the table with the function addresses. + +However, the layout of `struct link_map` can change between glibc versions, +especially the size of the `l_info` field (a fixed-size array; the `DT_*NUM` +constants tend to change every few versions). 
To remediate this, one can note +that the `l_entry` field comes a few bytes after `l_info`, that the root +`struct link_map` is the one of the main executable, and that the contents of +the `l_entry` field is known at compile-time. Thus, the loader scans the struct +for the entry point address, and uses that as an offset for the 'far fields' of +the `struct link_map`. ('Near' fields like `l_name` and `l_addr` are resp. 8 +and 0, and will thus pretty much never change.) + +## Greets + +auld alrj blackle breadbox faemiyah gib3&tix0 las leblane parcelshit unlord + diff --git a/src/header64.asm b/src/header64.asm index bc8ea2c..f7fd766 100644 --- a/src/header64.asm +++ b/src/header64.asm @@ -23,19 +23,23 @@ ehdr: dw phdr.load - phdr.dynamic ; e_phentsize %ifdef USE_NX -%ifdef USE_INTERP + %ifdef USE_INTERP dw 4 ; e_phnum -%else + %else dw 3 ; e_phnum -%endif + %endif dw 0, 0, 0 ; e_shentsize, e_shnum, e_shstrndx -%else +ehdr.end: +%endif + phdr: %ifdef USE_INTERP phdr.interp: dd PT_INTERP ; p_type ; e_phnum, e_shentsize dd 0 ; p_flags ; e_shnum, e_shstrndx + %ifndef USE_NX ehdr.end: + %endif dq interp - ehdr ; p_offset dq interp, interp ; p_vaddr, p_paddr dq interp.end - interp ; p_filesz @@ -63,10 +67,24 @@ phdr.load: dq _smol_total_memsize ; p_memsz dq 0x1000 ; p_align %else -%error "TODO" ; TODO -%endif - +phdr.load: + dd PT_LOAD + dd PHDR_R | PHDR_X + dq 0 + dq ehdr, 0 + dq _smol_textandheader_size + dq _smol_textandheader_size + dq 0x1000 ; let's hope this works +phdr.load2: + dd PT_LOAD + dd PHDR_R | PHDR_W + dq _smol_data_off + dq _smol_data_start, 0 + dq _smol_dataandbss_size + dq _smol_dataandbss_size + dq 0x1000 %endif +phdr.end: %ifdef USE_INTERP interp: diff --git a/src/loader32.asm b/src/loader32.asm index 04b7857..ef59fd7 100644 --- a/src/loader32.asm +++ b/src/loader32.asm @@ -2,11 +2,19 @@ %include "rtld.inc" +%ifdef ELF_TYPE [section .text.startup.smol] +%else +; not defined -> debugging! 
+[section .text] +%endif _smol_start: + +%ifdef USE_DL_FINI push edx ; _dl_fini +%endif ; try to get the 'version-agnostic' pffset of the stuff we're ; interested in mov ebx, eax @@ -118,7 +126,9 @@ link.done: ;xor ebp, ebp ; let's put that burden on the user code, so they can leave ; it out if they want to +%ifdef USE_DL_FINI pop edx ; _dl_fini +%endif sub esp, 20 ; put the stack where _start (C code) expects it to be ; this can't be left out, because X needs the envvars diff --git a/src/loader64.asm b/src/loader64.asm index 613fc23..ad955ee 100644 --- a/src/loader64.asm +++ b/src/loader64.asm @@ -9,19 +9,10 @@ [section .text] %endif -; rax: special op reg -;!rbx: ptrdiff_t glibc_vercompat_extra_hi_field_off -; rcx: special op reg -; rdx: special op reg -; rsi: special op reg -; rdi: struct link_map* root / special op reg -; rbp: -; r8 : -; r9 : -;!r10: struct link_map* entry + far correction factor -; r11: temp storage var -;!r12: struct link_map* entry -;!r13: _dl_fini address (reqd by the ABI) +; rbx: ptrdiff_t glibc_vercompat_extra_hi_field_off +; r10: struct link_map* entry + far correction factor +; r12: struct link_map* entry +; r13: _dl_fini address (reqd by the ABI) %ifndef ELF_TYPE extern _symbols @@ -29,7 +20,9 @@ global _start _start: %endif _smol_start: +%ifdef USE_DL_FINI xchg r13, rdx ; _dl_fini +%endif mov r12, [rsp - 8] ; return address of _dl_init mov r11d, dword [r12 - 20] ; decode part of 'mov rdi, [rel _rtld_global]' @@ -164,9 +157,11 @@ _smol_start: jmp short .next_hash .needed_end: - ;xor rbp, rbp + ;xor rbp, rbp ; still 0 from _dl_start_user mov rdi, rsp push rax +%ifdef USE_DL_FINI xchg rsi, r13 ; _dl_fini +%endif ; fallthru to _start