32-bit version using dnload loader now working

This commit is contained in:
PoroCYon 2019-03-28 16:37:05 +01:00
parent d9dbaae27a
commit 0186019f99
9 changed files with 255 additions and 82 deletions

View File

@ -13,7 +13,7 @@ BITS ?= $(shell getconf LONG_BIT)
COPTFLAGS=-Os -fno-plt -fno-stack-protector -fno-stack-check -fno-unwind-tables \
-fno-asynchronous-unwind-tables -fomit-frame-pointer -ffast-math -no-pie \
-fno-pic -fno-PIE -m64 -march=core2 -ffunction-sections -fdata-sections
-fno-pic -fno-PIE -m64 -march=core2 -ffunction-sections -fdata-sections -fno-plt
CXXOPTFLAGS=$(COPTFLAGS) -fno-exceptions \
-fno-rtti -fno-enforce-eh-specs -fnothrow-opt -fno-use-cxa-get-exception-ptr \
-fno-implicit-templates -fno-threadsafe-statics -fno-use-cxa-atexit
@ -41,7 +41,7 @@ CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2)
LIBS=-lc
SMOLFLAGS +=
ASFLAGS += -DUSE_INTERP -DALIGN_STACK
ASFLAGS += -DUSE_INTERP -DUSE_DNLOAD_LOADER -DNO_START_ARG -DUNSAFE_DYNAMIC #-DALIGN_STACK
#-DUSE_DNLOAD_LOADER #-DUSE_DT_DEBUG #-DUSE_DL_FINI #-DNO_START_ARG #-DUNSAFE_DYNAMIC
NASM ?= nasm

View File

@ -19,7 +19,8 @@ ld -T ld/link.ld --oformat=binary -o output.elf nasm-output.o input.o...
* `ALIGN_STACK`: *64-bit only*: realign the stack so that SSE instructions
won't segfault. Costs 1 byte.
* `USE_NX`: Don't use `RWE` segments at all. Not very well tested. Costs the
size of 1 phdr.
size of 1 phdr, plus some extra stuff on `i386`. Don't forget to pass `-n`
to `smol.py` as well.
* `USE_DL_FINI`: keep track of the `_dl_fini` function and pass it to your
`_start`. Costs 2 bytes, plus maybe a few more depending on how it's passed
to `__libc_start_main`.
@ -29,9 +30,10 @@ ld -T ld/link.ld --oformat=binary -o output.elf nasm-output.o input.o...
strictly worse size-wise by 10 (i386) or 3 (x86_64) bytes.
* `SKIP_ENTRIES`: skip the first two entries of the `struct link_map`, which
represent the main binary and the vDSO. Costs around 5 bytes.
* `USE_DNLOAD_LOADER`: *64-bit only*: use the symbol loading mechanism as used
in dnload (i.e. traverse the symtab of the imported libraries). Slightly
larger, but probably better compressible.
* `USE_DNLOAD_LOADER`: use the symbol loading mechanism as used in dnload (i.e.
traverse the symtab of the imported libraries). Slightly larger, but probably
better compressible and more compatible with other libcs and future versions
of glibc.
* `NO_START_ARG`: *don't* pass the stack pointer to `_start` as the first arg.
Will make it unable to read argc/argv/environ, but gives you 3 bytes.
@ -55,6 +57,10 @@ optional arguments:
--cc CC which cc binary to use
--scanelf SCANELF which scanelf binary to use
--readelf READELF which readelf binary to use
-n, --nx Use NX (i.e. don't use RWE pages). Costs the size of
one phdr, plus some extra bytes on i386. Don't forget
to pass -DUSE_NX to the assembly loader as well!
```
A minimal crt (and `_start` function) is provided in case you want to use `main`.

View File

@ -34,6 +34,9 @@ def main():
# help="Use dnload's mechanism of importing functions. Slightly larger, but usually better compressable.")
# parser.add_argument('--libsep', default=False, action='store_true', \
# help="Separete import symbols per library, instead of looking at every library when resolving a symbol.")
parser.add_argument('-n', '--nx', default=False, action='store_true', \
help="Use NX (i.e. don't use RWE pages). Costs the size of one phdr, "\
+"plus some extra bytes on i386.")
parser.add_argument('input', nargs='+', help="input object file")
parser.add_argument('output', type=argparse.FileType('w'), \
@ -66,7 +69,7 @@ def main():
symbols.setdefault(library, [])
symbols[library].append((symbol, reloc))
output(arch, symbols, args.output)
output(arch, symbols, args.nx, args.output)
if __name__ == '__main__':
main()

View File

@ -3,8 +3,26 @@ import sys
from smolshared import *
def output_x86(libraries, outf):
def output_x86(libraries, nx, outf):
outf.write('; vim: set ft=nasm:\n') # be friendly
if nx:
outf.write('%define USE_NX 1\n')
usedrelocs = set({})
for library, symrels in libraries.items():
for sym, reloc in symrels: usedrelocs.add(reloc)
if not(nx) and 'R_386_PC32' in usedrelocs and 'R_386_GOT32X' in usedrelocs:
eprintf("Using a mix of R_386_PC32 and R_386_GOT32X relocations! "+\
"Please change a few C compiler flags and recompile your code.")
exit(1)
use_jmp_bytes = not nx and 'R_386_PC32' in usedrelocs
if use_jmp_bytes:
outf.write('%define USE_JMP_BYTES 1\n')
outf.write('bits 32\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
@ -14,39 +32,57 @@ def output_x86(libraries, outf):
for library in libraries:
outf.write('dd 1;DT_NEEDED\n')
outf.write('dd (_symbols.{} - _strtab)\n'.format(shorts[library]))
outf.write('dynamic.end:\n')
outf.write("""\
dynamic.end:
%ifndef UNSAFE_DYNAMIC
dd DT_NULL
%endif
""")
# if needgot:
# outf.write('global _GLOBAL_OFFSET_TABLE_\n')
# outf.write('_GLOBAL_OFFSET_TABLE_:\n')
# outf.write('dd dynamic\n')
outf.write('[section .rodata.neededlibs]\n')
outf.write('_strtab:\n')
# if not libsep:
# for library, symrels in libraries.items():
# outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
for library, symrels in libraries.items():
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
outf.write('[section .data.smolgot]\n')
if not nx:
outf.write('[section .text.smolplt]\n')
outf.write('_symbols:\n')
for library, symrels in libraries.items():
# if libsep:
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
for sym, reloc in symrels:
# meh
if reloc != 'R_386_PC32':
if reloc != 'R_386_PC32' and reloc != 'R_386_GOT32X':
eprintf('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!')
sys.exit(1)
hash = hash_djb2(sym)
outf.write("""
if nx:
outf.write("\t\t_symbols.{lib}.{name}: dd 0x{hash:x}"\
.format(lib=shorts[library],name=sym,hash=hash).lstrip('\n'))
else:
outf.write(("""\
\t\tglobal {name}
\t\t{name}: db 0xE9
\t\t dd 0x{hash:x}
""".format(name=sym, hash=hash).lstrip('\n'))
\t\t{name}:""" + ("\n\t\t\tdb 0xE9" if use_jmp_bytes else '') + """
\t\t\tdd 0x{hash:x}
""").format(name=sym, hash=hash).lstrip('\n'))
outf.write('\tdb 0\n') # TODO: not a dd?
outf.write('db 0\n')
outf.write('_symbols.end:\n')
if nx:
outf.write('_smolplt:\n')
for library, symrels in libraries.items():
for sym, reloc in symrels:
outf.write("""\
[section .text.smolplt.{name}]
global {name}
{name}:
\tjmp [dword _symbols.{lib}.{name}]
""".format(lib=shorts[library],name=sym).lstrip('\n'))
outf.write('_smolplt.end:\n')
outf.write('%include "loader32.asm"\n')
# end output_x86
@ -104,11 +140,11 @@ global {name}
for library, symrels in libraries.items():
for sym, reloc in symrels:
if reloc == 'R_X86_64_PLT32':
outf.write("""
outf.write("""\
[section .text.smolplt.{name}]
global {name}
{name}:
jmp [rel _symbols.{lib}.{name}]
\tjmp [rel _symbols.{lib}.{name}]
""".format(lib=shorts[library],name=sym).lstrip('\n'))
outf.write('_smolplt.end:\n')
@ -116,8 +152,8 @@ global {name}
# end output_amd64
def output(arch, libraries, outf):
if arch == 'i386': output_x86(libraries, outf)
def output(arch, libraries, nx, outf):
if arch == 'i386': output_x86(libraries, nx, outf)
elif arch == 'x86_64': output_amd64(libraries, outf)
else:
eprintf("E: cannot emit for arch '" + str(arch) + "'")

View File

@ -29,6 +29,7 @@
%define PHDR_X (1)
%define DT_NULL ( 0)
%define DT_NEEDED ( 1)
%define DT_STRTAB ( 5)
%define DT_SYMTAB ( 6)
%define DT_DEBUG (21)
@ -36,6 +37,15 @@
%define ST_NAME_OFF ( 0)
; ,---- not 16?
; v
%define ST_VALUE_OFF ( 8)
%define SYMTAB_SIZE (24)
%if __BITS__ == 32
%define D_UN_PTR_OFF ( 4)
%define ST_VALUE_OFF ( 4)
%define SYMTAB_SIZE (16)
%define ELF_DYN_SZ ( 8)
%else
%define D_UN_PTR_OFF ( 8)
%define ST_VALUE_OFF ( 8)
%define SYMTAB_SIZE (24)
%define ELF_DYN_SZ (16)
%endif

View File

@ -6,7 +6,7 @@
%include "elf.inc"
header:
ehdr:
; e_ident
db 0x7F, "ELF"
db EI_CLASS, EI_DATA, EI_VERSION, EI_OSABI
@ -17,11 +17,11 @@ header:
dw ELF_MACHINE ; e_machine: 3 = x86
dd EI_VERSION ; e_version
dd _smol_start ; e_entry
dd (.segments - header) ; e_phoff
dd (phdr - ehdr) ; e_phoff
dd 0 ; e_shoff
dd 0 ; e_flags
dw (.segments - header) ; e_ehsize
dw (.segments.load - .segments.dynamic) ; e_phentsize
dw (phdr - ehdr) ; e_ehsize
dw (phdr.load - phdr.dynamic) ; e_phentsize
%ifdef USE_NX
%error "USE_NX not supported yet on i386 ('GOT' still needs RWX, and alignment has to be fixed)"
;%ifdef USE_INTERP
@ -47,41 +47,45 @@ header:
; dd (PHDR_R | PHDR_W)
; dd 0x1;000
%else
.segments:
phdr:
%endif
%ifdef USE_INTERP
.segments.interp:
phdr.interp:
dd PT_INTERP ; {e_phnum: 2, e_shentsize: 0}, p_type
dd (.interp - header) ; {e_shnum: <junk>, e_shstrnd: <junk>}, p_offset
dd .interp, .interp ; p_vaddr, p_paddr
dd (.interp.end-.interp) ; p_filesz
dd (.interp.end-.interp) ; p_memsz
dd (interp - ehdr) ; {e_shnum: <junk>, e_shstrnd: <junk>}, p_offset
dd interp, interp ; p_vaddr, p_paddr
dd (interp.end-interp); p_filesz
dd (interp.end-interp); p_memsz
dd 0,0 ; p_flags, p_align
%endif
.segments.dynamic:
phdr.dynamic:
dd PT_DYNAMIC ; {e_phnum: 2, e_shentsize: 0}, p_type
dd (dynamic - header) ; {e_shnum: <junk>, e_shstrnd: <junk>}, p_offset
dd (dynamic - ehdr) ; {e_shnum: <junk>, e_shstrnd: <junk>}, p_offset
dd dynamic, 0 ; p_vaddr, p_paddr
dd (dynamic.end - dynamic) ; p_filesz
dd (dynamic.end - dynamic) ; p_memsz
dd 0, 0 ; p_flags, p_align
%ifndef USE_NX
.segments.load:
phdr.load:
dd PT_LOAD ; p_type: 1 = PT_LOAD
dd 0 ; p_offset
dd _smol_origin, 0 ; p_vaddr, p_paddr
dd ehdr, 0 ; p_vaddr, p_paddr
; use memsize twice here, linux doesn't care and it compresses better
dd _smol_total_memsize ; p_filesz
dd _smol_total_memsize ; p_memsz
dd (PHDR_R | PHDR_W | PHDR_X) ; p_flags
dd 0x1000 ; p_align
%endif
.segments.end:
phdr.end:
%ifdef USE_INTERP
.interp:
[section .rodata.interp]
interp:
db "/lib/ld-linux.so.2",0
.interp.end:
interp.end:
%endif
[section .rodata.dynamic]
global _DYNAMIC
_DYNAMIC:
dynamic:
@ -93,6 +97,7 @@ dynamic.symtab:
dd DT_SYMTAB ; d_tag: 6 = DT_SYMTAB
dd 0 ; d_un.d_ptr
%ifdef USE_DT_DEBUG
dynamic.debug:
dd DT_DEBUG
_DEBUG:
dd 0

View File

@ -1,4 +1,4 @@
; vim: set ft=nasm ts=8:
; vim: set ft=nasm et:
%include "rtld.inc"
@ -13,29 +13,124 @@
_smol_start:
%ifdef USE_DL_FINI
push edx ; _dl_fini
push edx ; _dl_fini
%endif
; try to get the 'version-agnostic' offset of the stuff we're
; interested in
; try to get the 'version-agnostic' offset of the stuff we're
; interested in
%ifdef USE_DT_DEBUG
mov eax, [rel _DEBUG]
mov eax, [eax + 4]
mov eax, [rel _DEBUG]
mov eax, [eax + 4]
%endif
%ifdef SKIP_ENTRIES
mov eax, [eax + LM_NEXT_OFFSET] ; skip this binary
mov eax, [eax + LM_NEXT_OFFSET] ; skip the vdso
mov eax, [eax + LM_NEXT_OFFSET] ; skip this binary
; mov eax, [eax + LM_NEXT_OFFSET] ; skip the vdso
%endif
push _symbols
push eax
%ifdef USE_DNLOAD_LOADER
pop ebp
pop edi
.next_hash:
mov ecx, [edi]
; assume it's nonzero
push ebp
pop edx
; edx: hash
; ebx: link_map* chain
.next_link:
; pop edx
mov edx, [edx + L_NEXT_OFF]
; ElfW(Dyn)* dyn(esi) = ebx->l_ld
mov esi, [edx + L_LD_OFF]
push edx
; get strtab off
.next_dyn:
lodsd
cmp al, DT_STRTAB
lodsd
jne short .next_dyn
; void* addr(edx) = ebx->l_addr
; const char* strtab(ebx)=lookup(esi,DT_STRTAB);
mov edx, [edx + L_ADDR_OFF]
cmp eax, edx
jae short .noreldynaddr
add eax, edx
.noreldynaddr:
push eax
pop ebx
; const ElfW(Sym)* symtab(edx) = lookup(esi, DT_SYMTAB);
lodsd ; SYMTAB d_tag
lodsd ; SYMTAB d_un
cmp eax, edx
jae short .norelsymaddr
add eax, edx
.norelsymaddr:
push eax
pop edx
.next_sym:
mov esi, [edx + ST_NAME_OFF]
add esi, ebx
push ecx
push ebx
push 33
push 5381
pop eax
pop ebx
xor ecx, ecx
.nexthashiter:
xchg eax, ecx
lodsb
or al, al
xchg eax, ecx
jz short .breakhash
push edx
mul ebx
pop edx
add eax, ecx
jmp short .nexthashiter
.breakhash:
pop ebx
pop ecx
cmp ecx, eax
je short .hasheq
add edx, SYMTAB_SIZE
cmp edx, ebx
jb short .next_sym
pop edx
jmp short .next_link
.hasheq:
mov eax, [edx + ST_VALUE_OFF]
pop edx
mov esi, [edx + L_ADDR_OFF]
;cmp eax, esi
; jb short .hasheqnorel
add eax, esi
.hasheqnorel:
;add eax, [edx + L_ADDR_OFF] ; TODO: CONDITIONAL!
stosd
%ifdef USE_JMP_BYTES
inc edi ; skip 0xE9 (jmp) offset
%endif
cmp word [edi], 0
jne short .next_hash
; if USE_DNLOAD_LOADER
%else
mov ebx, eax
; mov esi, eax
;.looper:
; lodsd
; cmp dword eax, _smol_start
; jne short .looper
; sub esi, ebx
; sub esi, LM_ENTRY_OFFSET_BASE+4 ; +4: take inc-after from lodsd into acct
mov edi, eax
push -1
pop ecx
@ -140,6 +235,8 @@ link: ; (struct link_map *root, char *symtable)
jmp short link.do_symbols
inc esi
link.done:
; if USE_DNLOAD_LOADER ... else ...
%endif
;xor ebp, ebp ; let's put that burden on the user code, so they can leave
; it out if they want to
@ -147,10 +244,15 @@ link.done:
%ifdef USE_DL_FINI
pop edx ; _dl_fini
%endif
sub esp, 20 ; put the stack where _start (C code) expects it to be
; this can't be left out, because X needs the envvars
; move esp into eax, *then* increase the stack by 4, as main()
; expects a return address to be inserted by a call instruction
; (which we don't have, so we're doing a 1-byte fixup instead of a
; 5-byte call)
push esp
pop eax
push eax
push eax
;.loopme: jmp short .loopme
;jmp short _start
; by abusing the linker script, _start ends up right here :)

View File

@ -69,22 +69,22 @@ _smol_start:
jne short .next_dyn
; void* addr(rcx) = r12->l_addr
; const char* strtab(r9) = lookup(rsi,DT_STRTAB), *symtab_end(r8)=r9;
; const char* strtab(r8)=lookup(rsi,DT_STRTAB)/*,*symtab_end(r9)=r8*/;
mov rcx, [r12 + L_ADDR_OFF]
cmp rax, rcx
jge short .noreldynaddr
jae short .noreldynaddr
add rax, rcx
.noreldynaddr:
push rax
push rax
; push rax
pop r8
pop r9
; pop r9
; const ElfW(Sym)* symtab(rdx) = lookup(rsi, DT_SYMTAB);
lodsq ; SYMTAB d_tag
lodsq ; SYMTAB d_un.d_ptr
cmp rax, rcx
jge short .norelsymaddr
jae short .norelsymaddr
add rax, rcx
.norelsymaddr:
; xchg rax, rdx
@ -93,7 +93,7 @@ _smol_start:
.next_sym:
mov esi, dword [rdx + ST_NAME_OFF]
add rsi, r9
add rsi, r8;9
xor ecx, ecx
push 33
@ -122,7 +122,7 @@ _smol_start:
add rdx, SYMTAB_SIZE
cmp rdx, r8
jl short .next_sym
jb short .next_sym
jmp short .next_link
.hasheq:

View File

@ -1,6 +1,23 @@
; vim: set ft=nasm:
%if __BITS__ == 32
%define L_ADDR_OFF ( 0)
%define L_NAME_OFF ( 4)
%define L_LD_OFF ( 8)
%define L_NEXT_OFF (0x0C)
%define L_INFO_OFF (0x20)
;%define L_INFO_DT_SYMTAB_OFF ()
%define LF_ENTRY_OFF ( 340)
%define LF_NBUCKETS_OFF (0x178)
%define LF_GNU_BUCKETS_OFF (0x188)
%define LF_GNU_CHAIN_ZERO_OFF (0x18C)
%define L_GNU_BUCKETS_SZ (4)
%define L_GNU_CHAIN_ZERO_SZ (4)
; old defs:
%define LM_NAME_OFFSET 0x4
%define LM_NEXT_OFFSET 0xC
%define LM_ADDR_OFFSET 0
@ -18,10 +35,7 @@
%define DT_SYMSIZE_SHIFT 4
%define DT_STRTAB 0x5
%define DT_SYMTAB 0x6
%else
%else ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define L_ADDR_OFF ( 0)
%define L_NAME_OFF ( 8)
%define L_LD_OFF ( 16)
@ -37,10 +51,7 @@
%define L_GNU_BUCKETS_SZ ( 4)
%define L_GNU_CHAIN_ZERO_SZ ( 4)
%define D_UN_PTR_OFF ( 8)
%define ST_VALUE_OFF ( 8)
%define ELF_SYM_SZ ( 24)
%define ELF_DYN_SZ ( 16)
%endif
%define ELF_SYM_SZ (SYMTAB_SIZE)