; Mirror of https://github.com/Shizmob/smol
; NASM source, 372 lines, 9.2 KiB
; vim: set ft=nasm et:
|
|
|
|
; HASH_END_TYP is the operand size used by the `cmp HASH_END_TYP [rdi], 0`
; end-of-table tests further down. If the build does not define it, a warning
; is emitted and it defaults to `word`.
%ifndef HASH_END_TYP
|
|
%warning "W: HASH_END_TYP not defined, falling back to 16-bit!"
|
|
%define HASH_END_TYP word
|
|
%endif
|
|
|
|
; R10_BIAS is added into r10 by `lea r10, [r12 + r9 + R10_BIAS]` and subtracted
; again in every `LF_*_OFF - R10_BIAS` displacement, so the effective addresses
; do not depend on its value.
; NOTE(review): presumably chosen so those displacements encode compactly - confirm.
;%define R10_BIAS (0x2B4)
|
|
%define R10_BIAS (0x2B4+0x40)
|
|
|
|
; NOTE(review): the L_*, LF_*, ST_*, STT_*, DT_*, D_UN_PTR_OFF and SYMTAB_SIZE
; constants used below are not defined in this file; presumably rtld.inc
; provides them - confirm.
%include "rtld.inc"
|
|
|
|
; Section selection: a dedicated startup section when ELF_TYPE is defined,
; plain .text otherwise.
%ifdef ELF_TYPE
|
|
[section .text.startup.smol]
|
|
%else
|
|
; not defined -> debugging!
|
|
[section .text]
|
|
%endif
|
|
|
|
; Register roles used throughout this file:
; r9 : ptrdiff_t glibc_vercompat_extra_hi_field_off
;      (only set/used by the non-USE_DNLOAD_LOADER path)
|
|
; r10: struct link_map* entry + far correction factor
;      (entry + r9 + R10_BIAS; only used by the non-USE_DNLOAD_LOADER path)
|
|
; r12: struct link_map* entry
|
|
; r11: struct link_map* root (backup copy, reloaded into r12 for every hash)
; r14: (r14d) the 32-bit hash currently being resolved
|
|
; r13: _dl_fini address (reqd by the ABI)
|
|
|
|
; Without ELF_TYPE (debug build) this file also exports `_start` at the same
; address as `_smol_start` and declares `_symbols` as an external symbol.
%ifndef ELF_TYPE
|
|
extern _symbols
|
|
global _start
|
|
_start:
|
|
%endif
|
|
global _smol_start:
|
|
; Entry point. Locates the link_map root (into r12), then falls into the
; symbol-resolution loop selected by USE_DNLOAD_LOADER.
_smol_start:
|
|
%ifdef USE_DL_FINI
|
|
; Park the incoming rdx in r13; it is moved into rsi at .needed_end.
xchg r13, rdx ; _dl_fini
|
|
%endif
|
|
|
|
%ifdef USE_DT_DEBUG
|
|
; r12 = qword stored at _DEBUG, then r12 = qword at offset 8 from that pointer.
; NOTE(review): presumably _DEBUG holds the DT_DEBUG r_debug pointer and
; offset 8 is its link_map list head - confirm.
mov r12, [rel _DEBUG]
|
|
mov r12, [r12 + 8]
|
|
%else
|
|
; No DT_DEBUG: read the qword just below rsp and decode a displacement from
; the code bytes it points into.
mov r12, [rsp - 8] ; return address of _dl_init
|
|
; The 32-bit load zero-extends into rbx, so the address computed on the next
; line is only right for a non-negative displacement.
mov ebx, dword [r12 - 20] ; decode part of 'mov rdi, [rel _rtld_global]'
|
|
; r12 = qword at (r12 + displacement - 16).
; NOTE(review): presumably this reads the link_map root out of _rtld_global;
; the -20/-16 constants depend on the exact ld.so code layout - confirm.
mov r12, [r12 + rbx - 16] ; load through the decoded displacement
|
|
%endif
|
|
; struct link_map* root = r12
|
|
%ifdef SKIP_ENTRIES
|
|
mov r12, [r12 + L_NEXT_OFF] ; skip this binary
|
|
; mov r12, [r12 + L_NEXT_OFF] ; skip the vdso
|
|
; the second one isn't needed anymore, see code below (.next_link)
|
|
%endif
|
|
|
|
; ---- USE_DNLOAD_LOADER: resolve each hash by walking every symbol of every
; ---- link_map entry and hashing its name.
%ifdef USE_DNLOAD_LOADER
|
|
; Push/pop shuffle: r11 = r12 (link_map root backup), rdi = address of _symbols.
push _symbols
|
|
push r12
|
|
pop r11
|
|
pop rdi
|
|
|
|
;.loopme: jmp short .loopme ; debugging
|
|
; Outer loop: one iteration per table slot at [rdi].
.next_hash:
|
|
; r14d = 32-bit hash to look up.
mov r14d, dword [rdi]
|
|
; assume it's nonzero
|
|
; Restart the link_map walk from the saved root (r12 = r11).
push r11
|
|
pop r12
|
|
|
|
; Advance to the next link_map entry. Note the advance happens before the
; entry is examined, so the entry r12 starts on is itself never searched.
.next_link:
|
|
mov r12, [r12 + L_NEXT_OFF]
|
|
; ElfW(Dyn)* dyn(rsi) = r12->l_ld
|
|
mov rsi, [r12 + L_LD_OFF]
|
|
|
|
; get strtab off
|
|
; Scan 16-byte dynamic entries: the first lodsq reads the tag, the second the
; value. lodsq does not modify flags, so the jne still tests the cmp result.
; Only the low byte of the tag is compared, and there is no end-of-array check.
.next_dyn:
|
|
lodsq
|
|
cmp al, DT_STRTAB
|
|
lodsq
|
|
jne short .next_dyn
|
|
|
|
; void* addr(rcx) = r12->l_addr
|
|
; const char* strtab(r8)=lookup(rsi,DT_STRTAB)/*,*symtab_end(r9)=r8*/;
|
|
mov rcx, [r12 + L_ADDR_OFF]
|
|
; If the value is below l_addr it is treated as an offset and l_addr is added;
; otherwise it is used as-is.
cmp rax, rcx
|
|
jae short .noreldynaddr
|
|
add rax, rcx
|
|
.noreldynaddr:
|
|
; r8 = strtab address (rax is moved via the stack).
push rax
|
|
; push rax
|
|
pop r8
|
|
; pop r9
|
|
|
|
; const ElfW(Sym)* symtab(rdx) = lookup(rsi, DT_SYMTAB);
; No tag check here: the entry immediately following DT_STRTAB is assumed to
; be DT_SYMTAB (its tag is read and discarded).
|
|
lodsq ; SYMTAB d_tag
|
|
lodsq ; SYMTAB d_un.d_ptr
|
|
; Same "below l_addr => add l_addr" adjustment as for strtab.
cmp rax, rcx
|
|
jae short .norelsymaddr
|
|
add rax, rcx
|
|
.norelsymaddr:
|
|
; xchg rax, rdx
|
|
; rdx = symtab address.
push rax
|
|
pop rdx
|
|
|
|
; Per-symbol loop: hash the name of the symbol at rdx and compare with r14d.
.next_sym:
|
|
; rsi = strtab (r8) + sym->st_name
mov esi, dword [rdx + ST_NAME_OFF]
|
|
add rsi, r8;9
|
|
|
|
%ifndef USE_CRC32C_HASH
|
|
; djb2
; Initial state: ecx = 0, rax = 5381 (running hash), rbx = 33 (multiplier).
|
|
xor ecx, ecx
|
|
push 33
|
|
push 5381
|
|
; push 0
|
|
; pop rcx
|
|
pop rax
|
|
pop rbx
|
|
%else
|
|
; crc32
; Initial state: rcx = -1 (running CRC).
|
|
push -1
|
|
pop rcx
|
|
%endif
|
|
; Per-character loop. lodsb needs al, so in the djb2 variant the running hash
; is swapped out to ecx around the load and swapped back afterwards; xchg does
; not touch flags, so the jz below still tests the `or al, al` result.
.nexthashiter:
|
|
%ifndef USE_CRC32C_HASH
|
|
; djb2
|
|
; TODO: optimize register usage a bit more
|
|
xchg eax, ecx
|
|
%endif
|
|
lodsb
|
|
; ZF set when the loaded byte is the terminating NUL.
or al, al
|
|
%ifndef USE_CRC32C_HASH
|
|
; djb2
; After this swap: eax = running hash, ecx = current character.
|
|
xchg eax, ecx
|
|
%endif
|
|
jz short .breakhash
|
|
|
|
%ifndef USE_CRC32C_HASH
|
|
; djb2
; hash = hash * 33 + c. mul writes edx, so rdx (the symbol pointer) is
; saved and restored around it.
|
|
push rdx
|
|
mul ebx
|
|
pop rdx
|
|
add eax, ecx
|
|
%else
|
|
; crc32c
|
|
crc32 ecx, al
|
|
%endif
|
|
jmp short .nexthashiter
|
|
; End of name: compare the computed hash with the wanted one (r14d).
.breakhash:
|
|
%ifdef USE_CRC32C_HASH
|
|
; crc32c
|
|
cmp r14d, ecx
|
|
%else
|
|
; djb2
|
|
cmp r14d, eax
|
|
|
|
%endif
|
|
je short .hasheq
|
|
|
|
; No match: step to the next symbol. The walk continues while rdx is below r8
; (the strtab address), which serves as the end bound of the symbol table.
; NOTE(review): this assumes the string table starts right after the symbol
; table in memory - confirm.
add rdx, SYMTAB_SIZE
|
|
cmp rdx, r8
|
|
jb short .next_sym
|
|
jmp short .next_link
|
|
|
|
; Hash matched: rdx points at the symbol, r12 at its link_map entry.
; Computes the final address, optionally calls an ifunc resolver, and stores
; the result into the table slot at [rdi].
.hasheq:
|
|
%ifdef IFUNC_SUPPORT
|
|
; cl = sym->st_info, kept for the ifunc type test below.
mov cl , [rdx + ST_INFO_OFF]
|
|
%endif
|
|
; rax = sym->st_value
mov rax, [rdx + ST_VALUE_OFF]
|
|
%ifdef SKIP_ZERO_VALUE
|
|
; A zero st_value is not accepted: keep searching in the next link_map entry.
or rax, rax
|
|
jz short .next_link
|
|
%endif
|
|
; rax = st_value + entry->l_addr
add rax, [r12 + L_ADDR_OFF]
|
|
%ifdef IFUNC_SUPPORT
|
|
; Is the symbol type STT_GNU_IFUNC?
and cl, ST_INFO__STT_MASK
|
|
cmp cl, STT_GNU_IFUNC
|
|
%ifdef SKIP_ZERO_VALUE
|
|
; Inline variant: call the resolver in place; its return value (rax) is what
; gets stored. rdi and r11 are saved because they must survive the call.
jne short .no_ifunc2
|
|
push rdi
|
|
push r11
|
|
call rax
|
|
pop r11
|
|
pop rdi
|
|
.no_ifunc2:
|
|
%else ; !SKIP_ZERO_VALUE
|
|
; Out-of-line variant: the resolver call lives at .ifunc after the loop and
; jumps back to .no_ifunc.
je short .ifunc
|
|
.no_ifunc:
|
|
%endif
|
|
%endif
|
|
; Store the 64-bit address at [rdi] (overwriting the hash that was read from
; this slot) and advance rdi by 8.
stosq
|
|
; Continue with the next slot unless it starts with a zero of size HASH_END_TYP.
cmp HASH_END_TYP [rdi], 0
|
|
%ifdef IFUNC_SUPPORT
|
|
%ifdef SKIP_ZERO_VALUE
|
|
; NOTE(review): `short` is dropped here, presumably because the target is out
; of rel8 range in this configuration - confirm.
jne .next_hash;short .next_hash
|
|
%else ; IFUNC_SUPPORT && !SKIP_ZERO_VALUE
|
|
jne short .next_hash
|
|
%endif
|
|
%else ; !IFUNC_SUPPORT
|
|
jne short .next_hash
|
|
%endif
|
|
|
|
%ifdef IFUNC_SUPPORT
|
|
%ifndef SKIP_ZERO_VALUE
|
|
; Table finished: skip over the out-of-line resolver stub.
jmp short .break_loop
|
|
; Out-of-line ifunc resolver call; rax = resolver address on entry,
; resolved address on return.
.ifunc:
|
|
;;int3 ; in this call, we lose rax rcx rdx rsi rdi r8 r9 r10 r11
|
|
; we only need persistence for rdi and r11 tho
|
|
;push rcx
|
|
;push rdx
|
|
;push rsi
|
|
push rdi
|
|
;push r8
|
|
;push r9
|
|
;push r10
|
|
push r11
|
|
call rax
|
|
pop r11
|
|
;pop r10
|
|
;pop r9
|
|
;pop r8
|
|
pop rdi
|
|
;pop rsi
|
|
;pop rdx
|
|
;pop rcx
|
|
jmp short .no_ifunc
|
|
.break_loop:
|
|
%endif
|
|
%endif
|
|
|
|
; if USE_DNLOAD_LOADER
|
|
; ---- !USE_DNLOAD_LOADER: resolve each hash through the per-entry GNU hash
; ---- fields (LF_NBUCKETS_OFF / LF_GNU_BUCKETS_OFF / LF_GNU_CHAIN_ZERO_OFF).
%else
|
|
; Probe the link_map layout. Push/pop shuffle gives:
;   rax = address of _smol_start, rdi = r12 (link_map root), rcx = -1.
push _smol_start
|
|
push r12
|
|
push -1
|
|
pop rcx
|
|
pop rdi
|
|
pop rax
|
|
; Scan dwords upward from the link_map until one equals eax (the low 32 bits
; of the _smol_start address); rdi ends up 4 bytes past the match.
repne scasd ; technically, scasq should be used, but meh. this is 1 byte smaller
|
|
; r9 = (offset at which the value was found) - LF_ENTRY_OFF, i.e. how far the
; real field position differs from the one rtld.inc expects. It is added to
; every LF_* access via r10 below.
sub rdi, r12
|
|
sub rdi, LF_ENTRY_OFF+4
|
|
xchg r9, rdi
|
|
|
|
; rdi = address of _symbols, r11 = r12.
push _symbols
|
|
; back up link_map root
|
|
push r12
|
|
pop r11
|
|
pop rdi
|
|
|
|
;.loopme: jmp short .loopme ; debugging
|
|
; Outer loop: one iteration per table slot at [rdi].
.next_hash:
|
|
; r14d = 32-bit hash to look up (the 32-bit write zeroes the upper half of r14).
mov r14d, dword [rdi]
|
|
; assume we need at least one function
|
|
; or al, al
|
|
; jz short .needed_end
|
|
; Restart the link_map walk from the saved root.
mov r12, r11
|
|
; push r11
|
|
; rbx = copy of the hash, shifted below for the chain comparison.
push r14
|
|
pop rbx
|
|
; pop r12
|
|
; shift right because we don't want to compare the lowest bit
|
|
shr ebx, 1
|
|
|
|
; Advance to the next link_map entry and look the hash up in its GNU hash
; table. The advance happens before the lookup, so the entry r12 starts on is
; itself never searched.
.next_link:
|
|
mov r12, [r12 + L_NEXT_OFF]
|
|
|
|
; r10 = entry + layout correction (r9) + R10_BIAS; every LF_* access below
; subtracts R10_BIAS again in its displacement.
lea r10, [r12 + r9 + R10_BIAS]
|
|
; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets
|
|
; div takes edx:eax as the dividend, so edx is cleared and eax = hash (r14).
xor edx, edx
|
|
push r14
|
|
pop rax
|
|
mov ecx, dword [r10 + LF_NBUCKETS_OFF - R10_BIAS]
|
|
; Remainder is left in edx.
div ecx
|
|
|
|
; uint32_t bucket(ecx) = entry->l_gnu_buckets[bkt_ind]
|
|
mov r8 , [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS]
|
|
mov ecx, dword [r8 + rdx * 4]
|
|
|
|
; can be ignored apparently?
|
|
; jecxz .next_link
|
|
|
|
; Walk the chain starting at index ecx.
.next_chain:
|
|
; uint32_t luhash(edx) = entry->l_gnu_chain_zero[bucket] >> 1
|
|
mov rdx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS]
|
|
mov edx, dword [rdx + rcx * 4]
|
|
|
|
; TODO: make this not suck. (maybe using bt*?)
|
|
; Keep the low byte in al so bit 0 can be tested after edx is shifted.
mov al, dl
|
|
|
|
shr edx, 1
|
|
; if (luhash == hash) break;
|
|
; ebx holds hash >> 1 (set up at .next_hash).
cmp edx, ebx
|
|
je short .chain_break
|
|
|
|
; ++bucket; } while (luhash & 1);
|
|
; Bit 0 set in the chain word: give up on this entry and try the next one.
and al, 1
|
|
jnz short .next_link
|
|
|
|
inc ecx
|
|
jmp short .next_chain
|
|
|
|
; Hash matched in the current entry's GNU hash chain.
; In:   ecx = index of the matching symbol, r12 = link_map entry,
;       rdi = table slot to fill, r9 = link_map layout correction,
;       r11 = link_map root backup.
; Out:  the resolved address is stored at [rdi], rdi advanced by 8; loops back
;       to .next_hash unless the next slot starts with a HASH_END_TYP zero.
; Pseudo-C:
;   ElfW(Sym)* symtab = entry->l_info[DT_SYMTAB]->d_un.d_ptr
;   ElfW(Sym)* sym    = &symtab[bucket]
;   *phash            = sym->st_value + entry->l_addr
.chain_break:
        ; ElfW(Dyn)* dyn(rax) = entry->l_info[DT_SYMTAB]
        mov     rax, [r12 + L_INFO_DT_SYMTAB_OFF]
        ; ElfW(Sym)* symtab(rax) = dyn->d_un.d_ptr
        mov     rax, [rax + D_UN_PTR_OFF]
        ; rdx = bucket * 3, so that [rax + rdx * 8] addresses entry `bucket`
        ; with a 24-byte stride.
        lea     rdx, [rcx + rcx * 2]

%ifdef IFUNC_SUPPORT
        ; large opcode, but, ~almost the same as the next one, so,
        ; should compress well
        mov     rcx, [rax + rdx * 8 + ST_VALUE_OFF]
        mov     rax, [rax + rdx * 8 + ST_INFO_OFF ] ; actually just 'al' needed here

%ifdef SKIP_ZERO_VALUE
        ; zero st_value: keep searching in the next link_map entry
        jrcxz   .next_link
%endif
        ; void* finaladdr(rcx) = symoff + entry->l_addr
        add     rcx, [r12 + L_ADDR_OFF]

        ; is this an ifunc?
        and     al, ST_INFO__STT_MASK
        cmp     al, STT_GNU_IFUNC
        ; rax = finaladdr; xchg leaves the flags from the cmp intact
        xchg    rcx, rax
        jne     .no_ifunc
        ; if so: call the resolver; its return value (rax) is what gets stored.
        ; rdi, r11 and r9 are all caller-saved and all still needed afterwards
        ; (r9 feeds `lea r10, [r12 + r9 + R10_BIAS]` for every later symbol).
        ; r9 is parked in rbx rather than pushed: rbx is callee-saved, it is
        ; reloaded at .next_hash before its next use, and not pushing keeps
        ; the push count (and thus the stack alignment at the call) unchanged.
        push    rdi
        push    r11
        mov     rbx, r9
        call    rax
        mov     r9, rbx
        pop     r11
        pop     rdi
.no_ifunc:
; IFUNC_SUPPORT
%else
        mov     rax, [rax + rdx * 8 + ST_VALUE_OFF]
%ifdef SKIP_ZERO_VALUE
        or      rax, rax                ; zero value => weak symbol or sth
        jz      short .next_link
%endif
        ; void* finaladdr(rax) = symoff + entry->l_addr
        add     rax, [r12 + L_ADDR_OFF]
; IFUNC_SUPPORT
%endif
        stosq                           ; *phash = finaladdr
        cmp     HASH_END_TYP [rdi], 0
%ifdef IFUNC_SUPPORT
        ; The ifunc path makes the loop body longer, so do not force a rel8
        ; encoding here; let the assembler pick the jump size.
        jne     .next_hash
%else
        jne     short .next_hash
%endif
; } while (1)
; jmp short .next_hash

; if USE_DNLOAD_LOADER ... else ...
%endif
|
|
|
|
; All table slots are resolved. Set up registers and fall through into the
; code that follows this file's section contents.
.needed_end:
|
|
; int3 ; debugging
|
|
; xor rbp, rbp ; still 0 from _dl_start_user
|
|
%ifndef NO_START_ARG
|
|
; arg for _start
; rdi = rsp as it is before the optional ALIGN_STACK push below.
|
|
mov rdi, rsp
|
|
%endif
|
|
%ifdef ALIGN_STACK
|
|
; Moves rsp down by 8; the pushed value is whatever rax currently holds.
push rax
|
|
%endif
|
|
%ifdef USE_DL_FINI
|
|
; rsi = the value stashed in r13 at _smol_start (the incoming rdx).
xchg rsi, r13 ; _dl_fini
|
|
%endif
|
|
; fallthru to _start
|
|
%ifdef ELF_TYPE
|
|
; Exported label marking the end of the loader code.
global _smol_rt_end:
|
|
_smol_rt_end:
|
|
%endif
|
|
|
|
;.loopme: jmp short .loopme
|