smol/rt/loader64.asm

375 lines
9.2 KiB
NASM
Raw Permalink Normal View History

; vim: set ft=nasm et:
2019-01-30 23:14:36 +00:00
; Build-time configuration.
;
; HASH_END_TYP is the operand size used by the end-of-table test
; `cmp HASH_END_TYP [rdi], 0` in the resolver loops below. If the build does
; not define it, it falls back to `word` and a warning is emitted.
%ifndef HASH_END_TYP
%warning "W: HASH_END_TYP not defined, falling back to 16-bit!"
%define HASH_END_TYP word
%endif
; R10_BIAS is added once when r10 is formed
; (`lea r10, [r12 + r9 + R10_BIAS]`) and subtracted again in every
; `[r10 + LF_* - R10_BIAS]` displacement, so the effective addresses do not
; depend on its value.
; NOTE(review): presumably tuned so that those displacements fit a signed
; 8-bit encoding -- confirm against the LF_* values in rtld.inc.
;%define R10_BIAS (0x2B4)
%define R10_BIAS (0x2B4+0x40)
2019-01-30 23:14:36 +00:00
; Offsets and constants (L_*, LF_*, ST_*, DT_*, ...) used below are expected
; to come from this include.
%include "rtld.inc"
; ELF_TYPE selects the output section of the loader.
%ifdef ELF_TYPE
[section .text.startup.smol]
%else
; not defined -> debugging!
[section .text]
%endif
; Register roles as used by the code in this file:
; r9 : ptrdiff_t glibc_vercompat_extra_hi_field_off
;      (only set and used by the non-USE_DNLOAD_LOADER variant)
2019-02-02 17:14:28 +00:00
; r10: struct link_map* entry + far correction factor
;      (entry + r9 + R10_BIAS, non-USE_DNLOAD_LOADER variant only)
; r11: struct link_map* root (backup copy; r12 is reloaded from it for
;      every hash)
; r12: struct link_map* entry
; r14: hash currently being resolved (loaded as r14d from the _symbols table)
; rdi: cursor into the _symbols table
2019-02-02 17:14:28 +00:00
; r13: _dl_fini address (reqd by the ABI)
2019-01-30 23:14:36 +00:00
; Without ELF_TYPE (debug build) this code is also exported as _start and
; _symbols is taken from another object.
%ifndef ELF_TYPE
extern _symbols
global _start
_start:
%endif
global _smol_start:
2019-01-30 23:14:36 +00:00
; Entry point of the runtime loader. Finds the root of the link_map list,
; then falls into one of the two resolver variants that follow.
_smol_start:
2019-02-02 17:14:28 +00:00
%ifdef USE_DL_FINI
2019-01-30 23:14:36 +00:00
; Park the incoming rdx in r13; it is moved into rsi again at .needed_end.
xchg r13, rdx ; _dl_fini
2019-02-02 17:14:28 +00:00
%endif
2019-01-30 23:14:36 +00:00
2019-02-06 20:48:27 +00:00
%ifdef USE_DT_DEBUG
; r12 = *(void**)(_DEBUG value + 8)
; NOTE(review): presumably _DEBUG holds the DT_DEBUG pointer (struct r_debug)
; and offset 8 is its r_map field -- confirm against rtld.inc / the linker
; script that defines _DEBUG.
mov r12, [rel _DEBUG]
mov r12, [r12 + 8]
2019-02-06 20:48:27 +00:00
%else
2019-01-30 23:14:36 +00:00
; Read the qword just below the current stack pointer, which is expected to
; still hold a stale return address into the dynamic linker.
mov r12, [rsp - 8] ; return address of _dl_init
; The 4 bytes at (return address - 20) are taken as the disp32 of a
; RIP-relative instruction that ends at (return address - 16).
; ebx is zero-extended into rbx, so the addition below only yields the
; RIP-relative target for a non-negative displacement.
mov ebx, dword [r12 - 20] ; decode part of 'mov rdi, [rel _rtld_global]'
; r12 = *(instruction end + disp32) = first qword of the referenced object
mov r12, [r12 + rbx - 16] ; ???
2019-02-06 20:48:27 +00:00
%endif
; struct link_map* root = r12
2019-02-06 20:48:27 +00:00
%ifdef SKIP_ENTRIES
mov r12, [r12 + L_NEXT_OFF] ; skip this binary
; mov r12, [r12 + L_NEXT_OFF] ; skip the vdso
; the second one isn't needed anymore, see code below (.next_link)
; (.next_link advances to the next entry before looking at anything, so the
; entry r12 points to here is itself never searched)
2019-02-06 20:48:27 +00:00
%endif
; ---------------------------------------------------------------------------
; USE_DNLOAD_LOADER variant.
; For every hash in the _symbols table: walk the link_map list, find each
; object's string and symbol tables through its dynamic section, hash every
; symbol name and compare against the wanted hash. On a match the table slot
; is overwritten in place with the symbol's address.
;   rdi  = cursor into _symbols      r11 = link_map root (backup)
;   r12  = current link_map entry    r14d = wanted hash
;   r8   = strtab                    rdx = current ElfW(Sym)*
;   rsi  = scratch pointer (dynamic section, then symbol name)
; ---------------------------------------------------------------------------
%ifdef USE_DNLOAD_LOADER
; rdi = _symbols, r11 = r12 (root); push/pop pairs are used as register moves
push _symbols
push r12
pop r11
pop rdi
2020-08-24 20:44:56 +00:00
%ifdef HANG_ON_STARTUP
.loopme: jmp short .loopme ; debugging
%endif
; ---- outer loop: one iteration per entry of the _symbols table ----
.next_hash:
; r14d = hash stored in the current table slot
mov r14d, dword [rdi]
; assume it's nonzero
; restart the search from the root: r12 = r11
push r11
pop r12
; ---- middle loop: one iteration per link_map entry ----
.next_link:
mov r12, [r12 + L_NEXT_OFF]
; ElfW(Dyn)* dyn(rsi) = r12->l_ld
mov rsi, [r12 + L_LD_OFF]
; get strtab off
; Scan the dynamic entries: the first lodsq reads d_tag, the second reads the
; value. Only the low byte of the tag is compared. lodsq does not modify
; flags, so the jne still tests the cmp result.
.next_dyn:
lodsq
cmp al, DT_STRTAB
lodsq
jne short .next_dyn
; void* addr(rcx) = r12->l_addr
; const char* strtab(r8)=lookup(rsi,DT_STRTAB)/*,*symtab_end(r9)=r8*/;
mov rcx, [r12 + L_ADDR_OFF]
; A value below l_addr is treated as an offset and rebased; otherwise it is
; used as an absolute address.
cmp rax, rcx
jae short .noreldynaddr
add rax, rcx
.noreldynaddr:
; r8 = strtab
push rax
; push rax
pop r8
; pop r9
; const ElfW(Sym)* symtab(rdx) = lookup(rsi, DT_SYMTAB);
; The dynamic entry directly following DT_STRTAB is consumed without checking
; its tag, i.e. it is assumed to be the DT_SYMTAB entry.
lodsq ; SYMTAB d_tag
lodsq ; SYMTAB d_un.d_ptr
; same rebasing rule as for strtab
cmp rax, rcx
jae short .norelsymaddr
add rax, rcx
.norelsymaddr:
2019-03-11 14:11:35 +00:00
; rdx = symtab
; xchg rax, rdx
push rax
pop rdx
; ---- inner loop: one iteration per symbol of the current object ----
.next_sym:
; rsi = strtab + sym->st_name (the 32-bit load zero-extends into rsi)
mov esi, dword [rdx + ST_NAME_OFF]
add rsi, r8;9
%ifndef USE_CRC32C_HASH
; djb2
; initial state: eax = 5381 (hash), ebx = 33 (multiplier), ecx = 0
2019-03-11 14:11:35 +00:00
xor ecx, ecx
push 33
push 5381
2019-03-11 14:11:35 +00:00
; push 0
; pop rcx
pop rax
pop rbx
%else
; crc32
; initial state: ecx = 0 (running checksum)
xor ecx, ecx
%endif
; ---- hash loop: one iteration per character of the symbol name ----
.nexthashiter:
%ifndef USE_CRC32C_HASH
; djb2
2019-03-11 14:11:35 +00:00
; TODO: optimize register usage a bit more
; Swap so that lodsb can load into al without destroying the hash; the hash
; is parked in ecx for the duration of the load.
xchg eax, ecx
%endif
2019-03-11 14:11:35 +00:00
; al = next character; ZF is set when the terminating NUL is reached
lodsb
or al, al
%ifndef USE_CRC32C_HASH
; djb2
2019-03-11 14:11:35 +00:00
; Swap back: eax = hash, ecx = character. xchg leaves the flags untouched,
; so the jz below still tests the character.
xchg eax, ecx
%endif
2019-03-11 14:11:35 +00:00
jz short .breakhash
%ifndef USE_CRC32C_HASH
; djb2
; hash = hash * 33 + character; mul writes edx, so rdx (the symbol pointer)
; is saved around it
2019-03-11 14:11:35 +00:00
push rdx
mul ebx
pop rdx
add eax, ecx
%else
; crc32c
crc32 ecx, al
%endif
2019-03-11 14:11:35 +00:00
jmp short .nexthashiter
; ---- end of name: compare the computed hash with the wanted one ----
.breakhash:
%ifdef USE_CRC32C_HASH
; crc32c
cmp r14d, ecx
%else
; djb2
cmp r14d, eax
%endif
2019-03-11 14:11:35 +00:00
je short .hasheq
; No match: advance to the next symbol. The scan of this object stops once
; the symbol pointer reaches the strtab address (r8), i.e. the string table
; start is used as the end marker of the symbol table.
add rdx, SYMTAB_SIZE
cmp rdx, r8
jb short .next_sym
jmp short .next_link
2019-03-11 14:11:35 +00:00
; ---- match: compute the final address and store it ----
.hasheq:
2020-08-22 01:41:25 +00:00
%ifdef IFUNC_SUPPORT
; cl = sym->st_info, examined further down
mov cl , [rdx + ST_INFO_OFF]
%endif
mov rax, [rdx + ST_VALUE_OFF]
2019-04-02 14:58:36 +00:00
%ifdef SKIP_ZERO_VALUE
; a zero st_value is not accepted: continue with the next link_map entry
or rax, rax
jz short .next_link
%endif
; rax = st_value + entry->l_addr
add rax, [r12 + L_ADDR_OFF]
2020-08-22 01:41:25 +00:00
%ifdef IFUNC_SUPPORT
; is the symbol type STT_GNU_IFUNC?
and cl, ST_INFO__STT_MASK
cmp cl, STT_GNU_IFUNC
%ifdef SKIP_ZERO_VALUE
jne short .no_ifunc2
; ifunc: call the resolver; rax becomes whatever it returns.
; rdi and r11 are the only registers that must survive in this variant.
push rdi
push r11
call rax
pop r11
pop rdi
.no_ifunc2:
%else ; !SKIP_ZERO_VALUE
; the resolver call lives out of line at .ifunc and jumps back to .no_ifunc
je short .ifunc
2020-08-22 01:41:25 +00:00
.no_ifunc:
%endif
2020-08-22 01:41:25 +00:00
%endif
; Overwrite the table slot the hash was read from with the address and
; advance rdi to the next slot.
stosq
; A zero of size HASH_END_TYP at the next slot terminates the table.
cmp HASH_END_TYP [rdi], 0
%ifdef IFUNC_SUPPORT
%ifdef SKIP_ZERO_VALUE
; no `short` here, so the assembler is free to pick a near jump
jne .next_hash;short .next_hash
%else ; IFUNC_SUPPORT && !SKIP_ZERO_VALUE
jne short .next_hash
%endif
%else ; !IFUNC_SUPPORT
jne short .next_hash
%endif
2020-08-22 01:41:25 +00:00
%ifdef IFUNC_SUPPORT
%ifndef SKIP_ZERO_VALUE
2020-08-22 01:41:25 +00:00
; table finished: skip over the out-of-line resolver call
jmp short .break_loop
.ifunc:
;;int3 ; in this call, we lose rax rcx rdx rsi rdi r8 r9 r10 r11
; we only need persistence for rdi and r11 tho
;push rcx
;push rdx
;push rsi
push rdi
;push r8
;push r9
;push r10
push r11
call rax
pop r11
;pop r10
;pop r9
;pop r8
pop rdi
;pop rsi
;pop rdx
;pop rcx
jmp short .no_ifunc
.break_loop:
%endif
%endif
2020-08-22 01:41:25 +00:00
; if USE_DNLOAD_LOADER
; ---------------------------------------------------------------------------
; Default variant (USE_DNLOAD_LOADER not defined).
; Uses the hash-table fields stored in each link_map entry (LF_NBUCKETS_OFF,
; LF_GNU_BUCKETS_OFF, LF_GNU_CHAIN_ZERO_OFF) to look every hash up directly.
;   rdi  = cursor into _symbols      r11 = link_map root (backup)
;   r12  = current link_map entry    r14d = wanted hash, ebx = hash >> 1
;   r9   = offset correction         r10 = r12 + r9 + R10_BIAS
;   ecx  = current chain index ("bucket" in the pseudo-code comments)
; ---------------------------------------------------------------------------
%else
; Locate the dword inside the root link_map that equals the low 32 bits of
; the address of _smol_start:
;   rax = _smol_start, rdi = r12 (root), rcx = -1 (effectively unbounded)
push _smol_start
push r12
push -1
pop rcx
pop rdi
pop rax
2019-03-11 14:11:35 +00:00
repne scasd ; technically, scasq should be used, but meh. this is 1 byte smaller
; After the scan rdi points 4 bytes past the matching dword, hence the +4.
; r9 = (offset at which the entry address was found) - LF_ENTRY_OFF,
; i.e. how far the real field position differs from the expected one.
sub rdi, r12
sub rdi, LF_ENTRY_OFF+4
xchg r9, rdi
2019-01-30 23:14:36 +00:00
; rdi = _symbols, r11 = r12 (root)
push _symbols
; back up link_map root
push r12
pop r11
pop rdi
2020-08-24 20:44:56 +00:00
%ifdef HANG_ON_STARTUP
.loopme: jmp short .loopme ; debugging
%endif
; ---- outer loop: one iteration per entry of the _symbols table ----
.next_hash:
; r14d = hash stored in the current table slot (upper half of r14 is zeroed)
mov r14d, dword [rdi]
; assume we need at least one function
; or al, al
; jz short .needed_end
; restart the search from the root
mov r12, r11
; push r11
; ebx = hash
push r14
pop rbx
; pop r12
; shift right because we don't want to compare the lowest bit
shr ebx, 1
; ---- middle loop: one iteration per link_map entry ----
.next_link:
mov r12, [r12 + L_NEXT_OFF]
; r10 = entry + offset correction + bias; every access through r10 below
; subtracts R10_BIAS again in its displacement
lea r10, [r12 + r9 + R10_BIAS]
; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets
; (edx:eax = 0:hash, divided by ecx; the remainder ends up in edx)
xor edx, edx
push r14
pop rax
mov ecx, dword [r10 + LF_NBUCKETS_OFF - R10_BIAS]
div ecx
; uint32_t bucket(ecx) = entry->l_gnu_buckets[bkt_ind]
mov r8 , [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS]
mov ecx, dword [r8 + rdx * 4]
; can be ignored apparently?
; jecxz .next_link
; ---- inner loop: walk the hash chain starting at index ecx ----
.next_chain:
; uint32_t luhash(edx) = entry->l_gnu_chain_zero[bucket] >> 1
mov rdx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS]
mov edx, dword [rdx + rcx * 4]
; TODO: make this not suck. (maybe using bt*?)
; keep the unshifted low byte in al so its bit 0 can be tested afterwards
mov al, dl
shr edx, 1
; if (luhash == hash) break;
cmp edx, ebx
je short .chain_break
; Bit 0 of the chain word set: stop walking this chain and try the next
; link_map entry. Otherwise step to the next chain index.
and al, 1
jnz short .next_link
inc ecx
jmp short .next_chain
2019-01-30 23:14:36 +00:00
; ---- match: ecx is the index of the symbol in the symbol table ----
.chain_break:
; ElfW(Sym)* symtab = entry->l_info[DT_SYMTAB]->d_un.d_ptr
; ElfW(Sym)* sym = &symtab[bucket]
; *phash = sym->st_value + entry->l_addr
; ElfW(Dyn)* dyn(rax) = entry->l_info[DT_SYMTAB]
mov rax, [r12 + L_INFO_DT_SYMTAB_OFF]
; ElfW(Sym)* symtab(rax) = dyn->d_un.d_ptr
mov rax, [rax + D_UN_PTR_OFF]
; ElfW(Addr) symoff(rax) = symtab[bucket].st_value
; rdx = index * 3; the loads that follow scale it by 8, giving a stride of
; 24 bytes per symbol table entry
lea rdx, [rcx + rcx * 2]
2020-08-22 01:41:25 +00:00
; ---- resolve the matched symbol and store its address ----
; In:  rax = symtab, rdx = 3 * symbol index, r12 = link_map entry,
;      rdi = current _symbols slot
; Out: slot overwritten with the final address, rdi advanced to the next slot
%ifdef IFUNC_SUPPORT
; large opcode, but, ~almost the same as the next one, so,
; should compress well
; rcx = sym->st_value, al = sym->st_info
mov rcx, [rax + rdx * 8 + ST_VALUE_OFF]
mov rax, [rax + rdx * 8 + ST_INFO_OFF ] ; actually just 'al' needed here
%ifdef SKIP_ZERO_VALUE
; a zero st_value is not accepted: continue with the next link_map entry
jrcxz .next_link
%endif
; void* finaladdr(rcx) = symoff + entry->l_addr
add rcx, [r12 + L_ADDR_OFF]
; is this an ifunc?
and al, ST_INFO__STT_MASK
cmp al, STT_GNU_IFUNC
; rax = finaladdr; xchg does not touch the flags set by the cmp above
xchg rcx, rax
jne .no_ifunc
; if so: call the resolver
; The resolver is an ordinary function, so it may clobber every caller-saved
; register. Three of those are still live in this variant:
;   rdi = _symbols cursor, r11 = link_map root,
;   r9  = offset correction used by `lea r10, [r12 + r9 + R10_BIAS]` for
;         every later link_map entry.
; The fourth push is padding only: it keeps the number of pushes even, so the
; stack alignment at the call is the same as with no pushes at all.
push rdi
push r11
push r9
push rax ; padding
call rax
pop rcx ; drop the padding slot (rcx is dead here)
pop r9
pop r11
pop rdi
.no_ifunc:
%else ; !IFUNC_SUPPORT
mov rax, [rax + rdx * 8 + ST_VALUE_OFF]
%ifdef SKIP_ZERO_VALUE
or rax, rax ; zero value => weak symbol or sth
jz short .next_link
%endif
; void* finaladdr(rax) = symoff + entry->l_addr
add rax, [r12 + L_ADDR_OFF]
%endif ; IFUNC_SUPPORT
stosq ; *phash = finaladdr
; A zero of size HASH_END_TYP at the next slot terminates the table.
cmp HASH_END_TYP [rdi], 0
; No `short`: the distance back to .next_hash depends on the options above,
; so the assembler picks the encoding that fits.
jne .next_hash
; } while (1)
; jmp short .next_hash
; if USE_DNLOAD_LOADER ... else ...
%endif
2019-01-30 23:14:36 +00:00
; All table entries are resolved. Set up the registers for the program's
; entry code, which is expected to be placed directly after this loader
; (there is no jump or ret here; execution simply falls through).
.needed_end:
; int3 ; debugging
; xor rbp, rbp ; still 0 from _dl_start_user
%ifndef NO_START_ARG
; arg for _start
; rdi = stack pointer as it is before the optional alignment push below
2019-01-30 23:14:36 +00:00
mov rdi, rsp
%endif
%ifdef ALIGN_STACK
; move rsp down by one 8-byte slot; the pushed value itself is not used here
push rax
%endif
2019-02-02 17:14:28 +00:00
%ifdef USE_DL_FINI
2019-01-30 23:14:36 +00:00
; rsi = the value saved from rdx at _smol_start
xchg rsi, r13 ; _dl_fini
2019-02-02 17:14:28 +00:00
%endif
2019-01-30 23:14:36 +00:00
; fallthru to _start
%ifdef ELF_TYPE
; marks the end of the loader code
global _smol_rt_end:
_smol_rt_end:
%endif
2019-01-30 23:14:36 +00:00
;.loopme: jmp short .loopme