{simple,shoddy,smart} minsize-oriented linker
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

371 lines
9.2 KiB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
  1. ; vim: set ft=nasm et:
  2. %ifndef HASH_END_TYP
  3. %warning "W: HASH_END_TYP not defined, falling back to 16-bit!"
  4. %define HASH_END_TYP word
  5. %endif
  6. ;%define R10_BIAS (0x2B4) ; older value; R10_BIAS is added into r10 and subtracted again from every LF_* displacement (presumably to keep those displacements short -- TODO confirm)
  7. %define R10_BIAS (0x2B4+0x40)
  8. %include "rtld.inc"
  9. %ifdef ELF_TYPE
  10. [section .text.startup.smol]
  11. %else
  12. ; not defined -> debugging!
  13. [section .text]
  14. %endif
  15. ; r9 : ptrdiff_t glibc_vercompat_extra_hi_field_off (only set by the hash-table loader, from its repne scasd probe)
  16. ; r10: struct link_map* entry + far correction factor (r12 + r9 + R10_BIAS)
  17. ; r12: struct link_map* entry (root right after this prologue, then the object currently searched)
  18. ; r14: struct link_map* root -- NOTE(review): stale; the loaders keep the root in r11 and the current hash in r14d
  19. ; r13: _dl_fini address (reqd by the ABI), only saved when USE_DL_FINI is defined
  20. %ifndef ELF_TYPE
  21. extern _symbols
  22. global _start
  23. _start:
  24. %endif
  25. global _smol_start:
  26. _smol_start: ; loader entry: locate the link_map list, then let one of the two loaders fill the _symbols table
  27. %ifdef USE_DL_FINI
  28. xchg r13, rdx ; _dl_fini: park the incoming rdx in r13 while the loader runs
  29. %endif
  30. %ifdef USE_DT_DEBUG
  31. mov r12, [rel _DEBUG] ; r12 = qword stored at _DEBUG (presumably the DT_DEBUG pointer, i.e. struct r_debug* -- TODO confirm)
  32. mov r12, [r12 + 8] ; r12 = qword at offset 8 of that object (presumably r_debug.r_map -- TODO confirm)
  33. %else
  34. mov r12, [rsp - 8] ; return address of _dl_init (read from just below the current stack pointer)
  35. mov ebx, dword [r12 - 20] ; decode part of 'mov rdi, [rel _rtld_global]': ebx = 32-bit field 20 bytes before the return address
  36. mov r12, [r12 + rbx - 16] ; r12 = qword at return address + ebx - 16; NOTE(review): presumably the link_map root inside _rtld_global, depends on the exact ld.so code layout -- confirm
  37. %endif
  38. ; struct link_map* root = r12
  39. %ifdef SKIP_ENTRIES
  40. mov r12, [r12 + L_NEXT_OFF] ; skip this binary
  41. ; mov r12, [r12 + L_NEXT_OFF] ; skip the vdso
  42. ; the second one isn't needed anymore, see code below (.next_link): it advances before inspecting an entry
  43. %endif
  44. %ifdef USE_DNLOAD_LOADER
  45. push _symbols ; dnload-style loader: for each hash in the _symbols table, scan every object symbol table linearly and overwrite the hash with the resolved address
  46. push r12
  47. pop r11 ; r11 = link_map root (kept for restarting each search)
  48. pop rdi ; rdi = _symbols (table cursor: hashes are read and addresses written through it)
  49. ;.loopme: jmp short .loopme ; debugging
  50. .next_hash:
  51. mov r14d, dword [rdi] ; r14d = hash of the symbol wanted next
  52. ; assume it's nonzero
  53. push r11
  54. pop r12 ; restart the object walk from the root
  55. .next_link:
  56. mov r12, [r12 + L_NEXT_OFF] ; advance first, so the root entry itself is never searched; NOTE(review): no end-of-list test, a hash matching nothing presumably walks off the list -- confirm
  57. ; ElfW(Dyn)* dyn(rsi) = r12->l_ld
  58. mov rsi, [r12 + L_LD_OFF]
  59. ; get strtab off
  60. .next_dyn:
  61. lodsq ; rax = d_tag, rsi += 8
  62. cmp al, DT_STRTAB ; only the low byte of the tag is compared
  63. lodsq ; rax = d_un value; lodsq does not touch flags, so the cmp result survives
  64. jne short .next_dyn
  65. ; void* addr(rcx) = r12->l_addr
  66. ; const char* strtab(r8)=lookup(rsi,DT_STRTAB)/*,*symtab_end(r9)=r8*/;
  67. mov rcx, [r12 + L_ADDR_OFF]
  68. cmp rax, rcx ; a value below l_addr is treated as an unrelocated offset
  69. jae short .noreldynaddr
  70. add rax, rcx ; rebase it onto l_addr
  71. .noreldynaddr:
  72. push rax
  73. ; push rax
  74. pop r8 ; r8 = strtab
  75. ; pop r9
  76. ; const ElfW(Sym)* symtab(rdx) = lookup(rsi, DT_SYMTAB);
  77. lodsq ; SYMTAB d_tag (discarded unchecked: the entry right after DT_STRTAB is assumed to be DT_SYMTAB)
  78. lodsq ; SYMTAB d_un.d_ptr
  79. cmp rax, rcx ; same below-l_addr test as for strtab
  80. jae short .norelsymaddr
  81. add rax, rcx
  82. .norelsymaddr:
  83. ; xchg rax, rdx
  84. push rax
  85. pop rdx ; rdx = symtab cursor
  86. .next_sym:
  87. mov esi, dword [rdx + ST_NAME_OFF] ; esi = st_name (offset into strtab)
  88. add rsi, r8;9 ; rsi = pointer to the symbol name
  89. %ifndef USE_CRC32C_HASH
  90. ; djb2
  91. xor ecx, ecx ; ecx = 0, becomes the per-character scratch
  92. push 33
  93. push 5381
  94. ; push 0
  95. ; pop rcx
  96. pop rax ; eax = 5381, initial hash value
  97. pop rbx ; ebx = 33, multiplier
  98. %else
  99. ; crc32
  100. push -1
  101. pop rcx ; rcx = -1, initial CRC state
  102. %endif
  103. .nexthashiter:
  104. %ifndef USE_CRC32C_HASH
  105. ; djb2
  106. ; TODO: optimize register usage a bit more
  107. xchg eax, ecx ; move the hash out of eax so lodsb can load into al
  108. %endif
  109. lodsb ; al = next byte of the name, rsi += 1
  110. or al, al ; ZF = 1 on the terminating NUL
  111. %ifndef USE_CRC32C_HASH
  112. ; djb2
  113. xchg eax, ecx ; eax = hash again, ecx = character; xchg leaves ZF untouched
  114. %endif
  115. jz short .breakhash
  116. %ifndef USE_CRC32C_HASH
  117. ; djb2
  118. push rdx ; mul overwrites edx, which holds the symtab cursor
  119. mul ebx ; eax = hash * 33 (low 32 bits)
  120. pop rdx
  121. add eax, ecx ; hash = hash * 33 + character
  122. %else
  123. ; crc32c
  124. crc32 ecx, al ; fold the byte into the running CRC
  125. %endif
  126. jmp short .nexthashiter
  127. .breakhash:
  128. %ifdef USE_CRC32C_HASH
  129. ; crc32c
  130. cmp r14d, ecx ; the raw CRC state is compared, no final inversion is applied
  131. %else
  132. ; djb2
  133. cmp r14d, eax
  134. %endif
  135. je short .hasheq
  136. add rdx, SYMTAB_SIZE ; step to the next symbol record
  137. cmp rdx, r8 ; the symbol table is taken to end where the string table starts
  138. jb short .next_sym
  139. jmp short .next_link ; not in this object: try the next one
  140. .hasheq:
  141. %ifdef IFUNC_SUPPORT
  142. mov cl , [rdx + ST_INFO_OFF] ; cl = st_info, needed for the type test below
  143. %endif
  144. mov rax, [rdx + ST_VALUE_OFF] ; rax = st_value
  145. %ifdef SKIP_ZERO_VALUE
  146. or rax, rax
  147. jz short .next_link ; zero st_value: keep searching in the next object
  148. %endif
  149. add rax, [r12 + L_ADDR_OFF] ; rax = l_addr + st_value
  150. %ifdef IFUNC_SUPPORT
  151. and cl, ST_INFO__STT_MASK ; isolate the symbol type bits
  152. cmp cl, STT_GNU_IFUNC
  153. %ifdef SKIP_ZERO_VALUE
  154. jne short .no_ifunc2
  155. push rdi ; rdi and r11 are the only live caller-saved registers across the call
  156. push r11
  157. call rax ; run the resolver; its return value in rax is what gets stored
  158. pop r11
  159. pop rdi
  160. .no_ifunc2:
  161. %else ; !SKIP_ZERO_VALUE
  162. je short .ifunc
  163. .no_ifunc:
  164. %endif
  165. %endif
  166. stosq ; store the address over the table slot at rdi, rdi += 8
  167. cmp HASH_END_TYP [rdi], 0 ; a zero of width HASH_END_TYP marks the end of the table
  168. %ifdef IFUNC_SUPPORT
  169. %ifdef SKIP_ZERO_VALUE
  170. jne .next_hash;short .next_hash
  171. %else ; IFUNC_SUPPORT && !SKIP_ZERO_VALUE
  172. jne short .next_hash
  173. %endif
  174. %else ; !IFUNC_SUPPORT
  175. jne short .next_hash
  176. %endif
  177. %ifdef IFUNC_SUPPORT
  178. %ifndef SKIP_ZERO_VALUE
  179. jmp short .break_loop ; table done: jump over the out-of-line resolver stub
  180. .ifunc:
  181. ;;int3 ; in this call, we lose rax rcx rdx rsi rdi r8 r9 r10 r11
  182. ; we only need persistence for rdi and r11 tho
  183. ;push rcx
  184. ;push rdx
  185. ;push rsi
  186. push rdi
  187. ;push r8
  188. ;push r9
  189. ;push r10
  190. push r11
  191. call rax ; run the resolver; its return value in rax is what gets stored
  192. pop r11
  193. ;pop r10
  194. ;pop r9
  195. ;pop r8
  196. pop rdi
  197. ;pop rsi
  198. ;pop rdx
  199. ;pop rcx
  200. jmp short .no_ifunc ; back to the common store path
  201. .break_loop:
  202. %endif
  203. %endif
  204. ; if USE_DNLOAD_LOADER
  205. %else
  206. push _smol_start ; hash-table loader: probe the link_map layout first, then resolve every hash in _symbols through each object bucket/chain tables
  207. push r12
  208. push -1
  209. pop rcx ; rcx = -1, so the repne count is effectively unbounded
  210. pop rdi ; rdi = link_map root, start of the scan
  211. pop rax ; rax = address of _smol_start; scasd compares its low 32 bits
  212. repne scasd ; technically, scasq should be used, but meh. this is 1 byte smaller
  213. sub rdi, r12 ; rdi = byte offset just past the matching dword
  214. sub rdi, LF_ENTRY_OFF+4 ; the +4 undoes the scasd post-increment
  215. xchg r9, rdi ; r9 = (offset where the entry address was found) - LF_ENTRY_OFF
  216. push _symbols
  217. ; back up link_map root
  218. push r12
  219. pop r11 ; r11 = link_map root
  220. pop rdi ; rdi = _symbols (table cursor)
  221. ;.loopme: jmp short .loopme ; debugging
  222. .next_hash:
  223. mov r14d, dword [rdi] ; r14d = hash of the symbol wanted next
  224. ; assume we need at least one function
  225. ; or al, al
  226. ; jz short .needed_end
  227. mov r12, r11 ; restart the object walk from the root
  228. ; push r11
  229. push r14
  230. pop rbx ; ebx = hash
  231. ; pop r12
  232. ; shift right because we don't want to compare the lowest bit
  233. shr ebx, 1 ; ebx = hash >> 1, compared against chain words shifted the same way
  234. .next_link:
  235. mov r12, [r12 + L_NEXT_OFF] ; advance first, so the root entry itself is never searched
  236. lea r10, [r12 + r9 + R10_BIAS] ; r10 = entry + probed offset + bias; fields are then read as [r10 + LF_*_OFF - R10_BIAS]
  237. ; uint32_t bkt_ind(edx) = hash % entry->l_nbuckets
  238. xor edx, edx ; clear the high half of the edx:eax dividend
  239. push r14
  240. pop rax ; eax = hash
  241. mov ecx, dword [r10 + LF_NBUCKETS_OFF - R10_BIAS]
  242. div ecx ; edx = remainder = bucket index
  243. ; uint32_t bucket(ecx) = entry->l_gnu_buckets[bkt_ind]
  244. mov r8 , [r10 + LF_GNU_BUCKETS_OFF - R10_BIAS]
  245. mov ecx, dword [r8 + rdx * 4] ; ecx = first symbol index of that bucket
  246. ; can be ignored apparently?
  247. ; jecxz .next_link
  248. .next_chain:
  249. ; uint32_t luhash(edx) = entry->l_gnu_chain_zero[bucket] >> 1
  250. mov rdx, [r10 + LF_GNU_CHAIN_ZERO_OFF - R10_BIAS]
  251. mov edx, dword [rdx + rcx * 4] ; edx = chain word for symbol index ecx
  252. ; TODO: make this not suck. (maybe using bt*?)
  253. mov al, dl ; keep the low bit before it is shifted out
  254. shr edx, 1
  255. ; if (luhash == hash) break;
  256. cmp edx, ebx
  257. je short .chain_break
  258. ; ++bucket; } while (!(chainword & 1)); -- a set low bit ends the walk and moves on to the next object
  259. and al, 1
  260. jnz short .next_link
  261. inc ecx
  262. jmp short .next_chain
  263. .chain_break:
  264. ; ElfW(Sym)* symtab = entry->l_info[DT_SYMTAB]->d_un.d_ptr
  265. ; ElfW(Sym)* sym = &symtab[bucket]
  266. ; *phash = sym->st_value + entry->l_addr
  267. ; ElfW(Dyn)* dyn(rax) = entry->l_info[DT_SYMTAB]
  268. mov rax, [r12 + L_INFO_DT_SYMTAB_OFF]
  269. ; ElfW(Sym)* symtab(rax) = dyn->d_un.d_ptr
  270. mov rax, [rax + D_UN_PTR_OFF]
  271. ; ElfW(Addr) symoff(rax) = symtab[bucket].st_value
  272. lea rdx, [rcx + rcx * 2] ; rdx = index * 3; with the *8 scale below that is index * 24 bytes per symbol record
  273. %ifdef IFUNC_SUPPORT
  274. ; large opcode, but, ~almost the same as the next one, so,
  275. ; should compress well
  276. mov rcx, [rax + rdx * 8 + ST_VALUE_OFF]
  277. mov rax, [rax + rdx * 8 + ST_INFO_OFF ] ; actually just 'al' needed here
  278. %ifdef SKIP_ZERO_VALUE
  279. jrcxz .next_link ; zero st_value: keep searching in the next object
  280. %endif
  281. ; void* finaladdr(rcx) = symoff + entry->l_addr
  282. add rcx, [r12 + L_ADDR_OFF]
  283. ; is this an ifunc?
  284. and al, ST_INFO__STT_MASK
  285. cmp al, STT_GNU_IFUNC
  286. xchg rcx, rax ; rax = final address; xchg leaves the cmp flags untouched
  287. jne .no_ifunc
  288. ; if so: call the resolver
  289. push rdi ; rdi and r11 must survive the call
  290. push r11
  291. call rax ; the resolver return value in rax is what gets stored
  292. pop r11
  293. pop rdi
  294. .no_ifunc:
  295. ; IFUNC_SUPPORT
  296. %else
  297. mov rax, [rax + rdx * 8 + ST_VALUE_OFF]
  298. %ifdef SKIP_ZERO_VALUE
  299. or rax, rax ; zero value => weak symbol or sth
  300. jz short .next_link
  301. %endif
  302. ; void* finaladdr(rax) = symoff + entry->l_addr
  303. add rax, [r12 + L_ADDR_OFF]
  304. ; IFUNC_SUPPORT
  305. %endif
  306. stosq ; *phash = finaladdr (overwrites the hash slot, rdi += 8)
  307. cmp HASH_END_TYP [rdi], 0 ; a zero of width HASH_END_TYP marks the end of the table
  308. jne short .next_hash
  309. ; } while (1)
  310. ; jmp short .next_hash
  311. ; if USE_DNLOAD_LOADER ... else ...
  312. %endif
  313. .needed_end:
  314. ; int3 ; debugging
  315. ; xor rbp, rbp ; still 0 from _dl_start_user
  316. %ifndef NO_START_ARG
  317. ; arg for _start: rdi = stack pointer as it is before the optional alignment push
  318. mov rdi, rsp
  319. %endif
  320. %ifdef ALIGN_STACK
  321. push rax ; the pushed value is irrelevant, the push only moves rsp down by 8
  322. %endif
  323. %ifdef USE_DL_FINI
  324. xchg rsi, r13 ; _dl_fini: hand the value parked in r13 at entry over in rsi
  325. %endif
  326. ; fallthru to _start: there is no ret or jmp here, execution continues into whatever is placed after this code
  327. %ifdef ELF_TYPE
  328. global _smol_rt_end:
  329. _smol_rt_end:
  330. %endif
  331. ;.loopme: jmp short .loopme