From 1e17d117740dc0321f6492026011b925f6993593 Mon Sep 17 00:00:00 2001
From: PoroCYon
Date: Sat, 8 Aug 2020 00:07:52 +0200
Subject: [PATCH] CRC32C-based hash (thanks Intel) (Python part is still TODO)

---
 .gitignore      |  5 +++--
 rt/loader32.asm | 34 ++++++++++++++++++++++------------
 rt/loader64.asm | 45 +++++++++++++++++++++++++++++++++++----------
 smol/emit.py    | 36 +++++++++++++++++++++--------------
 smol/shared.py  | 31 +++++++++++++++++++++++++++++++
 smold.py        | 17 +++++++++++++----
 6 files changed, 125 insertions(+), 43 deletions(-)

diff --git a/.gitignore b/.gitignore
index 70ce409..fbb2428 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 /bin
 /obj
 __pycache__
-smol-*-*-*/
-*.tar.xz
+
+smol-20*-*-*/
+smol*.tar.xz
diff --git a/rt/loader32.asm b/rt/loader32.asm
index e4a98d7..a54f575 100644
--- a/rt/loader32.asm
+++ b/rt/loader32.asm
@@ -104,34 +104,44 @@ _smol_start:
     add esi, ebx

     push ecx
-%ifndef USE_HASH16
+
+    ; source in eax, result in eax
+%ifdef USE_CRC32C_HASH
+    push -1
+    pop eax
+%else
+    %ifndef USE_HASH16
     push ebx
     push 33
     push 5381
     pop eax
     pop ebx
-%else
+    %else
     xor eax, eax
-%endif
+    %endif
     xor ecx, ecx
+%endif

 .nexthashiter:
-    ; xchg eax, ecx
     lodsb
     or al, al
     xchg eax, ecx
     jz short .breakhash
-%ifndef USE_HASH16
+%ifdef USE_CRC32C_HASH
+    crc32 eax, cl
+%else
+    %ifndef USE_HASH16
     push edx
     mul ebx
     pop edx
     ; add eax, ecx
-%else
+    %else
     ror ax, 2
     ; add ax, cx
-%endif
+    %endif
     add eax, ecx
+%endif

     jmp short .nexthashiter
 .breakhash:
@@ -173,16 +183,16 @@ _smol_start:
     cmp al, STT_GNU_IFUNC
     jne short .no_ifunc
     ;int3
-%ifdef IFUNC_CORRECT_CCONV
+    %ifdef IFUNC_CORRECT_CCONV
     ; call destroys stuff, but we only need to preserve edi
     ; for our purposes anyway. we do need one push to align the
     ; stack to 16 bytes
     push edi
     call ecx
     pop edi
-%else
+    %else
     call ecx
-%endif
+    %endif
     db 0x3c ; cmp al, --> jump over next insn
 .no_ifunc:
     xchg ecx, eax
@@ -287,14 +297,14 @@ repne scasd
     cmp al, STT_GNU_IFUNC
     jne short .no_ifunc
     ;int3
-%ifdef IFUNC_CORRECT_CCONV
+    %ifdef IFUNC_CORRECT_CCONV
     ; call destroys stuff, but we only need to preserve edi
     ; for our purposes anyway. we do need one push to align the
     ; stack to 16 bytes
     push edi
     call ecx
     pop edi
-%else
+    %else
     call ecx
 %endif
     db 0x3c ; cmp al, --> jump over next insn
diff --git a/rt/loader64.asm b/rt/loader64.asm
index 336ef65..3ab9aaf 100644
--- a/rt/loader64.asm
+++ b/rt/loader64.asm
@@ -1,4 +1,4 @@
-; vim: set ft=nasm:
+; vim: set ft=nasm et:
 %ifndef HASH_END_TYP
 %warning "W: HASH_END_TYP not defined, falling back to 16-bit!"
@@ -101,6 +101,8 @@ _smol_start:
     mov esi, dword [rdx + ST_NAME_OFF]
     add rsi, r8;9

+%ifndef USE_CRC32C_HASH
+    ; djb2
     xor ecx, ecx
     push 33
     push 5381
@@ -108,22 +110,45 @@ _smol_start:
     ; pop rcx
     pop rax
     pop rbx
+%else
+    ; crc32
+    push -1
+    pop rcx
+%endif

 .nexthashiter:
+%ifndef USE_CRC32C_HASH
+    ; djb2
     ; TODO: optimize register usage a bit more
     xchg eax, ecx
+%endif
     lodsb
     or al, al
+%ifndef USE_CRC32C_HASH
+    ; djb2
     xchg eax, ecx
+%endif
     jz short .breakhash
+%ifndef USE_CRC32C_HASH
+    ; djb2
     push rdx
     mul ebx
     pop rdx
     add eax, ecx
+%else
+    ; crc32c
+    crc32 ecx, al
+%endif
     jmp short .nexthashiter

 .breakhash:
-
+%ifdef USE_CRC32C_HASH
+    ; crc32c
+    cmp r14d, ecx
+%else
+    ; djb2
     cmp r14d, eax
+
+%endif
     je short .hasheq
     add rdx, SYMTAB_SIZE
@@ -144,7 +169,7 @@ _smol_start:
 %ifdef IFUNC_SUPPORT
     and cl, ST_INFO__STT_MASK
     cmp cl, STT_GNU_IFUNC
-%ifdef SKIP_ZERO_VALUE
+    %ifdef SKIP_ZERO_VALUE
     jne short .no_ifunc2
     push rdi
     push r11
@@ -152,25 +177,25 @@ _smol_start:
     pop r11
     pop rdi
 .no_ifunc2:
-%else ; !SKIP_ZERO_VALUE
+    %else ; !SKIP_ZERO_VALUE
     je short .ifunc
 .no_ifunc:
-%endif
+    %endif
 %endif
     stosq
     cmp HASH_END_TYP [rdi], 0
 %ifdef IFUNC_SUPPORT
-%ifdef SKIP_ZERO_VALUE
+    %ifdef SKIP_ZERO_VALUE
     jne .next_hash;short .next_hash
-%else ; IFUNC_SUPPORT && !SKIP_ZERO_VALUE
+    %else ; IFUNC_SUPPORT && !SKIP_ZERO_VALUE
     jne short .next_hash
-%endif
+    %endif
 %else ; !IFUNC_SUPPORT
     jne short .next_hash
 %endif

 %ifdef IFUNC_SUPPORT
-%ifndef SKIP_ZERO_VALUE
+    %ifndef SKIP_ZERO_VALUE
     jmp short .break_loop
 .ifunc: ;;int3
     ; in this call, we lose rax rcx rdx rsi rdi r8 r9 r10 r11
@@ -194,7 +219,7 @@ _smol_start:
     ;pop rcx
     jmp short .no_ifunc
 .break_loop:
-%endif
+    %endif
 %endif

 ; if USE_DNLOAD_LOADER
diff --git a/smol/emit.py b/smol/emit.py
index 56aae42..440af98 100644
--- a/smol/emit.py
+++ b/smol/emit.py
@@ -37,13 +37,16 @@ def sort_imports(libraries, hashfn):
     if sys.version_info < (3, 6): return OrderedDict(ll)
     else: return dict(ll)

-def output_x86(libraries, nx, h16, outf, det):
+def output_x86(libraries, nx, hashid, outf, det):
     outf.write('; vim: set ft=nasm:\n') # be friendly

-    if nx: outf.write('%define USE_NX 1\n')
-    if h16: outf.write('%define USE_HASH16 1\n')
+    defff = define_for_hash[hashid]
+    if defff is not None:
+        outf.write('%define {} 1\n'.format(defff))
+    if nx:
+        outf.write('%define USE_NX 1\n')

-    hashfn = hash_bsd2 if h16 else hash_djb2
+    hashfn = get_hash_fn(hashid)
     if det: libraries = sort_imports(libraries, hashfn)

     outf.write('%%define HASH_END_TYP %s\n' %
@@ -128,22 +131,25 @@ global {name}

 # end output_x86

-def output_amd64(libraries, nx, h16, outf, det):
-    if h16:
+def output_amd64(libraries, nx, hashid, outf, det):
+    if hashid == HASH_BSD2:
         error("--hash16 not supported yet for x86_64 outputs.")

-    if nx: outf.write('%define USE_NX 1\n')
-# if h16: outf.write('%define USE_HASH16 1\n')
+    outf.write('; vim: set ft=nasm:\n')
+    outf.write('bits 64\n')

-    hashfn = hash_djb2 #hash_bsd2 if h16 else hash_djb2
+    defff = define_for_hash[hashid]
+    if defff is not None:
+        outf.write('%define {} 1\n'.format(defff))
+    if nx:
+        outf.write('%define USE_NX 1\n')
+
+    hashfn = get_hash_fn(hashid)
     if det: libraries = sort_imports(libraries, hashfn)

     outf.write('%%define HASH_END_TYP %s\n' %
         fetch_width_from_bits[get_min_check_width(libraries, hashfn)])

-    outf.write('; vim: set ft=nasm:\n')
-    outf.write('bits 64\n')
-
     shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }

     outf.write('%include "header64.asm"\n')
@@ -208,9 +214,9 @@ global {name}

 # end output_amd64

-def output(arch, libraries, nx, h16, outf, det):
-    if arch == 'i386': output_x86(libraries, nx, h16, outf, det)
-    elif arch == 'x86_64': output_amd64(libraries, nx, h16, outf, det)
+def output(arch, libraries, nx, hashid, outf, det):
+    if arch == 'i386': output_x86(libraries, nx, hashid, outf, det)
+    elif arch == 'x86_64': output_amd64(libraries, nx, hashid, outf, det)
     else: error("E: cannot emit for arch '%s'" % str(arch))

diff --git a/smol/shared.py b/smol/shared.py
index 545a9f4..c70003d 100644
--- a/smol/shared.py
+++ b/smol/shared.py
@@ -9,6 +9,16 @@ archmagic = {
     'x86_64': 62, 62: 'x86_64',
 }

+HASH_DJB2   = 0
+HASH_BSD2   = 1
+HASH_CRC32C = 2
+
+define_for_hash = {
+    HASH_DJB2:   None,
+    HASH_BSD2:   'USE_HASH16',
+    HASH_CRC32C: 'USE_CRC32C_HASH'
+}
+

 def hash_bsd2(s):
     h = 0
@@ -24,10 +34,31 @@ def hash_djb2(s):

     return h


+def hash_crc32c(s):
+    # crc32 implementation is basically:
+    # sum = -1; for (; *s; ++s) crc32_instr(&sum, *s); return sum
+    assert False, "not implemented!" # TODO
+
+
 def eprintf(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)

+def get_hash_id(h16, c32):
+    if not h16 and not c32:
+        return HASH_DJB2
+    elif h16 and not c32:
+        return HASH_BSD2
+    elif not h16 and c32:
+        return HASH_CRC32C
+    else:
+        return False, "??????? (shouldn't happen)"
+
+
+def get_hash_fn(hid):
+    return (hash_djb2, hash_bsd2, hash_crc32c)[hid]
+
+
 def error(*args, **kwargs):
     traceback.print_stack()
     eprintf(*args, **kwargs)
diff --git a/smold.py b/smold.py
index 22d434b..57e2bbf 100755
--- a/smold.py
+++ b/smold.py
@@ -24,8 +24,11 @@ def main():
                         help="directories to search libraries in")
     parser.add_argument('-s', '--hash16', default=False, action='store_true', \
-        help="Use 16-bit (BSD) hashes instead of 32-bit djb2 hashes. "+\
-             "Implies -fuse-dnload-loader")
+        help="Use 16-bit (BSD2) hashes instead of 32-bit djb2 hashes. "+\
+             "Implies -fuse-dnload-loader. Only usable for 32-bit output.")
+    parser.add_argument('-c', '--crc32c', default=False, action='store_true', \
+        help="Use Intel's crc32 intrinsic for hashing. "+\
+             "Implies -fuse-dnload-loader. Conflicts with `--hash16'.")
     parser.add_argument('-n', '--nx', default=False, action='store_true', \
         help="Use NX (i.e. don't use RWE pages). Costs the size of one phdr, "+\
             "plus some extra bytes on i386.")
@@ -118,7 +121,10 @@ def main():

     args = parser.parse_args()

-    if args.hash16:
+    if args.hash16 and args.crc32c:
+        error("Cannot combine --hash16 and --crc32c!")
+
+    if args.hash16 or args.crc32c:
         args.fuse_dnload_loader = True
     if args.fskip_zero_value:
         args.asflags.insert(0, "-DSKIP_ZERO_VALUE")
@@ -145,6 +151,9 @@ def main():
         error("Unknown/unsupported architecture '%s'" % str(arch))
     if args.verbose: eprintf("arch: %s" % str(arch))

+    if args.hash16 and arch not in ('i386', 3):
+        error("Cannot use --hash16 for arch `%s' (not i386)" % (arch))
+
     objinput = None
     objinputistemp = False
     tmp_asm_file = args.output
@@ -180,7 +189,7 @@ def main():
             symbols[library].append((symbol, reloc))

     with os.fdopen(tmp_asm_fd, mode='w') as taf:
-        output(arch, symbols, args.nx, args.hash16, taf, args.det)
+        output(arch, symbols, args.nx, get_hash_id(args.hash16, args.crc32c), taf, args.det)

     if args.verbose: eprintf("wrote symtab to %s" % tmp_asm_file)
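
The Python half of the new hash, hash_crc32c() in smol/shared.py, is still a TODO in this
patch. A minimal sketch of what it could look like, assuming it has to reproduce what the
crc32-instruction loop computes over the symbol name when seeded with -1 and compared
without a final XOR (as the loader64.asm path above does):

    def hash_crc32c(s):
        # Bitwise model of the x86 CRC32 instruction: reflected CRC-32C
        # (polynomial 0x1EDC6F41, reflected form 0x82F63B78).
        # Seeded with -1 like the loader code; no final XOR, because the
        # loader compares the raw register contents against the emitted hash.
        if isinstance(s, str):
            s = s.encode('utf-8')  # assumption: symbol names may arrive as str
        h = 0xFFFFFFFF
        for byte in s:
            h ^= byte
            for _ in range(8):
                h = (h >> 1) ^ (0x82F63B78 if h & 1 else 0)
        return h

As a sanity check, the published CRC-32C check value for "123456789" is 0xE3069283; the
sketch reproduces it once the final inversion (which the loader deliberately skips) is
applied by hand:

    assert hash_crc32c("123456789") ^ 0xFFFFFFFF == 0xE3069283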