mirror of https://github.com/Shizmob/smol
lots of stuff, see detailed desc.
* reorganize the project structure a bit (sorry not sorry :P) * src/ -> test/ * ldr/ -> src/, ld/ * mksyms -> smol.py (and smol*.py) * clean up and make the python script a bit less hacky * optimize the loader code (there can still be done more, though) * preserve the stack so argc, argv and envp can be read * more stuff
This commit is contained in:
parent
f6b9a927a6
commit
cccd9656de
|
@ -1,2 +1,3 @@
|
|||
/bin
|
||||
/obj
|
||||
/__pycache__
|
||||
|
|
68
Makefile
68
Makefile
|
@ -1,44 +1,66 @@
|
|||
LDRDIR = ldr
|
||||
OBJDIR = obj
|
||||
BINDIR = bin
|
||||
SRCDIR = src
|
||||
DATADIR = data
|
||||
OBJDIR := obj
|
||||
BINDIR := bin
|
||||
SRCDIR := src
|
||||
LDDIR := ld
|
||||
TESTDIR:= test
|
||||
|
||||
COPTFLAGS=-Os -fvisibility=hidden -mpreferred-stack-boundary=3 -fwhole-program \
|
||||
# -mpreferred-stack-boundary=3 messes up the stack and kills SSE!
|
||||
COPTFLAGS=-Os -fvisibility=hidden -fwhole-program \
|
||||
-ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer \
|
||||
-fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables
|
||||
CXXOPTFLAGS=$(COPTFLAGS) \
|
||||
-fno-rtti -fno-enforce-eh-specs -fnothrow-opt -fno-use-cxa-get-exception-ptr \
|
||||
-fno-implicit-templates -fno-threadsafe-statics -fno-use-cxa-atexit
|
||||
|
||||
ASFLAGS=-f elf -I $(LDRDIR)/
|
||||
CFLAGS=-Wall -Wextra -Wpedantic -std=c99 $(COPTFLAGS) -nostartfiles -fno-PIC
|
||||
CFLAGS=-Wall -Wextra -Wpedantic -std=gnu11 -nostartfiles -fno-PIC $(COPTFLAGS)
|
||||
CXXFLAGS=-Wall -Wextra -Wpedantic -std=c++11 $(CXXOPTFLAGS) -nostartfiles -fno-PIC
|
||||
|
||||
ASFLAGS=-f elf -I $(SRCDIR)/
|
||||
LDFLAGS=-m elf_i386
|
||||
LDFLAGS_=$(LDFLAGS) -T $(LDDIR)/link.ld --oformat=binary
|
||||
|
||||
CFLAGS += -m32
|
||||
CXXFLAGS += -m32
|
||||
|
||||
LIBS=-lc
|
||||
LDFLAGS=--oformat=binary -T ldr/link.ld
|
||||
|
||||
ASFLAGS += -DUSE_INTERP
|
||||
|
||||
.PHONY: all
|
||||
all: $(BINDIR)/test
|
||||
NASM ?= nasm
|
||||
PYTHON3 ?= python3
|
||||
|
||||
all: $(BINDIR)/sdl $(BINDIR)/hello
|
||||
|
||||
LIBS += -lSDL2 -lGL
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -rf $(OBJDIR)/* $(BINDIR)/*
|
||||
@$(RM) -vrf $(OBJDIR) $(BINDIR)
|
||||
|
||||
%/:
|
||||
@mkdir -vp "$@"
|
||||
|
||||
.SECONDARY:
|
||||
|
||||
$(OBJDIR)/%.o: $(SRCDIR)/%.c
|
||||
$(CC) -m32 $(CFLAGS) -c $^ -o $@
|
||||
$(OBJDIR)/%.o: $(SRCDIR)/%.c $(OBJDIR)/
|
||||
$(CC) -m32 $(CFLAGS) -c "$<" -o "$@"
|
||||
$(OBJDIR)/%.o: $(TESTDIR)/%.c $(OBJDIR)/
|
||||
$(CC) -m32 $(CFLAGS) -c "$<" -o "$@"
|
||||
|
||||
$(OBJDIR)/%.o.syms: $(OBJDIR)/%.o
|
||||
readelf -s $^ | grep UND | sed 1d | awk '{ print $$8 }' > $@
|
||||
$(OBJDIR)/%.start.o: $(OBJDIR)/%.o $(OBJDIR)/crt1.o
|
||||
$(LD) $(LDFLAGS) -r -o "$@" $^
|
||||
|
||||
$(OBJDIR)/symbols.%.s: $(OBJDIR)/%.o.syms
|
||||
$(LDRDIR)/mksyms $(LIBS) $$(cat $^) > $@
|
||||
$(OBJDIR)/crt1.o: $(SRCDIR)/crt1.c $(OBJDIR)/
|
||||
$(CC) $(CFLAGS) -c "$<" -o "$@"
|
||||
|
||||
$(OBJDIR)/header.%.o: $(OBJDIR)/symbols.%.s $(LDRDIR)/header.s $(LDRDIR)/loader.s
|
||||
nasm -DUSE_INTERP $(ASFLAGS) $< -o $@
|
||||
$(OBJDIR)/symbols.%.asm: $(OBJDIR)/%.start.o
|
||||
$(PYTHON3) ./smol.py $(LIBS) "$<" "$@"
|
||||
|
||||
$(OBJDIR)/stub.%.o: $(OBJDIR)/symbols.%.asm $(SRCDIR)/header.asm \
|
||||
$(SRCDIR)/loader.asm
|
||||
$(NASM) $(ASFLAGS) $< -o $@
|
||||
|
||||
$(BINDIR)/%: $(OBJDIR)/%.start.o $(OBJDIR)/stub.%.o $(BINDIR)/
|
||||
$(LD) $(LDFLAGS_) $(OBJDIR)/$*.start.o $(OBJDIR)/stub.$*.o -o "$@"
|
||||
|
||||
.PHONY: all clean
|
||||
|
||||
$(BINDIR)/%: $(OBJDIR)/%.o $(OBJDIR)/header.%.o
|
||||
$(LD) -m elf_i386 $(LDFLAGS) $^ -o $@
|
||||
|
|
|
@ -6,9 +6,9 @@ SECTIONS {
|
|||
.header : { *(.header) }
|
||||
|
||||
.text : {
|
||||
*(.text._smol_start)
|
||||
*(.text._start)
|
||||
*(.text .rdata .rdata.* .rodata .rodata.*)
|
||||
*(.text.startup.smol)
|
||||
*(.text.startup._start)
|
||||
*(.text .text.* .rdata .rdata.* .rodata .rodata.*)
|
||||
}
|
||||
|
||||
.data : {
|
||||
|
@ -22,5 +22,5 @@ SECTIONS {
|
|||
*(.*)
|
||||
}
|
||||
|
||||
_size = . - 0x400000;
|
||||
_smol_total_size = . - 0x400000;
|
||||
}
|
102
ldr/header.s
102
ldr/header.s
|
@ -1,102 +0,0 @@
|
|||
; vim: set ft=nasm:
|
||||
%define ORIGIN 0x400000
|
||||
|
||||
extern _size
|
||||
[section .header]
|
||||
|
||||
header:
|
||||
; e_ident
|
||||
db 0x7F, "ELF" ; EI_MAG0-EI_MAG3
|
||||
db 1 ; EI_CLASS: 1 = 32-bit
|
||||
db 1 ; EI_DATA: 1 = LSB
|
||||
db 1 ; EI_VERSION
|
||||
db 3 ; EI_OSABI: 3 = Linux
|
||||
db 1 ; EI_OSABIVERSION
|
||||
times 7 db 0 ; EI_PAD, ld.so is a busta and won't let us use our leet group tags for padding bytes :(
|
||||
; e_type: 2 = executable
|
||||
dw 2
|
||||
; e_machine: 3 = x86
|
||||
dw 3
|
||||
; e_version
|
||||
dd 1
|
||||
; e_entry
|
||||
dd _smol_start
|
||||
; e_phoff
|
||||
dd (.segments - header)
|
||||
; e_shoff
|
||||
dd 0
|
||||
; e_flags
|
||||
dd 0
|
||||
; e_ehsize
|
||||
dw (.segments - header)
|
||||
; e_phentsize
|
||||
dw (.segments.load - .segments.dynamic)
|
||||
.segments:
|
||||
%ifdef USE_INTERP
|
||||
.segments.interp:
|
||||
; {e_phnum: 2, e_shentsize: 0}, p_type: 3 = PT_INTERP
|
||||
dd 3
|
||||
; {e_shnum: <junk>, e_shstrnd: <junk>}, p_offset
|
||||
dd (.interp - header)
|
||||
; p_vaddr
|
||||
dd .interp
|
||||
; p_paddr
|
||||
dd .interp
|
||||
; p_filesz
|
||||
dd (.interp.end-.interp)
|
||||
; p_memsz
|
||||
dd (.interp.end-.interp)
|
||||
; p_flags, p_align
|
||||
dd 0,0
|
||||
%endif
|
||||
.segments.dynamic:
|
||||
; {e_phnum: 2, e_shentsize: 0}, p_type: 2 = PT_DYNAMIC
|
||||
dd 2
|
||||
; {e_shnum: <junk>, e_shstrnd: <junk>}, p_offset
|
||||
dd (.dynamic - header)
|
||||
; p_vaddr
|
||||
dd .dynamic
|
||||
; p_paddr
|
||||
dd 0
|
||||
; p_filesz
|
||||
dd (.dynamic.end - .dynamic)
|
||||
; p_memsz
|
||||
dd (.dynamic.end - .dynamic)
|
||||
; p_flags, p_align
|
||||
dq 0
|
||||
.segments.load:
|
||||
; p_type: 1 = PT_LOAD
|
||||
dd 1
|
||||
; p_offset
|
||||
dd 0
|
||||
; p_vaddr
|
||||
dd ORIGIN
|
||||
; p_paddr
|
||||
dd 0
|
||||
; p_filesz
|
||||
dd _size
|
||||
; p_memsz
|
||||
dd _size
|
||||
; p_flags: 1 = execute, 4 = read
|
||||
dd (1 | 2 | 4)
|
||||
; p_align
|
||||
dd 0x1000
|
||||
.segments.end:
|
||||
%ifdef USE_INTERP
|
||||
.interp:
|
||||
db "/lib/ld-linux.so.2",0
|
||||
.interp.end:
|
||||
%endif
|
||||
.dynamic:
|
||||
.dynamic.strtab:
|
||||
; d_tag: 5 = DT_STRTAB
|
||||
dd 5
|
||||
; d_un.d_ptr
|
||||
dd _symbols
|
||||
.dynamic.symtab:
|
||||
; this is required to be present or ld.so will crash, but it can be bogus
|
||||
; d_tag: 6 = DT_SYMTAB
|
||||
dd 6
|
||||
; d_un.d_ptr
|
||||
dd 0
|
||||
|
156
ldr/loader.s
156
ldr/loader.s
|
@ -1,156 +0,0 @@
|
|||
; vim: set ft=nasm ts=8:
|
||||
|
||||
%define LM_NAME_OFFSET 0x4
|
||||
%define LM_NEXT_OFFSET 0xC
|
||||
%define LM_ADDR_OFFSET 0
|
||||
%define LM_INFO_OFFSET 0x20
|
||||
|
||||
; by default, use the offset 'correction' from glibc 2.28
|
||||
%define LM_ENTRY_OFFSET_BASE 340
|
||||
|
||||
%define LM_NBUCKETS_OFFSET 0x178
|
||||
%define LM_GNU_BUCKETS_OFFSET 0x188
|
||||
%define LM_GNU_CHAIN_ZERO_OFFSET 0x18C
|
||||
|
||||
%define DT_VALUE_OFFSET 0x4
|
||||
%define DYN_PTR_OFFSET 0x4
|
||||
|
||||
%define DT_SYMTAB 0x6
|
||||
%define DT_SYMSIZE_SHIFT 4
|
||||
|
||||
lm_off_extra:
|
||||
dd 0
|
||||
|
||||
[section .text._smol_start]
|
||||
|
||||
strcmp: ; (const char *s1 (esi), const char *s2 (edi))
|
||||
push esi
|
||||
push edi
|
||||
.cmp: lodsb
|
||||
or al, al
|
||||
jz .done
|
||||
sub al, [edi]
|
||||
jnz .done
|
||||
inc edi
|
||||
jmp .cmp
|
||||
.done: pop edi
|
||||
pop esi
|
||||
ret
|
||||
|
||||
|
||||
basename: ; (const char *s (esi))
|
||||
push esi
|
||||
push edi
|
||||
mov edi, esi
|
||||
.cmp: lodsb
|
||||
or al, al
|
||||
jz .done
|
||||
cmp al, 47 ; '/'
|
||||
cmove edi, esi
|
||||
jmp .cmp
|
||||
.done: mov eax, edi
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
|
||||
|
||||
link_symbol: ; (struct link_map *entry, uint32_t *h)
|
||||
mov ecx, esi
|
||||
|
||||
; eax = *h % entry->l_nbuckets
|
||||
mov eax, [ecx]
|
||||
xor edx, edx
|
||||
mov ebx, [ebp + edi + LM_NBUCKETS_OFFSET]
|
||||
div ebx
|
||||
; eax = entry->l_gnu_buckets[eax]
|
||||
mov eax, [ebp + edi + LM_GNU_BUCKETS_OFFSET]
|
||||
mov eax, [eax + edx * 4]
|
||||
; *h |= 1
|
||||
or word [ecx], 1
|
||||
.check_bucket: ; edx = entry->l_gnu_chain_zero[eax] | 1
|
||||
mov edx, [ebp + edi + LM_GNU_CHAIN_ZERO_OFFSET]
|
||||
mov edx, [edx + eax * 4]
|
||||
or edx, 1
|
||||
; check if this is our symbol
|
||||
cmp edx, [ecx]
|
||||
je .found
|
||||
inc eax
|
||||
jmp .check_bucket
|
||||
.found: ; it is! edx = entry->l_info[DT_SYMTAB]->d_un.d_ptr
|
||||
mov edx, [ebp + LM_INFO_OFFSET + DT_SYMTAB * 4]
|
||||
mov edx, [edx + DYN_PTR_OFFSET]
|
||||
; edx = edx[eax].dt_value + entry->l_addr
|
||||
shl eax, DT_SYMSIZE_SHIFT
|
||||
mov edx, [edx + eax + DT_VALUE_OFFSET]
|
||||
add edx, [ebp + LM_ADDR_OFFSET]
|
||||
sub edx, ecx
|
||||
sub edx, 4
|
||||
; finally, write it back!
|
||||
mov [ecx], edx
|
||||
ret
|
||||
|
||||
|
||||
link: ; (struct link_map *root, char *symtable)
|
||||
mov eax, [esp+4]
|
||||
mov esi, [esp+8]
|
||||
.do_library: ; null library name means end of symbol table, we're done
|
||||
cmp byte [esi], 0
|
||||
jz .done
|
||||
; setup start of map again
|
||||
mov ebp, eax
|
||||
push eax
|
||||
.find_map_entry: ; compare basename(entry->l_name) to lib name, if so we got a match
|
||||
push esi
|
||||
mov esi, [ebp + LM_NAME_OFFSET]
|
||||
call basename
|
||||
mov edi, eax
|
||||
pop esi
|
||||
call strcmp
|
||||
jz .process_map_entry
|
||||
; no match, next entry it is!
|
||||
mov ebp, [ebp + LM_NEXT_OFFSET]
|
||||
jmp .find_map_entry
|
||||
.process_map_entry: ; skip past the name in the symbol table now to get to the symbols
|
||||
lodsb
|
||||
or al, al
|
||||
jnz .process_map_entry
|
||||
|
||||
.do_symbols: ; null byte means end of symbols for this library!
|
||||
cmp byte [esi], 0
|
||||
jz .next_library
|
||||
inc esi
|
||||
push edi
|
||||
mov edi, [lm_off_extra]
|
||||
call link_symbol
|
||||
pop edi
|
||||
add esi, 4
|
||||
jmp .do_symbols
|
||||
.next_library: pop eax
|
||||
inc esi
|
||||
jmp .do_library
|
||||
.done: ret
|
||||
|
||||
|
||||
extern _start
|
||||
_smol_start:
|
||||
; try to get the 'version-agnostic' pffset of the stuff we're
|
||||
; interested in
|
||||
mov ebx, eax
|
||||
mov esi, eax
|
||||
.looper:
|
||||
lodsd
|
||||
cmp dword eax, _smol_start
|
||||
jne short .looper
|
||||
sub esi, ebx
|
||||
sub esi, LM_ENTRY_OFFSET_BASE+4 ; +4: take inc-after from lodsb into acct
|
||||
mov [lm_off_extra], esi
|
||||
|
||||
mov eax, ebx
|
||||
|
||||
push _symbols
|
||||
push eax
|
||||
call link
|
||||
|
||||
;jmp short _start
|
||||
; by abusing the linker script, _start ends up right here :)
|
||||
|
105
ldr/mksyms
105
ldr/mksyms
|
@ -1,105 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import glob
|
||||
import sys
|
||||
import os.path
|
||||
import subprocess
|
||||
import itertools
|
||||
import argparse
|
||||
|
||||
|
||||
def hash_djb2(s):
|
||||
h = 5381
|
||||
for c in s:
|
||||
h = (h * 33 + ord(c)) & 0xFFFFFFFF
|
||||
return h
|
||||
|
||||
|
||||
def output_x86(libraries):
|
||||
shorts = { l: l.split('.', 1)[0].lower() for l in libraries }
|
||||
|
||||
print('%include "header.s"')
|
||||
print('.dynamic.needed:')
|
||||
for library in libraries:
|
||||
print('dd 1')
|
||||
print('dd (_symbols.{} - _symbols)'.format(shorts[library]))
|
||||
print('.dynamic.end:')
|
||||
print('_symbols:')
|
||||
for library, symbols in libraries.items():
|
||||
print('\t_symbols.{}: db "{}",0'.format(shorts[library], library))
|
||||
|
||||
for sym in symbols:
|
||||
hash = hash_djb2(sym)
|
||||
print("""
|
||||
\t\tglobal {name}
|
||||
\t\t{name}: db 0xE9
|
||||
\t\t dd 0x{hash:x}
|
||||
""".strip('\n').format(name=sym, hash=hash))
|
||||
|
||||
print('\tdb 0')
|
||||
print('db 0')
|
||||
print('%include "loader.s"')
|
||||
|
||||
|
||||
def get_cc_paths():
|
||||
output = subprocess.check_output(['cc', '-print-search-dirs'],
|
||||
stderr=subprocess.DEVNULL)
|
||||
paths = {}
|
||||
for entry in output.decode('utf-8').splitlines():
|
||||
category, path = entry.split(': ', 1)
|
||||
path = path.lstrip('=')
|
||||
paths[category] = list(set(os.path.realpath(p) \
|
||||
for p in path.split(':') if os.path.isdir(p)))
|
||||
return paths
|
||||
|
||||
def is_valid_elf(f):
|
||||
with open(f, 'rb') as ff: return ff.read(4) == b'\x7FELF'
|
||||
|
||||
def find_lib(spaths, wanted):
|
||||
for p in spaths:
|
||||
for f in glob.glob(glob.escape(p) + '/lib' + wanted + '.so*'):
|
||||
if os.path.isfile(f) and is_valid_elf(f): return f
|
||||
for f in glob.glob(glob.escape(p) + '/' + wanted + '.so*'):
|
||||
if os.path.isfile(f) and is_valid_elf(f): return f
|
||||
#for f in glob.glob(glob.escape(p) + '/lib' + wanted + '.a' ): return f
|
||||
#for f in glob.glob(glob.escape(p) + '/' + wanted + '.a' ): return f
|
||||
|
||||
sys.stderr.write("E: couldn't find library '" + wanted + "'.")
|
||||
sys.exit(1)
|
||||
|
||||
def find_libs(spaths, wanted): return map(lambda l: find_lib(spaths, l), wanted)
|
||||
|
||||
def find_symbol(libraries, libnames, symbol):
|
||||
output = subprocess.check_output(['scanelf', '-B', '-F' '%s %S', '-s', \
|
||||
'+{}'.format(symbol)] + libraries, stderr=subprocess.DEVNULL)
|
||||
for entry in output.decode('utf-8').splitlines():
|
||||
sym, soname, path = entry.split(' ', 2)
|
||||
if symbol in sym.split(',') and any(soname.startswith('lib'+l) for l in libnames):
|
||||
return soname
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-a', '--architecture', default='x86', help='architecture to generate asm code for')
|
||||
parser.add_argument('-l', '--library', metavar='LIB', action='append', help='libraries to link against')
|
||||
parser.add_argument('symbols', metavar='SYM', nargs='*', help='symbol to search for')
|
||||
args = parser.parse_args()
|
||||
|
||||
paths = get_cc_paths()
|
||||
libraries=paths['libraries']
|
||||
libnames = args.library
|
||||
libs = list(find_libs(libraries, libnames))
|
||||
sys.stderr.write('libs='+str(libs)+'\n')
|
||||
symbols = {}
|
||||
|
||||
for symbol in args.symbols:
|
||||
library = find_symbol(libs, libnames, symbol)
|
||||
if not library:
|
||||
sys.stderr.write('could not find symbol: {}\n'.format(symbol))
|
||||
sys.exit(1)
|
||||
symbols.setdefault(library, [])
|
||||
symbols[library].append(symbol)
|
||||
|
||||
output_x86(symbols)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
68
smol
68
smol
|
@ -1,68 +0,0 @@
|
|||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
COPTFLAGS=$(cat <<'EOF'
|
||||
-Os -fvisibility=hidden -mpreferred-stack-boundary=2 -fwhole-program
|
||||
-ffast-math -funsafe-math-optimizations -fno-stack-protector -fomit-frame-pointer
|
||||
-fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables
|
||||
EOF
|
||||
)
|
||||
CXXOPTFLAGS=$(cat <<EOF
|
||||
$CXXOPTFLAGS
|
||||
-fno-rtti -fno-enforce-eh-specs -fnothrow-opt -fno-use-cxa-get-exception-ptr
|
||||
-fno-implicit-templates -fno-threadsafe-statics -fno-use-cxa-atexit
|
||||
EOF
|
||||
)
|
||||
|
||||
CC="${CC:-cc}"
|
||||
CXX="${CXX:-c++}"
|
||||
CFLAGS="-Wall -Wextra -Wpedantic -std=c99 $COPTFLAGS -nostartfiles -fno-PIC"
|
||||
CXXFLAGS="-Wall -Wextra -Wpedantic -std=c++11 $CXXOPTFLAGS -nostartfiles -fno-PIC"
|
||||
ASFLAGS="-f elf -I ldr/"
|
||||
LDFLAGS="--oformat=binary -T ldr/link.ld"
|
||||
|
||||
mkdir -p bin
|
||||
rm -rf obj
|
||||
mkdir -p obj
|
||||
|
||||
cleared=
|
||||
mksym_args=
|
||||
files=
|
||||
symbols=
|
||||
output=s.out
|
||||
i=1
|
||||
|
||||
add_out() {
|
||||
out="obj/$i"
|
||||
files="$files $out"
|
||||
i=$((i+1))
|
||||
}
|
||||
|
||||
add_syms() {
|
||||
local syms=$(readelf -s "$1" | grep -F UND | sed 1d | awk '{ print $8 }')
|
||||
symbols="$symbols $syms"
|
||||
}
|
||||
|
||||
for a; do
|
||||
if test -z "$cleared"; then
|
||||
set --; cleared=1
|
||||
fi
|
||||
case "$a" in
|
||||
-o) output="$a";;
|
||||
-l*) mksym_args="$mksym_args $a";;
|
||||
-*) set -- "$@" "$a";;
|
||||
*.c)
|
||||
add_out
|
||||
$CC $CFLAGS "$@" -c "$a" -o "$out"
|
||||
add_syms "$out";;
|
||||
*.cxx|*.cc|*.cpp)
|
||||
add_out
|
||||
$CXX $CXXFLAGS "$@" -c "$a" -o "$out"
|
||||
add_syms "$out";;
|
||||
*) printf "not sure what to do with input file: %s, bailing\n" "$a" &>2; exit 1;;
|
||||
esac
|
||||
done
|
||||
|
||||
./ldr/mksyms $mksym_args $symbols > obj/$output.syms.s
|
||||
nasm $ASFLAGS obj/$output.syms.s -o obj/$output.header.o
|
||||
ld $LDFLAGS obj/$output.header.o $files -o bin/$output
|
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import itertools
|
||||
import os.path
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from smolshared import *
|
||||
from smolparse import *
|
||||
from smolemit import *
|
||||
|
||||
def main():
    """Command-line entry point: generate the NASM stub source for a smol binary.

    Parses the command line, determines the target architecture (from
    ``-m/--target`` or from the input object files), collects the undefined
    symbols of the inputs, maps every symbol to the shared library that
    provides it, and hands the result to ``output()``.

    Exits with status 1 when the architecture is unknown or a symbol cannot
    be resolved in any of the requested libraries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--target', default='',
        help='architecture to generate asm code for (default: auto)')
    parser.add_argument('-l', '--library', metavar='LIB', action='append',
        help='libraries to link against')
    parser.add_argument('-L', '--libdir', metavar='DIR', action='append',
        help="directories to search libraries in")

    parser.add_argument('--nasm', default=shutil.which('nasm'),
        help="which nasm binary to use")
    parser.add_argument('--cc', default=shutil.which('cc'),
        help="which cc binary to use")
    parser.add_argument('--scanelf', default=shutil.which('scanelf'),
        help="which scanelf binary to use")
    parser.add_argument('--readelf', default=shutil.which('readelf'),
        help="which readelf binary to use")
    parser.add_argument('input', nargs='+', help="input object file")
    parser.add_argument('output', type=argparse.FileType('w'),
        help="output nasm file", default=sys.stdout)

    args = parser.parse_args()

    # action='append' leaves these as None when the option is never given.
    if args.libdir is None: args.libdir = []
    if args.library is None: args.library = []

    if len(args.target) != 0:
        arch = args.target.lower()
    else:
        arch = decide_arch(args.input)
    if arch not in archmagic:
        eprintf("Unknown architecture '" + str(arch) + "'")
        sys.exit(1)

    syms = get_needed_syms(args.readelf, args.input)

    paths = get_cc_paths(args.cc)

    # User-supplied -L directories take precedence over the compiler's own.
    spaths = args.libdir + paths['libraries']
    libnames = args.library
    libs = list(find_libs(spaths, libnames))
    symbols = {}

    # get_needed_syms() returns a set; sort it so the generated file is
    # identical from one run to the next.
    for symbol in sorted(syms):
        library = find_symbol(args.scanelf, libs, libnames, symbol)
        if not library:
            eprintf("could not find symbol: {}".format(symbol))
            sys.exit(1)
        symbols.setdefault(library, []).append(symbol)

    output(arch, symbols, args.output)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
import sys
|
||||
|
||||
from smolshared import *
|
||||
|
||||
def hash_djb2(s):
    """Return the djb2 hash of the string ``s``, truncated to 32 bits.

    The running value starts at 5381; for every character it is multiplied
    by 33 and the character's code point is added.
    """
    digest = 5381
    for code in map(ord, s):
        # (digest << 5) + digest is digest * 33
        digest = ((digest << 5) + digest + code) & 0xFFFFFFFF
    return digest
|
||||
|
||||
def output_x86(libraries, outf):
    """Write the i386 NASM stub source to ``outf``.

    ``libraries`` maps a library soname to the list of symbol names imported
    from it. The emitted file includes ``header.asm``, adds one DT_NEEDED
    entry (tag 1) per library, then the ``_symbols`` table: for each library
    its NUL-terminated name followed by one 5-byte record per symbol (byte
    0xE9 plus the 32-bit djb2 hash of the name) and a terminating zero byte.
    A final zero byte ends the table, after which ``loader.asm`` is included.
    """
    outf.write('; vim: set ft=nasm:\n') # be friendly

    label_chars = set('abcdefghijklmnopqrstuvwxyz0123456789_')

    def label_for(soname):
        # Use the part before the first '.', lower-cased, as a local NASM
        # label. Anything that is not [a-z0-9_] (e.g. the '-' in
        # "ld-linux" or the '+' in "libstdc++") becomes '_'.
        stem = soname.split('.', 1)[0].lower()
        return ''.join(ch if ch in label_chars else '_' for ch in stem)

    shorts = { l: label_for(l) for l in libraries }

    outf.write('%include "header.asm"\n')
    outf.write('.dynamic.needed:\n')
    for library in libraries:
        outf.write('dd 1\n')
        outf.write('dd (_symbols.{} - _symbols)\n'.format(shorts[library]))
    outf.write('.dynamic.end:\n')

    outf.write('_symbols:\n')
    for library, symbols in libraries.items():
        outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))

        for sym in symbols:
            sym_hash = hash_djb2(sym)
            outf.write(
                '\t\tglobal {name}\n'
                '\t\t{name}: db 0xE9\n'
                '\t\t dd 0x{hash:x}\n'.format(name=sym, hash=sym_hash))

        outf.write('\tdb 0\n')
    outf.write('db 0\n')
    outf.write('%include "loader.asm"\n')
|
||||
|
||||
def output(arch, libraries, outf):
    """Emit the stub source for ``arch`` to ``outf``.

    Only 'i386' has an emitter; any other architecture name prints an error
    to stderr and exits with status 1.
    """
    # No emitters yet for: arm, x86_64, aarch64.
    if arch != 'i386':
        eprintf("E: cannot emit for arch '" + str(arch) + "'")
        sys.exit(1)

    output_x86(libraries, outf)
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
|
||||
import glob
|
||||
import os.path
|
||||
import subprocess
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from smolshared import *
|
||||
|
||||
def decide_arch(inpfiles):
    """Determine the common target architecture of the input ELF files.

    Reads ``e_machine`` from the ELF header of every file in ``inpfiles`` and
    returns the architecture name that ``archmagic`` maps it to.

    Exits with status 1 when a file is not a readable ELF file, when the
    inputs do not all share exactly one machine type, or when that machine
    type is not present in ``archmagic``.
    """
    archs = set()

    for fp in inpfiles:
        with open(fp, 'rb') as ff:
            # e_ident (16 bytes) + e_type (2 bytes) + e_machine (2 bytes)
            header = ff.read(20)

        if len(header) < 20 or header[:4] != b'\x7fELF':
            eprintf("E: '" + str(fp) + "' is not an ELF file.")
            sys.exit(1)

        # e_ident[EI_DATA] (byte 5): 1 = little-endian, 2 = big-endian
        fmt = '>H' if header[5] == 2 else '<H'
        machnum = struct.unpack(fmt, header[18:20])[0]
        archs.add(machnum)

    if len(archs) != 1:
        eprintf("Input files have multiple architectures, can't link this...")
        sys.exit(1)

    archn = list(archs)[0]

    if archn not in archmagic:
        eprintf("Unknown architecture number " + str(archn) + \
            ". Consult elf.h and rebuild your object files.")
        # Without this exit the lookup below would raise KeyError.
        sys.exit(1)

    return archmagic[archn]
|
||||
|
||||
def get_needed_syms(readelf_bin, inpfiles):
    """Return the set of undefined global symbol names in ``inpfiles``.

    Runs ``readelf_bin -s -W`` on the input files and keeps the name column
    (8th field) of every row whose binding (5th field) is GLOBAL and whose
    section index (7th field) is UND. Rows with fewer than eight fields are
    ignored.
    """
    command = [readelf_bin, '-s', '-W'] + inpfiles
    listing = subprocess.check_output(command, stderr=subprocess.DEVNULL)

    rows = (line.split() for line in listing.decode('utf-8').splitlines())
    return {
        cols[7]
        for cols in rows
        if len(cols) >= 8
        and cols[4] == "GLOBAL"
        and cols[6] == "UND"
        and len(cols[7]) > 0
    }
|
||||
|
||||
def get_cc_paths(cc_bin):
    """Return the search directories reported by ``cc_bin -print-search-dirs``.

    The result maps each category printed by the compiler (the text before
    the first ``': '`` on a line, e.g. ``libraries``) to a list of existing
    directories, resolved with ``os.path.realpath``. Duplicates are removed
    but the compiler's order is kept, because ``find_lib`` returns the first
    match it sees. Lines that do not contain ``': '`` are ignored.
    """
    output = subprocess.check_output([cc_bin, '-print-search-dirs'],
        stderr=subprocess.DEVNULL)
    paths = {}
    for entry in output.decode('utf-8').splitlines():
        category, sep, path = entry.partition(': ')
        if not sep:
            continue  # not a "category: value" line

        ordered = []
        seen = set()
        # The value may start with '=' (e.g. "libraries: =/usr/lib:...").
        for p in path.lstrip('=').split(':'):
            if not os.path.isdir(p):
                continue
            real = os.path.realpath(p)
            if real not in seen:
                seen.add(real)
                ordered.append(real)
        paths[category] = ordered
    return paths
|
||||
|
||||
def is_valid_elf(f):
    """Return True when the file at path ``f`` starts with the ELF magic bytes."""
    elf_magic = b'\x7FELF'
    with open(f, 'rb') as handle:
        leading = handle.read(4)
    return leading == elf_magic
|
||||
|
||||
def find_lib(spaths, wanted):
    """Return the path of the first shared object matching ``wanted``.

    Each directory in ``spaths`` is tried in order: first files named
    ``lib<wanted>.so*``, then ``<wanted>.so*``. A candidate is accepted only
    if it is a regular file that passes ``is_valid_elf``. Static archives
    (``.a``) are not searched.

    Prints an error and exits with status 1 when nothing matches.
    """
    for directory in spaths:
        base = glob.escape(directory)
        for prefix in ('/lib', '/'):
            for candidate in glob.glob(base + prefix + wanted + '.so*'):
                if os.path.isfile(candidate) and is_valid_elf(candidate):
                    return candidate

    eprintf("E: couldn't find library '" + wanted + "'.")
    sys.exit(1)
|
||||
|
||||
def find_libs(spaths, wanted):
    """Lazily yield ``find_lib(spaths, name)`` for every name in ``wanted``."""
    return (find_lib(spaths, name) for name in wanted)
|
||||
|
||||
def find_symbol(scanelf_bin, libraries, libnames, symbol):
    """Return the soname of the library that provides ``symbol``, or None.

    Runs ``scanelf_bin`` over ``libraries`` and expects one line per hit of
    the form ``<symbols> <soname> <path>``, where ``<symbols>`` is a
    comma-separated list. A line is accepted when ``symbol`` is in that list
    and the soname starts with ``lib<name>`` for some name in ``libnames``.
    """
    command = [scanelf_bin, '-B', '-F%s %S', '-s', '+{}'.format(symbol)]
    command += libraries
    listing = subprocess.check_output(command, stderr=subprocess.DEVNULL)

    wanted_prefixes = tuple('lib' + l for l in libnames)
    for line in listing.decode('utf-8').splitlines():
        found, soname, path = line.split(' ', 2)
        if symbol not in found.split(','):
            continue
        if soname.startswith(wanted_prefixes):
            return soname
    return None
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
import sys
|
||||
|
||||
archmagic = {
|
||||
'i386': 3, 3: 'i386',
|
||||
##'arm': 40, 40: 'arm',
|
||||
#'x86_64': 62, 62: 'x86_64',
|
||||
###'aarch64': 183, 183: 'aarch64'
|
||||
}
|
||||
|
||||
def eprintf(*args, **kwargs):
    """Print to standard error; takes the same arguments as print() except ``file``."""
    print(*args, file=sys.stderr, **kwargs)
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
#include <stddef.h>
|
||||
|
||||
extern int main(int argc, char* argv[]);
|
||||
extern int __libc_start_main(int (*main)(int, char**),
|
||||
int argc, char** argv,
|
||||
void (*init)(void), void(*fini)(void),
|
||||
void (*rtld_fini)(void),
|
||||
void* stack) __attribute__((__noreturn__));
|
||||
|
||||
/*
 * C-side program entry, placed in section .text.startup._start.
 *
 * Treats `stack` as a pointer to an int holding argc, immediately followed
 * by the argv pointer array, and forwards both to __libc_start_main with no
 * init/fini/rtld_fini callbacks. Never returns.
 *
 * NOTE(review): `stack` is presumably set up by the asm loader (its
 * `sub esp, 20` just before falling through into this function) -- confirm
 * against loader.asm before changing the signature or the frame layout.
 */
__attribute__((__externally_visible__, __section__(".text.startup._start"), __noreturn__))
|
||||
int _start(void* stack) {
|
||||
int argc=*(int*)stack; /* first int at `stack` is argc */
|
||||
char** argv=(void*)(&((int*)stack)[1]); /* argv array starts right after argc */
|
||||
|
||||
__libc_start_main(main, argc, argv, NULL, NULL, NULL, (void*)stack);
|
||||
|
||||
__builtin_unreachable(); /* __libc_start_main is declared noreturn */
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
%define EI_CLASS (1) ; 1 == 32-bit
|
||||
%define EI_DATA (1) ; 1 == little-endian
|
||||
%define EI_VERSION (1) ; current
|
||||
%define EI_OSABI (3) ; Linux
|
||||
%define EI_OSABIVERSION (1) ; current
|
||||
|
||||
%define ELF_TYPE (2) ; 2 == executable
|
||||
|
||||
%ifndef ELF_MACHINE
|
||||
%define ELF_MACHINE (3) ; 3 == i386
|
||||
%endif
|
||||
|
||||
%define PT_LOAD (1)
|
||||
%define PT_DYNAMIC (2)
|
||||
%define PT_INTERP (3)
|
||||
|
||||
%define PHDR_R (4)
|
||||
%define PHDR_W (2)
|
||||
%define PHDR_X (1)
|
||||
|
||||
%define DT_STRTAB (5)
|
||||
%define DT_SYMTAB (6)
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
%define ORIGIN 0x400000
|
||||
;org ORIGIN
|
||||
bits 32
|
||||
|
||||
extern _smol_total_size
|
||||
[section .header]
|
||||
|
||||
%include "elf.inc"
|
||||
|
||||
header:
|
||||
; e_ident
|
||||
db 0x7F, "ELF"
|
||||
db EI_CLASS, EI_DATA, EI_VERSION, EI_OSABI
|
||||
db EI_OSABIVERSION
|
||||
times 7 db 0 ; EI_PAD, ld.so is a busta and won't let us use our leet
|
||||
; group tags for padding bytes :(
|
||||
dw ELF_TYPE ; e_type: 2 = executable
|
||||
dw ELF_MACHINE ; e_machine: 3 = x86
|
||||
dd EI_VERSION ; e_version
|
||||
dd _smol_start ; e_entry
|
||||
dd (.segments - header) ; e_phoff
|
||||
dd 0 ; e_shoff
|
||||
dd 0 ; e_flags
|
||||
dw (.segments - header) ; e_ehsize
|
||||
dw (.segments.load - .segments.dynamic) ; e_phentsize
|
||||
; Program header table. It starts directly after e_phentsize (e_phoff and
; e_ehsize above are both `.segments - header`), so the first two dwords of
; the first program header double as the ELF header's trailing fields
; e_phnum/e_shentsize and e_shnum/e_shstrndx.
.segments:
|
||||
%ifdef USE_INTERP
|
||||
.segments.interp:
|
||||
dd PT_INTERP ; {e_phnum: 3, e_shentsize: 0}, p_type (PT_INTERP == 3)
|
||||
dd (.interp - header) ; {e_shnum: <junk>, e_shstrndx: <junk>}, p_offset
|
||||
dd .interp, .interp ; p_vaddr, p_paddr
|
||||
dd (.interp.end-.interp) ; p_filesz
|
||||
dd (.interp.end-.interp) ; p_memsz
|
||||
dd 0,0 ; p_flags, p_align
|
||||
%endif
|
||||
.segments.dynamic:
|
||||
dd PT_DYNAMIC ; p_type; without USE_INTERP this is the first entry: {e_phnum: 2, e_shentsize: 0}
|
||||
dd (.dynamic - header) ; p_offset; without USE_INTERP also {e_shnum: <junk>, e_shstrndx: <junk>}
|
||||
dd .dynamic, 0 ; p_vaddr, p_paddr
|
||||
dd (.dynamic.end - .dynamic) ; p_filesz
|
||||
dd (.dynamic.end - .dynamic) ; p_memsz
|
||||
dd 0, 0 ; p_flags, p_align
|
||||
.segments.load:
|
||||
dd PT_LOAD ; p_type: 1 = PT_LOAD
|
||||
dd 0 ; p_offset
|
||||
dd ORIGIN, 0 ; p_vaddr, p_paddr
|
||||
dd _smol_total_size ; p_filesz
|
||||
dd _smol_total_size ; p_memsz
|
||||
dd (PHDR_R | PHDR_W | PHDR_X) ; p_flags
|
||||
dd 0x1000 ; p_align
|
||||
.segments.end:
|
||||
%ifdef USE_INTERP
|
||||
.interp:
|
||||
db "/lib/ld-linux.so.2",0
|
||||
.interp.end:
|
||||
%endif
|
||||
.dynamic:
|
||||
.dynamic.strtab:
|
||||
dd DT_STRTAB ; d_tag
|
||||
dd _symbols ; d_un.d_ptr
|
||||
.dynamic.symtab:
|
||||
; this is required to be present or ld.so will crash, but it can be bogus
|
||||
dd DT_SYMTAB ; d_tag: 6 = DT_SYMTAB
|
||||
dd 0 ; d_un.d_ptr
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
const char *f = "foo";
|
||||
|
||||
int main(void) {
|
||||
printf("hello world %s\n", f);
|
||||
exit(42);
|
||||
}
|
|
@ -0,0 +1,135 @@
|
|||
; vim: set ft=nasm ts=8:
|
||||
|
||||
%include "rtld.inc"
|
||||
|
||||
[section .text.startup.smol]
|
||||
|
||||
|
||||
; Loader entry point (e_entry in header.asm).
; Scans dwords starting at the address eax holds on entry until one equals
; the address of _smol_start, then turns the position of that dword into a
; correction relative to LM_ENTRY_OFFSET_BASE.
; NOTE(review): eax is presumably the link_map pointer left there by ld.so,
; and the matched dword its entry-point field -- confirm.
; On fall-through into `link`:
;   ebp = value eax had on entry
;   ebx = offset correction
;   esi = _symbols
_smol_start:
|
||||
; try to get the 'version-agnostic' offset of the stuff we're
|
||||
; interested in
|
||||
mov ebx, eax
|
||||
mov esi, eax
|
||||
.looper:
|
||||
lodsd
|
||||
cmp dword eax, _smol_start
|
||||
jne short .looper
|
||||
sub esi, ebx
|
||||
sub esi, LM_ENTRY_OFFSET_BASE+4 ; +4: take inc-after from lodsd into acct
|
||||
|
||||
xchg ebp, ebx ; ebp = entry value of eax (saved in ebx above)
|
||||
xchg ebx, esi ; ebx = offset correction computed above
|
||||
mov esi, _symbols
|
||||
|
||||
link: ; (struct link_map *root, char *symtable)
|
||||
.do_library: ; null library name means end of symbol table, we're done
|
||||
cmp byte [esi], 0
|
||||
jz .done
|
||||
.find_map_entry: ; compare basename(entry->l_name) to lib name, if so we got a match
|
||||
push esi
|
||||
mov esi, [ebp + LM_NAME_OFFSET]
|
||||
|
||||
.basename: ; (const char *s (esi))
|
||||
push esi
|
||||
push edi
|
||||
mov edi, esi
|
||||
.basename.cmp:
|
||||
lodsb
|
||||
or al, al
|
||||
jz short .basename.done
|
||||
cmp al, 47 ; '/'
|
||||
cmove edi, esi
|
||||
jmp short .basename.cmp
|
||||
.basename.done:
|
||||
xchg eax, edi
|
||||
pop edi
|
||||
pop esi
|
||||
.basename.end:
|
||||
|
||||
mov edi, eax
|
||||
pop esi
|
||||
.strcmp: ; (const char *s1 (esi), const char *s2 (edi))
|
||||
push esi
|
||||
push edi
|
||||
.strcmp.cmp:
|
||||
lodsb
|
||||
or al, al
|
||||
jz short .strcmp.done
|
||||
sub al, [edi]
|
||||
jnz short .strcmp.done
|
||||
inc edi
|
||||
jmp short .strcmp.cmp
|
||||
.strcmp.done:
|
||||
pop edi
|
||||
pop esi
|
||||
.strcmp.end:
|
||||
|
||||
|
||||
jz short .process_map_entry
|
||||
; no match, next entry it is!
|
||||
mov ebp, [ebp + LM_NEXT_OFFSET]
|
||||
jmp short .find_map_entry
|
||||
.process_map_entry: ; skip past the name in the symbol table now to get to the symbols
|
||||
lodsb
|
||||
or al, al
|
||||
jnz short .process_map_entry
|
||||
|
||||
.do_symbols: ; null byte means end of symbols for this library!
|
||||
lodsb
|
||||
test al, al
|
||||
jz short .next_library
|
||||
push ebx
|
||||
xchg ebx, edi
|
||||
|
||||
link_symbol: ; (struct link_map *entry, uint32_t *h)
|
||||
mov ecx, esi
|
||||
|
||||
; eax = *h % entry->l_nbuckets
|
||||
mov eax, [ecx]
|
||||
xor edx, edx
|
||||
mov ebx, [ebp + edi + LM_NBUCKETS_OFFSET]
|
||||
div ebx
|
||||
; eax = entry->l_gnu_buckets[eax]
|
||||
mov eax, [ebp + edi + LM_GNU_BUCKETS_OFFSET]
|
||||
mov eax, [eax + edx * 4]
|
||||
; *h |= 1
|
||||
or word [ecx], 1
|
||||
.check_bucket: ; edx = entry->l_gnu_chain_zero[eax] | 1
|
||||
mov edx, [ebp + edi + LM_GNU_CHAIN_ZERO_OFFSET]
|
||||
mov edx, [edx + eax * 4]
|
||||
or edx, 1
|
||||
; check if this is our symbol
|
||||
cmp edx, [ecx]
|
||||
je short .found
|
||||
inc eax
|
||||
jmp short .check_bucket
|
||||
.found: ; it is! edx = entry->l_info[DT_SYMTAB]->d_un.d_ptr
|
||||
mov edx, [ebp + LM_INFO_OFFSET + DT_SYMTAB * 4]
|
||||
mov edx, [edx + DYN_PTR_OFFSET]
|
||||
; edx = edx[eax].dt_value + entry->l_addr
|
||||
shl eax, DT_SYMSIZE_SHIFT
|
||||
mov edx, [edx + eax + DT_VALUE_OFFSET]
|
||||
add edx, [ebp + LM_ADDR_OFFSET]
|
||||
sub edx, ecx
|
||||
sub edx, 4
|
||||
; finally, write it back!
|
||||
mov [ecx], edx
|
||||
|
||||
pop ebx
|
||||
add esi, 4
|
||||
jmp short link.do_symbols
|
||||
inc esi
|
||||
link.next_library:
|
||||
jmp link.do_library
|
||||
link.done:
|
||||
|
||||
;xor ebp, ebp ; let's put that burden on the user code, so they can leave
|
||||
; it out if they want to
|
||||
|
||||
sub esp, 20 ; put the stack where _stack (C code) expects it to be
|
||||
; this can't be left out, because X needs the envvars
|
||||
|
||||
;.loopme: jmp short .loopme
|
||||
;jmp short _start
|
||||
; by abusing the linker script, _start ends up right here :)
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
%define LM_NAME_OFFSET 0x4
|
||||
%define LM_NEXT_OFFSET 0xC
|
||||
%define LM_ADDR_OFFSET 0
|
||||
%define LM_INFO_OFFSET 0x20
|
||||
|
||||
; by default, use the offset 'correction' from glibc 2.28
|
||||
%define LM_ENTRY_OFFSET_BASE 340
|
||||
|
||||
%define LM_NBUCKETS_OFFSET 0x178
|
||||
%define LM_GNU_BUCKETS_OFFSET 0x188
|
||||
%define LM_GNU_CHAIN_ZERO_OFFSET 0x18C
|
||||
|
||||
%define DT_VALUE_OFFSET 0x4
|
||||
%define DYN_PTR_OFFSET 0x4
|
||||
|
||||
%define DT_SYMTAB 0x6
|
||||
%define DT_SYMSIZE_SHIFT 4
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
const char *f = "foo";
|
||||
|
||||
__attribute__((__externally_visible__, __section__(".text._start"), __noreturn__))
|
||||
__attribute__((__externally_visible__, __section__(".text.startup._start"), __noreturn__))
|
||||
int _start(void) {
|
||||
printf("hello world %s\n", f);
|
||||
exit(42);
|
|
@ -0,0 +1,13 @@
|
|||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
const char *f = "foo";
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
printf("hello world %s\n", f);
|
||||
printf("argc=%d\n", argc);
|
||||
for (int i = 0; i < argc; ++i) printf("argv[%d]=%s\n", i, argv[i]);
|
||||
exit(42);
|
||||
}
|
||||
|
|
@ -7,3 +7,4 @@ int main(void) {
|
|||
SDL_DestroyWindow(w);
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue