mirror of https://github.com/Shizmob/smol
reorganize everything into one invocation
This commit is contained in:
parent
d9dbaae27a
commit
161dc41290
|
@ -1,3 +1,3 @@
|
|||
/bin
|
||||
/obj
|
||||
/__pycache__
|
||||
__pycache__
|
||||
|
|
43
Makefile
43
Makefile
|
@ -1,6 +1,6 @@
|
|||
OBJDIR := obj
|
||||
BINDIR := bin
|
||||
SRCDIR := src
|
||||
SRCDIR := ldr
|
||||
LDDIR := ld
|
||||
TESTDIR:= test
|
||||
|
||||
|
@ -40,13 +40,14 @@ CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2)
|
|||
|
||||
LIBS=-lc
|
||||
|
||||
SMOLFLAGS +=
|
||||
ASFLAGS += -DUSE_INTERP -DALIGN_STACK
|
||||
#-DUSE_DNLOAD_LOADER #-DUSE_DT_DEBUG #-DUSE_DL_FINI #-DNO_START_ARG #-DUNSAFE_DYNAMIC
|
||||
SMOLFLAGS += --smol-opt align_stack
|
||||
# use_dnload_loader, use_dt_debug, use_dl_fini, no_start_arg, unsafe_dynamic
|
||||
|
||||
NASM ?= nasm
|
||||
PYTHON3 ?= python3
|
||||
|
||||
SMOLFLAGS += --nasm=$(NASM)
|
||||
|
||||
all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag $(BINDIR)/hello-_start
|
||||
|
||||
LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL
|
||||
|
@ -54,41 +55,33 @@ LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL
|
|||
clean:
|
||||
@$(RM) -vrf $(OBJDIR) $(BINDIR)
|
||||
|
||||
%/:
|
||||
$(OBJDIR)/ $(BINDIR)/:
|
||||
@mkdir -vp "$@"
|
||||
|
||||
.SECONDARY:
|
||||
|
||||
$(OBJDIR)/%.lto.o: $(SRCDIR)/%.c $(OBJDIR)/
|
||||
$(OBJDIR)/%.lto.o: $(SRCDIR)/%.c | $(OBJDIR)/
|
||||
$(CC) -flto $(CFLAGS) -c "$<" -o "$@"
|
||||
$(OBJDIR)/%.lto.o: $(TESTDIR)/%.c $(OBJDIR)/
|
||||
$(OBJDIR)/%.lto.o: $(TESTDIR)/%.c | $(OBJDIR)/
|
||||
$(CC) -flto $(CFLAGS) -c "$<" -o "$@"
|
||||
|
||||
$(OBJDIR)/%.o: $(SRCDIR)/%.c $(OBJDIR)/
|
||||
$(OBJDIR)/%.start.o: $(OBJDIR)/%.lto.o $(OBJDIR)/crt1.lto.o
|
||||
$(CC) $(LDFLAGS) -r -o "$@" $^
|
||||
|
||||
$(OBJDIR)/%.o: $(SRCDIR)/%.c | $(OBJDIR)/
|
||||
$(CC) $(CFLAGS) -c "$<" -o "$@"
|
||||
$(OBJDIR)/%.o: $(TESTDIR)/%.c $(OBJDIR)/
|
||||
$(OBJDIR)/%.o: $(TESTDIR)/%.c | $(OBJDIR)/
|
||||
$(CC) $(CFLAGS) -c "$<" -o "$@"
|
||||
|
||||
$(OBJDIR)/%.start.o: $(OBJDIR)/%.lto.o $(OBJDIR)/crt1.lto.o
|
||||
$(CC) $(LDFLAGS) -r -o "$@" $^
|
||||
|
||||
$(OBJDIR)/symbols.%.asm: $(OBJDIR)/%.o
|
||||
$(PYTHON3) ./smol.py $(SMOLFLAGS) $(LIBS) "$<" "$@"
|
||||
$(BINDIR)/%: $(OBJDIR)/%.o | $(BINDIR)/
|
||||
./smold $(SMOLFLAGS) $(LIBS) $^ -o $@
|
||||
./smoltrunc "$@" "$(OBJDIR)/$(notdir $@)" && mv "$(OBJDIR)/$(notdir $@)" "$@" && chmod +x "$@"
|
||||
|
||||
$(OBJDIR)/stub.%.o: $(OBJDIR)/symbols.%.asm $(SRCDIR)/header32.asm \
|
||||
$(SRCDIR)/loader32.asm
|
||||
$(NASM) $(ASFLAGS) $< -o $@
|
||||
|
||||
$(OBJDIR)/stub.%.start.o: $(OBJDIR)/symbols.%.start.asm $(SRCDIR)/header32.asm \
|
||||
$(SRCDIR)/loader32.asm
|
||||
$(NASM) $(ASFLAGS) $< -o $@
|
||||
|
||||
$(BINDIR)/%: $(OBJDIR)/%.o $(OBJDIR)/stub.%.o $(BINDIR)/
|
||||
$(CC) -Wl,-Map=$(BINDIR)/$*.map $(LDFLAGS_) $(OBJDIR)/$*.o $(OBJDIR)/stub.$*.o -o "$@"
|
||||
./rmtrailzero.py "$@" "$(OBJDIR)/$(notdir $@)" && mv "$(OBJDIR)/$(notdir $@)" "$@" && chmod +x "$@"
|
||||
|
||||
$(BINDIR)/%-crt: $(OBJDIR)/%.start.o $(OBJDIR)/stub.%.start.o $(BINDIR)/
|
||||
$(CC) -Wl,-Map=$(BINDIR)/$*-crt.map $(LDFLAGS_) $(OBJDIR)/$*.start.o $(OBJDIR)/stub.$*.start.o -o "$@"
|
||||
$(BINDIR)/%-crt: $(OBJDIR)/%.start.o | $(BINDIR)/
|
||||
./smold $(SMOLFLAGS) $(LIBS) $^ -o $@
|
||||
|
||||
.PHONY: all clean
|
||||
|
||||
|
|
60
README.md
60
README.md
|
@ -2,47 +2,49 @@
|
|||
|
||||
Shoddy minsize-oriented linker
|
||||
|
||||
PoC by Shiz, bugfixing and 64-bit version by PoroCYon.
|
||||
PoC and rewrite by Shiz, bugfixing and 64-bit version by PoroCYon.
|
||||
|
||||
## Requirements
|
||||
|
||||
A functional toolchain, Python 3 and `pax-utils`.
|
||||
|
||||
## Usage
|
||||
|
||||
```sh
|
||||
./smol.py -lfoo -lbar input.o... smol-output.asm
|
||||
nasm -I src/ [-Doption ...] -o nasm-output.o smol-output.asm
|
||||
ld -T ld/link.ld --oformat=binary -o output.elf nasm-output.o input.o...
|
||||
# or cc -T ld/link.ld -Wl,--oformat=binary -o output.elf nasm-output.o input.o...
|
||||
./smol.py [LDFLAGS...] [--smol-opt=...] -lfoo -lbar input.o -o smol-output
|
||||
```
|
||||
|
||||
* `USE_INTERP`: Include an interp segment in the output ELF file. If not, the
|
||||
dynamic linker **must** be invoked *explicitly*! (You probably want to
|
||||
enable this.) Costs the size of a phdr plus the size of the interp string.
|
||||
* `ALIGN_STACK`: *64-bit only*: realign the stack so that SSE instructions
|
||||
Possible `smol-opt`s:
|
||||
* `use_interp` (default): Include an interp segment in the output ELF file.
|
||||
If not, the dynamic linker **must** be invoked *explicitly*!
|
||||
Costs the size of a phdr plus the size of the interp string.
|
||||
* `align_stack`: *64-bit only*: realign the stack so that SSE instructions
|
||||
won't segfault. Costs 1 byte.
|
||||
* `USE_NX`: Don't use `RWE` segments at all. Not very well tested. Costs the
|
||||
* `use_nx`: Don't use `RWE` segments at all. Not very well tested. Costs the
|
||||
size of 1 phdr.
|
||||
* `USE_DL_FINI`: keep track of the `_dl_fini` function and pass it to your
|
||||
* `use_dl_fini`: keep track of the `_dl_fini` function and pass it to your
|
||||
`_start`. Costs 2 bytes, plus maybe a few more depending on how it's passed
|
||||
to `__libc_start_main`.
|
||||
* `USE_DT_DEBUG`: retrieve the `struct link_map` from the `r_debug` linker
|
||||
* `use_dt_debug`: retrieve the `struct link_map` from the `r_debug` linker
|
||||
data (which is placed at `DT_DEBUG` at startup) instead of exploiting data
|
||||
leakage from `_dt_start_user`. Might be more compatible and compressible, but
|
||||
strictly worse size-wise by 10 (i386) or 3 (x86_64) bytes.
|
||||
* `SKIP_ENTRIES`: skip the first two entries of the `struct link_map`, which
|
||||
* `skip_entries`: skip the first two entries of the `struct link_map`, which
|
||||
represent the main binary and the vDSO. Costs around 5 bytes.
|
||||
* `USE_DNLOAD_LOADER`: *64-bit only*: use the symbol loading mechanism as used
|
||||
* `use_dnload_loader`: *64-bit only*: use the symbol loading mechanism as used
|
||||
in dnload (i.e. traverse the symtab of the imported libraries). Slightly
|
||||
larger, but probably better compressible.
|
||||
* `NO_START_ARG`: *don't* pass the stack pointer to `_start` as the first arg.
|
||||
* `no_start_arg`: *don't* pass the stack pointer to `_start` as the first arg.
|
||||
Will make it unable to read argc/argv/environ, but gives you 3 bytes.
|
||||
|
||||
```
|
||||
usage: smol.py [-h] [-m TARGET] [-l LIB] [-L DIR] [--nasm NASM] [--cc CC]
|
||||
[--scanelf SCANELF] [--readelf READELF]
|
||||
input [input ...] output
|
||||
usage: smold [-h] [-m TARGET] [-l LIB] [-L DIR] [--smol-opt OPT]
|
||||
[--loader-dir LOADER_DIR] [--nasm NASM] [--ld LD] [--cc CC]
|
||||
[--scanelf SCANELF] [--readelf READELF] [-o OUT]
|
||||
input [input ...]
|
||||
|
||||
positional arguments:
|
||||
input input object file
|
||||
output output nasm file
|
||||
input input object file(s)
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
|
@ -51,27 +53,32 @@ optional arguments:
|
|||
-l LIB, --library LIB
|
||||
libraries to link against
|
||||
-L DIR, --libdir DIR directories to search libraries in
|
||||
--smol-opt OPT optimization flags for smol
|
||||
--loader-dir LOADER_DIR
|
||||
path to loader files
|
||||
--nasm NASM which nasm binary to use
|
||||
--ld LD which ld binary to use
|
||||
--cc CC which cc binary to use
|
||||
--scanelf SCANELF which scanelf binary to use
|
||||
--readelf READELF which readelf binary to use
|
||||
-o OUT, --output OUT output binary
|
||||
```
|
||||
|
||||
A minimal crt (and `_start` function) are provided in case you want to use `main`.
|
||||
|
||||
## smoldd
|
||||
|
||||
`smoldd.py` is a script that tries to resolve all symbols from the hashes when
|
||||
`smoldd` is a script that tries to resolve all symbols from the hashes when
|
||||
imported by a `smol`-ified binary. This can thus be used to detect user mistakes
|
||||
during dynamic linking. (Think of it as an equivalent of `ldd`, except that it
|
||||
also checks whether the imported functions are present as well.)
|
||||
|
||||
***NOTE***: `smoldd.py` currently doesn't support 64-bit binaries anymore, as
|
||||
***NOTE***: `smoldd` currently doesn't support 64-bit binaries anymore, as
|
||||
there's currently no (good) way of retrieving the symbol hash table anymore.
|
||||
|
||||
## Internal workings
|
||||
|
||||
`smol.py` inspects the input object files for needed library files and symbols.
|
||||
`smold` inspects the input object files for needed library files and symbols.
|
||||
It then outputs the list of needed libraries, hashes of the needed symbols and
|
||||
provides stubs for the external functions. This is then combined with a
|
||||
custom-made, small ELF header and 'runtime linker' which resolves the symbols
|
||||
|
@ -129,9 +136,12 @@ and 0, and will thus pretty much never change.)
|
|||
|
||||
## Greets
|
||||
|
||||
auld alrj blackle breadbox faemiyah gib3&tix0 las leblane parcelshit unlord
|
||||
```
|
||||
auld / alrj / blackle / breadbox / Calodox
|
||||
faemiyah / gib3 & tix0 / las / leblane
|
||||
parcelshit / PWP / Team210 / unlord / yx
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
[WTFPL](/LICENSE)
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
%if __BITS__ == 32
|
||||
%define EI_CLASS (1) ; 1 == 32-bit
|
||||
%else
|
|
@ -1,5 +1,3 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
%include "linkscr.inc"
|
||||
|
||||
[section .header]
|
|
@ -1,5 +1,3 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
%include "linkscr.inc"
|
||||
|
||||
[section .header]
|
|
@ -1,5 +1,3 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
extern _smol_origin
|
||||
extern _smol_total_size
|
||||
extern _smol_text_start
|
|
@ -1,5 +1,3 @@
|
|||
; vim: set ft=nasm ts=8:
|
||||
|
||||
%include "rtld.inc"
|
||||
|
||||
%ifdef ELF_TYPE
|
|
@ -1,5 +1,3 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
;%define R10_BIAS (0x2B4)
|
||||
%define R10_BIAS (0x2B4+0x40)
|
||||
|
|
@ -1,5 +1,3 @@
|
|||
; vim: set ft=nasm:
|
||||
|
||||
%if __BITS__ == 32
|
||||
%define LM_NAME_OFFSET 0x4
|
||||
%define LM_NEXT_OFFSET 0xC
|
73
smol.py
73
smol.py
|
@ -1,73 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import itertools
|
||||
import os.path
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from smolshared import *
|
||||
from smolparse import *
|
||||
from smolemit import *
|
||||
|
||||
def main():
    """Generate the nasm symbol table for a set of input object files.

    Parses the command line, determines the target architecture (from
    ``-m`` or from the input files), collects the undefined global symbols
    of the inputs, resolves each one to the library that provides it and
    passes the result to ``output``.

    Exits with status 1 when the architecture is unknown or a symbol cannot
    be found in any of the requested libraries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--target', default='',
        help='architecture to generate asm code for (default: auto)')
    parser.add_argument('-l', '--library', metavar='LIB', action='append',
        help='libraries to link against')
    parser.add_argument('-L', '--libdir', metavar='DIR', action='append',
        help="directories to search libraries in")

    parser.add_argument('--nasm', default=shutil.which('nasm'),
        help="which nasm binary to use")
    parser.add_argument('--cc', default=shutil.which('cc'),
        help="which cc binary to use")
    parser.add_argument('--scanelf', default=shutil.which('scanelf'),
        help="which scanelf binary to use")
    parser.add_argument('--readelf', default=shutil.which('readelf'),
        help="which readelf binary to use")

    # Options that are not wired up (yet):
#    parser.add_argument('-d', '--dnload', default=False, action='store_true', \
#        help="Use dnload's mechanism of importing functions. Slightly larger, but usually better compressible.")
#    parser.add_argument('--libsep', default=False, action='store_true', \
#        help="Separate import symbols per library, instead of looking at every library when resolving a symbol.")

    parser.add_argument('input', nargs='+', help="input object file")
    parser.add_argument('output', type=argparse.FileType('w'),
        help="output nasm file", default=sys.stdout)

    args = parser.parse_args()

    # action='append' leaves the attribute at None when the flag is absent.
    libdirs = args.libdir or []
    libnames = args.library or []

    # str has no tolower(); lower() is the method that normalises the name.
    arch = args.target.lower() if args.target else decide_arch(args.input)
    if arch not in archmagic:
        eprintf("Unknown architecture '" + str(arch) + "'")
        sys.exit(1)

    syms = get_needed_syms(args.readelf, args.input)

    paths = get_cc_paths(args.cc)

    # User-supplied directories take precedence over the compiler's own.
    spaths = libdirs + paths['libraries']
    libs = list(find_libs(spaths, libnames))
    symbols = {}

    for symbol, reloc in syms:
        library = find_symbol(args.scanelf, libs, libnames, symbol)
        if not library:
            eprintf("could not find symbol: {}".format(symbol))
            sys.exit(1)
        symbols.setdefault(library, []).append((symbol, reloc))

    output(arch, symbols, args.output)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
import enum
|
||||
|
||||
|
||||
class ELFMachine(enum.IntEnum):
    """ELF ``e_machine`` identifiers of the architectures smol can target."""

    i386 = 3
    x86_64 = 62


# Word size, in bits, assumed for each machine when the target is named
# explicitly rather than read from an input object.
ELF_DEFAULT_BITS = {
    ELFMachine.i386: 32,
    ELFMachine.x86_64: 64,
}
|
|
@ -1,39 +1,31 @@
|
|||
|
||||
import sys
|
||||
|
||||
from smolshared import *
|
||||
from .util import error, hash_djb2
|
||||
from .elf import ELFMachine
|
||||
|
||||
|
||||
def output_x86(libraries, outf):
|
||||
outf.write('; vim: set ft=nasm:\n') # be friendly
|
||||
outf.write('bits 32\n')
|
||||
|
||||
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
|
||||
|
||||
outf.write('%include "header32.asm"\n')
|
||||
outf.write('%include "header-x86.asm"\n')
|
||||
outf.write('dynamic.needed:\n')
|
||||
for library in libraries:
|
||||
outf.write('dd 1;DT_NEEDED\n')
|
||||
outf.write('dd (_symbols.{} - _strtab)\n'.format(shorts[library]))
|
||||
outf.write('dynamic.end:\n')
|
||||
|
||||
# if needgot:
|
||||
# outf.write('global _GLOBAL_OFFSET_TABLE_\n')
|
||||
# outf.write('_GLOBAL_OFFSET_TABLE_:\n')
|
||||
# outf.write('dd dynamic\n')
|
||||
outf.write('_strtab:\n')
|
||||
# if not libsep:
|
||||
# for library, symrels in libraries.items():
|
||||
# outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
|
||||
|
||||
outf.write('_symbols:\n')
|
||||
for library, symrels in libraries.items():
|
||||
# if libsep:
|
||||
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
|
||||
|
||||
for sym, reloc in symrels:
|
||||
# meh
|
||||
if reloc != 'R_386_PC32':
|
||||
eprintf('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!')
|
||||
error('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!')
|
||||
sys.exit(1)
|
||||
|
||||
hash = hash_djb2(sym)
|
||||
|
@ -47,17 +39,15 @@ def output_x86(libraries, outf):
|
|||
outf.write('db 0\n')
|
||||
outf.write('_symbols.end:\n')
|
||||
|
||||
outf.write('%include "loader32.asm"\n')
|
||||
# end output_x86
|
||||
outf.write('%include "loader-x86.asm"\n')
|
||||
|
||||
|
||||
def output_amd64(libraries, outf):
|
||||
outf.write('; vim: set ft=nasm:\n')
|
||||
def output_x86_64(libraries, outf):
|
||||
outf.write('bits 64\n')
|
||||
|
||||
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
|
||||
|
||||
outf.write('%include "header64.asm"\n')
|
||||
outf.write('%include "header-x86_64.asm"\n')
|
||||
outf.write('dynamic.needed:\n')
|
||||
for library in libraries:
|
||||
outf.write(' dq 1;DT_NEEDED\n')
|
||||
|
@ -112,14 +102,16 @@ global {name}
|
|||
""".format(lib=shorts[library],name=sym).lstrip('\n'))
|
||||
|
||||
outf.write('_smolplt.end:\n')
|
||||
outf.write('%include "loader64.asm"\n')
|
||||
# end output_amd64
|
||||
outf.write('%include "loader-x86_64.asm"\n')
|
||||
|
||||
|
||||
def output(arch, libraries, outf):
|
||||
if arch == 'i386': output_x86(libraries, outf)
|
||||
elif arch == 'x86_64': output_amd64(libraries, outf)
|
||||
def output_table(arch, libraries, outf):
|
||||
if arch == ELFMachine.i386:
|
||||
return output_x86(libraries, outf)
|
||||
elif arch == ELFMachine.x86_64:
|
||||
return output_x86_64(libraries, outf)
|
||||
else:
|
||||
error('')
|
||||
eprintf("E: cannot emit for arch '" + str(arch) + "'")
|
||||
sys.exit(1)
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
|
||||
import glob
|
||||
import os.path
|
||||
import subprocess
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from .elf import ELFMachine
|
||||
from .util import warn, error
|
||||
|
||||
|
||||
def decide_arch(files):
    """Determine the common ELF machine and word size of the input files.

    Reads the start of the ELF header of every path in *files*.

    Returns ``(ELFMachine, bits)`` where *bits* is ``EI_CLASS * 32`` of the
    last file read. Returns ``(None, None)`` after printing a warning when
    a file is too short or lacks the ELF magic, when the files disagree on
    the machine, or when the machine is not a member of ``ELFMachine``.
    """
    machine = None
    clas = None

    for fn in files:
        with open(fn, 'rb') as f:
            # e_ident (16 bytes) + e_type (2 bytes) + e_machine (2 bytes)
            header = f.read(20)

        # Reject truncated or non-ELF input up front rather than failing
        # with an IndexError/struct.error while picking the header apart.
        if len(header) < 20 or header[:4] != b'\x7FELF':
            warn('{}: not an ELF file'.format(fn))
            return None, None

        clas = header[4] * 32  # EI_CLASS
        machid = struct.unpack('<H', header[18:20])[0]  # e_machine

        if machine is not None and machine != machid:
            warn('Input files have multiple architectures, can\'t link this...')
            return None, None
        machine = machid

    if machine not in set(item.value for item in ELFMachine):
        warn('Unsupported machine ID: {}'.format(machine))
        warn('If you are sure this is correct, contact us to add support!')
        return None, None

    return ELFMachine(machine), clas
|
||||
|
||||
def build_reloc_typ_table(reo):
    """Map symbol names to relocation types from ``readelf -r -W`` output.

    *reo* is the raw (bytes) output. Every line with at least five
    whitespace-separated columns contributes ``column 5 -> column 3``;
    shorter lines (probably headers) are ignored.

    Every reference to one symbol is assumed to use the same relocation
    type; when that does not hold, the last line seen wins.
    """
    rows = (line.split() for line in reo.decode('utf-8').splitlines())
    return {cols[4]: cols[2] for cols in rows if len(cols) >= 5}
|
||||
|
||||
def get_needed_syms(readelf_bin, inpfiles):
    """Collect the undefined global symbols of *inpfiles* that have a relocation.

    Runs *readelf_bin* twice (``-s -W`` for the symbol tables, ``-r -W``
    for the relocations) and returns a set of ``(name, relocation_type)``
    tuples.
    """
    def run_readelf(flag):
        return subprocess.check_output([readelf_bin, flag, '-W'] + inpfiles,
                                       stderr=subprocess.DEVNULL)

    symtab = run_readelf('-s')
    relocs = build_reloc_typ_table(run_readelf('-r'))

    needed = set()
    for line in symtab.decode('utf-8').splitlines():
        fields = line.split()
        if len(fields) < 8:
            continue
        bind, section, name = fields[4], fields[6], fields[7]
        if bind == "GLOBAL" and section == "UND" and name and name in relocs:
            needed.add((name, relocs[name]))

    return needed
|
||||
|
||||
def get_cc_paths(cc_bin):
    """Return the search directories reported by ``cc -print-search-dirs``.

    The result maps each category printed by the compiler (the text before
    ``': '``) to a list of existing directories, resolved with
    ``os.path.realpath``. Duplicates are removed, but the compiler's order
    is kept, so that "first directory wins" lookups give the same result on
    every run.

    Raises ``subprocess.CalledProcessError`` when the compiler fails.
    """
    output = subprocess.check_output([cc_bin, '-print-search-dirs'],
                                     stderr=subprocess.DEVNULL)
    paths = {}
    for entry in output.decode('utf-8').splitlines():
        category, sep, path = entry.partition(': ')
        if not sep:
            # Not a "category: value" line; nothing to record.
            continue
        path = path.lstrip('=')
        # dict.fromkeys() de-duplicates while preserving insertion order,
        # unlike set(), whose iteration order is arbitrary.
        paths[category] = list(dict.fromkeys(
            os.path.realpath(p) for p in path.split(':') if os.path.isdir(p)))
    return paths
|
||||
|
||||
def is_valid_elf(fn):
    """Return True when the file at *fn* starts with the ELF magic bytes."""
    with open(fn, 'rb') as stream:
        magic = stream.read(4)
    return magic == b'\x7FELF'
|
||||
|
||||
def find_lib(spaths, wanted):
    """Locate the shared object for library *wanted* in the directories *spaths*.

    In each directory, ``lib<wanted>.so*`` is tried before ``<wanted>.so*``.
    The first match that is a regular file and passes ``is_valid_elf`` is
    returned. Returns None when nothing matches.
    """
    for directory in spaths:
        for stem in ('/lib' + wanted, '/' + wanted):
            for candidate in glob.glob(glob.escape(directory + stem) + '.so*'):
                if os.path.isfile(candidate) and is_valid_elf(candidate):
                    return candidate

    return None
|
||||
|
||||
def find_symbol(scanelf_bin, libraries, libnames, symbol):
    """Return the soname of the library that provides *symbol*, or None.

    Runs *scanelf_bin* over the library files in *libraries* and reads
    lines of the form ``<symbols> <soname> <path>``. A line matches when
    *symbol* is among its comma-separated symbols and its soname starts
    with ``lib<name>`` for some name in *libnames*.
    """
    command = [scanelf_bin, '-B', '-F%s %S', '-s', '+{}'.format(symbol)]
    report = subprocess.check_output(command + libraries,
                                     stderr=subprocess.DEVNULL)

    for line in report.decode('utf-8').splitlines():
        found, soname, path = line.split(' ', 2)
        if symbol not in found.split(','):
            continue
        if any(soname.startswith('lib' + name) for name in libnames):
            return soname

    return None
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
import sys
|
||||
import struct
|
||||
|
||||
|
||||
def log(msg, file=sys.stdout):
    """Write *msg* followed by a newline to *file* (standard output by default)."""
    for piece in (msg, '\n'):
        file.write(piece)
|
||||
|
||||
def warn(msg):
    """Write *msg* as one line to standard error."""
    stream = sys.stderr
    log(msg, file=stream)
|
||||
|
||||
def error(msg):
    """Write *msg* to standard error, then terminate with exit status 1."""
    stream = sys.stderr
    log(msg, file=stream)
    sys.exit(1)
|
||||
|
||||
|
||||
def hash_djb2(s):
    """Return the 32-bit djb2 hash of the string *s*.

    Starts at 5381 and, for each character, multiplies the running value
    by 33, adds the character's code point and truncates to 32 bits.
    """
    digest = 5381
    for code in map(ord, s):
        digest = (digest * 33 + code) & 0xFFFFFFFF
    return digest
|
||||
|
||||
def readbyte(blob, off):
    """Read one unsigned byte of *blob* at *off*; return ``(value, next_offset)``."""
    end = off + 1
    (value,) = struct.unpack('<B', blob[off:end])
    return value, end
|
||||
|
||||
def readint(blob, off):
    """Read a little-endian unsigned 32-bit integer of *blob* at *off*.

    Returns ``(value, next_offset)``.
    """
    end = off + 4
    (value,) = struct.unpack('<I', blob[off:end])
    return value, end
|
||||
|
||||
def readlong(blob, off):
    """Read a little-endian unsigned 64-bit integer of *blob* at *off*.

    Returns ``(value, next_offset)``.
    """
    end = off + 8
    (value,) = struct.unpack('<Q', blob[off:end])
    return value, end
|
||||
|
||||
def readstr(blob, off):
    """Read a NUL-terminated UTF-8 string of *blob* starting at *off*.

    Returns ``(text, next_offset)``, where *next_offset* points just past
    the terminating NUL byte.
    """
    collected = bytearray()

    char, off = readbyte(blob, off)
    while char != 0:
        collected.append(char)
        char, off = readbyte(blob, off)

    return collected.decode('utf-8'), off
|
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env python3
|
||||
import os
|
||||
from os import path
|
||||
import sys
|
||||
import argparse
|
||||
import shutil
|
||||
import tempfile
|
||||
import subprocess
|
||||
|
||||
from smol.util import error
|
||||
from smol.elf import ELFMachine, ELF_DEFAULT_BITS
|
||||
from smol.parse import decide_arch, find_lib, find_symbol, get_cc_paths, get_needed_syms
|
||||
from smol.emit import output_table
|
||||
|
||||
|
||||
def main():
    """Link the input objects into a smol binary in a single invocation.

    Steps:
      1. Parse the command line; unrecognised arguments are forwarded to ld.
      2. Determine the target machine and word size (``-m`` or the inputs).
      3. Resolve every needed symbol to the library that provides it.
      4. Emit the symbol table as nasm source, assemble it together with the
         loader, and link everything with the loader's linker script.

    Exits with a non-zero status on any error.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--target',
        help='architecture to generate asm code for (default: auto)')
    parser.add_argument('-l', '--library', metavar='LIB', action='append',
        help='libraries to link against')
    parser.add_argument('-L', '--libdir', metavar='DIR', action='append',
        help='directories to search libraries in')

    parser.add_argument('--smol-opt', metavar='OPT', default=['use_interp'], action='append',
        help='optimization flags for smol')
    parser.add_argument('--loader-dir', default=path.join(path.dirname(__file__), 'ldr'),
        help='path to loader files')
    parser.add_argument('--nasm', default=os.getenv('NASM') or shutil.which('nasm'),
        help='which nasm binary to use')
    parser.add_argument('--ld', default=os.getenv('LD') or shutil.which('ld'),
        help='which ld binary to use')
    parser.add_argument('--cc', default=os.getenv('CC') or shutil.which('cc'),
        help='which cc binary to use')
    parser.add_argument('--scanelf', default=os.getenv('SCANELF') or shutil.which('scanelf'),
        help='which scanelf binary to use')
    parser.add_argument('--readelf', default=os.getenv('READELF') or shutil.which('readelf'),
        help='which readelf binary to use')

    parser.add_argument('input', nargs='+', help='input object file(s)')
    parser.add_argument('-o', '--output', metavar='OUT', default='smol.out', help='output binary')

    # Anything argparse does not recognise is passed through to ld.
    args, ld_args = parser.parse_known_args()

    for util in ['nasm', 'cc', 'scanelf', 'readelf', 'ld']:
        if not getattr(args, util):
            # The template uses the named field {u}, so it must be passed by
            # keyword; a positional argument raises KeyError.
            parser.error('utility "{u}" could not be found! either install it or pass the path with --{u}'
                         .format(u=util))

    # An option prefixed with '-' removes a previously enabled option.
    opts = set()
    for opt in args.smol_opt:
        if opt.startswith('-'):
            opts.discard(opt[1:])
            continue
        opts.add(opt)

    args.library = args.library or []
    args.libdir = args.libdir or []

    if args.target:
        arch_name = args.target.lower().replace('elf_', '')
        # Look the machine up by member *name* (ELFMachine['i386']);
        # calling ELFMachine(...) looks up by numeric value instead.
        try:
            arch = ELFMachine[arch_name]
        except KeyError:
            parser.error('unknown architecture: {}'.format(arch_name))
        bits = ELF_DEFAULT_BITS[arch]
    else:
        arch, bits = decide_arch(args.input)
        if not arch or not bits:
            error('Invalid architecture!')

    syms = get_needed_syms(args.readelf, args.input)
    paths = get_cc_paths(args.cc)

    # User-supplied directories are searched before the compiler's own.
    libdirs = args.libdir + paths['libraries']
    libs = []
    libnames = args.library
    for libname in libnames:
        lib = find_lib(libdirs, libname)
        if not lib:
            error('could not find library: {}'.format(libname))
        libs.append(lib)

    symbols = {}
    for symbol, reloc in syms:
        library = find_symbol(args.scanelf, libs, libnames, symbol)
        if not library:
            error("could not find symbol: {}".format(symbol))
        symbols.setdefault(library, []).append((symbol, reloc))

    # Each enabled option becomes a nasm define; sorted so the command line
    # is the same on every run.
    as_args = ['-D' + opt.upper() for opt in sorted(opts)]
    with tempfile.NamedTemporaryFile('w', suffix='.s') as table, \
            tempfile.NamedTemporaryFile('w', suffix='.o') as tableobj:
        output_table(arch, symbols, table)
        # nasm reads the file by name, so the contents must be on disk.
        table.flush()
        try:
            subprocess.check_call(
                [args.nasm] + as_args
                + ['-I', args.loader_dir + '/', '-f', 'elf{}'.format(bits),
                   table.name, '-o', tableobj.name])
            subprocess.check_call(
                [args.ld, '-T', os.path.join(args.loader_dir, 'link.ld'),
                 '--oformat=binary', '-o', args.output, tableobj.name]
                + ld_args + args.input)
        except subprocess.CalledProcessError:
            sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -8,27 +8,9 @@ import subprocess
|
|||
import struct
|
||||
import sys
|
||||
|
||||
from smolshared import *
|
||||
from smol.util import hash_djb2, readint, readstr, error
|
||||
from smol.elf import ELFMachine
|
||||
|
||||
def readbyte(blob, off):
|
||||
return struct.unpack('<B', blob[off:off+1])[0], (off+1)
|
||||
|
||||
def readint(blob, off):
|
||||
return struct.unpack('<I', blob[off:off+4])[0], (off+4)
|
||||
|
||||
def readlong(blob, off):
|
||||
return struct.unpack('<Q', blob[off:off+8])[0], (off+8)
|
||||
|
||||
def readstr(blob, off):
|
||||
text = bytearray()
|
||||
while True:
|
||||
char, off = readbyte(blob, off)
|
||||
if char == 0:
|
||||
break
|
||||
|
||||
text.append(char)
|
||||
|
||||
return text.decode('utf-8'), off
|
||||
|
||||
def get_def_libpaths(cc_bin, is32bit):
|
||||
if is32bit:
|
||||
|
@ -37,10 +19,10 @@ def get_def_libpaths(cc_bin, is32bit):
|
|||
out = subprocess.check_output([cc_bin, '-print-search-dirs'],
|
||||
stderr=subprocess.DEVNULL)
|
||||
|
||||
stuff = dict({})
|
||||
stuff = {}
|
||||
for l in out.decode('utf-8').splitlines():
|
||||
blah = l.split(': ')
|
||||
stuff[blah[0]] = blah[1].lstrip('=').split(':')
|
||||
cols = l.split(': ')
|
||||
stuff[cols[0]] = cols[1].lstrip('=').split(':')
|
||||
|
||||
return stuff["libraries"]
|
||||
|
||||
|
@ -55,14 +37,14 @@ def build_hashtab(scanelf_bin, lib):
|
|||
out = subprocess.check_output([scanelf_bin, '-B', '-F', '%s', '-s', '%pd%*', lib],
|
||||
stderr=subprocess.DEVNULL)
|
||||
|
||||
blah = set(out.decode('utf-8').split('\n'))
|
||||
ret = dict({})
|
||||
lines = set(out.decode('utf-8').split('\n'))
|
||||
ret = {}
|
||||
|
||||
for x in blah:
|
||||
y = x.split()
|
||||
if len(y) != 7:
|
||||
for line in lines:
|
||||
cols = line.split()
|
||||
if len(cols) != 7:
|
||||
continue
|
||||
ret[hash_djb2(y[6])] = y[6]
|
||||
ret[hash_djb2(cols[6])] = cols[6]
|
||||
|
||||
return ret
|
||||
|
||||
|
@ -79,31 +61,30 @@ def main():
|
|||
|
||||
blob = args.input.read()
|
||||
|
||||
bits = blob[4] * 32
|
||||
machnum = struct.unpack('<H', blob[18:18+2])[0]
|
||||
try:
|
||||
machine = ELFMachine(machnum)
|
||||
except:
|
||||
error('unknown architecture: {}'.format(machnum))
|
||||
|
||||
is32bit = machnum == archmagic['i386']
|
||||
|
||||
deflibs = get_def_libpaths(args.cc, is32bit)
|
||||
deflibs = get_def_libpaths(args.cc, bits == 32)
|
||||
|
||||
phoff, phsz, phnum = 0, 0, 0
|
||||
if is32bit:
|
||||
if bits == 32:
|
||||
phoff = struct.unpack('<I', blob[28:28+4])[0]
|
||||
phsz = struct.unpack('<H', blob[42:42+2])[0]
|
||||
phnum = struct.unpack('<H', blob[44:52+2])[0]
|
||||
elif machnum == archmagic['x86_64']:
|
||||
elif bits == 64:
|
||||
phoff = struct.unpack('<Q', blob[32:32+8])[0]
|
||||
phsz = struct.unpack('<H', blob[54:54+2])[0]
|
||||
phnum = struct.unpack('<H', blob[56:56+2])[0]
|
||||
else:
|
||||
eprintf("Unknown architecture " + str(machnum))
|
||||
sys.exit(1)
|
||||
|
||||
for i in range(phnum):
|
||||
off = phoff + i * phsz
|
||||
#print(hex(off))
|
||||
|
||||
ptyp, poff, pva, ppa, pfsz, pmsz, pfl, pal = 0,0,0,0,0,0,0,0
|
||||
if is32bit:
|
||||
if bits == 32:
|
||||
ptyp, poff, pva, ppa, pfsz, pmsz, pfl, pal = \
|
||||
struct.unpack('<ILLLIIII', blob[off:off+phsz])
|
||||
else:
|
||||
|
@ -113,15 +94,13 @@ def main():
|
|||
if ptyp != 2: # PT_DYNAMIC
|
||||
continue
|
||||
|
||||
#print(hex(poff))
|
||||
|
||||
# right after the dynamic section, the smol 'symtab'/'hashtab' is found
|
||||
#
|
||||
# note that on i386, every lib name is followed by an E9 byte
|
||||
# if the next libname/first byte of the hash is null, the table has
|
||||
# come to an end.
|
||||
|
||||
if is32bit:
|
||||
if bits == 32:
|
||||
j = poff
|
||||
strtaboff = 0
|
||||
while j < poff + pfsz:
|
||||
|
@ -159,8 +138,7 @@ def main():
|
|||
|
||||
break
|
||||
else: # 64-bit
|
||||
eprintf("Currently unsuppored, sorry.")
|
||||
sys.exit(1)
|
||||
error('Currently unsupported, sorry.')
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
110
smolparse.py
110
smolparse.py
|
@ -1,110 +0,0 @@
|
|||
|
||||
import glob
|
||||
import os.path
|
||||
import subprocess
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from smolshared import *
|
||||
|
||||
def decide_arch(inpfiles):
    """Return the architecture name shared by all files in *inpfiles*.

    Reads the ``e_machine`` field of each file's ELF header and maps the
    single machine number found to its name via ``archmagic``.

    Exits with status 1 when the files do not all share one machine number
    or when that number is not known to ``archmagic``.
    """
    archs = set()

    for fp in inpfiles:
        with open(fp, 'rb') as ff:
            _ = ff.read(16)  # e_ident
            _ = ff.read(2)   # e_type
            machine = ff.read(2)  # e_machine

            machnum = struct.unpack('<H', machine)[0]
            archs.add(machnum)

    if len(archs) != 1:
        eprintf("Input files have multiple architectures, can't link this...")
        sys.exit(1)

    archn = list(archs)[0]

    if archn not in archmagic:
        eprintf("Unknown architecture number " + str(archn) + \
            ". Consult elf.h and rebuild your object files.")
        # Stop here: falling through to the lookup below would raise KeyError.
        sys.exit(1)

    return archmagic[archn]
|
||||
|
||||
def build_reloc_typ_table(reo):
    """Build a ``symbol name -> relocation type`` dict from ``readelf -r -W`` output.

    *reo* is the raw (bytes) output. Lines with fewer than five columns are
    probably headers and are skipped. References to the same symbol are
    assumed to share one relocation type; a later line overrides an
    earlier one.
    """
    table = {}

    for line in reo.decode('utf-8').splitlines():
        cols = line.split()
        if len(cols) >= 5:
            table[cols[4]] = cols[2]

    return table
|
||||
|
||||
def get_needed_syms(readelf_bin, inpfiles):
    """Return the set of ``(symbol, relocation_type)`` pairs the inputs need.

    A symbol is needed when ``readelf -s -W`` lists it as GLOBAL and UND
    with a non-empty name, and ``readelf -r -W`` shows a relocation for it.
    """
    symtab = subprocess.check_output([readelf_bin, '-s', '-W'] + inpfiles,
                                     stderr=subprocess.DEVNULL)
    reltab = subprocess.check_output([readelf_bin, '-r', '-W'] + inpfiles,
                                     stderr=subprocess.DEVNULL)

    relocs = build_reloc_typ_table(reltab)

    rows = (line.split() for line in symtab.decode('utf-8').splitlines())
    return {
        (cols[7], relocs[cols[7]])
        for cols in rows
        if len(cols) >= 8
        and cols[4] == "GLOBAL" and cols[6] == "UND"
        and len(cols[7]) > 0 and cols[7] in relocs
    }
|
||||
|
||||
def get_cc_paths(cc_bin):
    """Return the search directories reported by ``cc -print-search-dirs``.

    Maps each category printed by the compiler (the text before ``': '``)
    to a list of existing directories, resolved with ``os.path.realpath``.
    Duplicates are dropped while the compiler's order is preserved, so
    lookups that take the first matching directory are reproducible.

    Raises ``subprocess.CalledProcessError`` when the compiler fails.
    """
    output = subprocess.check_output([cc_bin, '-print-search-dirs'],
                                     stderr=subprocess.DEVNULL)
    paths = {}
    for entry in output.decode('utf-8').splitlines():
        category, sep, path = entry.partition(': ')
        if not sep:
            # Not a "category: value" line; nothing to record.
            continue
        path = path.lstrip('=')
        # dict.fromkeys() keeps first-seen order; set() would scramble it.
        paths[category] = list(dict.fromkeys(
            os.path.realpath(p) for p in path.split(':') if os.path.isdir(p)))
    return paths
|
||||
|
||||
def is_valid_elf(f):
    """Return True when the file at path *f* begins with the ELF magic bytes."""
    with open(f, 'rb') as stream:
        magic = stream.read(4)
    return magic == b'\x7FELF'
|
||||
|
||||
def find_lib(spaths, wanted):
    """Return the path of the shared object for library *wanted*.

    In each directory of *spaths*, ``lib<wanted>.so*`` is tried before
    ``<wanted>.so*``. The first regular file that passes ``is_valid_elf``
    is returned. Prints an error and exits with status 1 when nothing
    matches.
    """
    stems = ('/lib' + wanted, '/' + wanted)

    for base in spaths:
        for stem in stems:
            for hit in glob.glob(glob.escape(base + stem) + '.so*'):
                if os.path.isfile(hit) and is_valid_elf(hit):
                    return hit

    eprintf("E: couldn't find library '" + wanted + "'.")
    sys.exit(1)
|
||||
|
||||
def find_libs(spaths, wanted):
    """Lazily map every library name in *wanted* to its path via ``find_lib``."""
    def locate(name):
        return find_lib(spaths, name)

    return map(locate, wanted)
|
||||
|
||||
def find_symbol(scanelf_bin, libraries, libnames, symbol):
    """Return the soname of the library that provides *symbol*.

    Runs *scanelf_bin* over *libraries* and returns the soname of the first
    output line that lists *symbol* and whose soname starts with
    ``lib<name>`` for some name in *libnames*. Returns None implicitly when
    no line matches.
    """
    command = [scanelf_bin, '-B', '-F%s %S', '-s', '+{}'.format(symbol)]
    report = subprocess.check_output(command + libraries,
                                     stderr=subprocess.DEVNULL)

    for line in report.decode('utf-8').splitlines():
        found, soname, path = line.split(' ', 2)
        if symbol not in found.split(','):
            continue
        if any(soname.startswith('lib' + name) for name in libnames):
            return soname
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
|
||||
import sys
|
||||
|
||||
# Two-way lookup between architecture names and ELF e_machine numbers:
# a name maps to its number and a number maps back to its name.
archmagic = {
    'i386': 3,
    3: 'i386',
    'x86_64': 62,
    62: 'x86_64',
}
|
||||
|
||||
def hash_djb2(s):
    """Return the 32-bit djb2 hash of the string *s*.

    The running value starts at 5381; each character multiplies it by 33,
    adds the character's code point and truncates the result to 32 bits.
    """
    digest = 5381
    for code in map(ord, s):
        digest = (digest * 33 + code) & 0xFFFFFFFF
    return digest
|
||||
|
||||
def eprintf(*args, **kwargs):
    """Call ``print`` with the given arguments, directed to standard error."""
    print(*args, file=sys.stderr, **kwargs)
|
||||
|
|
@ -16,11 +16,11 @@ def main(argv):
|
|||
|
||||
i = 0
|
||||
while data[-i - 1] == 0:
|
||||
i = i + 1
|
||||
i += 1
|
||||
|
||||
args.output.write(data[0:len(data)-i])
|
||||
args.output.write(data[:-i])
|
||||
|
||||
if __name__ == '__main__':
|
||||
rv = main(sys.argv)
|
||||
exit(0 if rv is None else rv)
|
||||
rv = main(sys.argv) or 0
|
||||
sys.exit(rv)
|
||||
|
Loading…
Reference in New Issue