reorganize everything into one invocation

This commit is contained in:
Shiz 2019-04-21 11:56:00 +02:00
parent d9dbaae27a
commit 161dc41290
23 changed files with 353 additions and 335 deletions

2
.gitignore vendored
View File

@ -1,3 +1,3 @@
/bin
/obj
/__pycache__
__pycache__

View File

@ -1,6 +1,6 @@
OBJDIR := obj
BINDIR := bin
SRCDIR := src
SRCDIR := ldr
LDDIR := ld
TESTDIR:= test
@ -40,13 +40,14 @@ CXXFLAGS += -m$(BITS) $(shell pkg-config --cflags sdl2)
LIBS=-lc
SMOLFLAGS +=
ASFLAGS += -DUSE_INTERP -DALIGN_STACK
#-DUSE_DNLOAD_LOADER #-DUSE_DT_DEBUG #-DUSE_DL_FINI #-DNO_START_ARG #-DUNSAFE_DYNAMIC
SMOLFLAGS += --smol-opt align_stack
# use_dnload_loader, use_dt_debug, use_dl_fini, no_start_arg, unsafe_dynamic
NASM ?= nasm
PYTHON3 ?= python3
SMOLFLAGS += --nasm=$(NASM)
all: $(BINDIR)/hello-crt $(BINDIR)/sdl-crt $(BINDIR)/flag $(BINDIR)/hello-_start
LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL
@ -54,41 +55,33 @@ LIBS += $(filter-out -pthread,$(shell pkg-config --libs sdl2)) -lX11 #-lGL
clean:
@$(RM) -vrf $(OBJDIR) $(BINDIR)
%/:
$(OBJDIR)/ $(BINDIR)/:
@mkdir -vp "$@"
.SECONDARY:
$(OBJDIR)/%.lto.o: $(SRCDIR)/%.c $(OBJDIR)/
$(OBJDIR)/%.lto.o: $(SRCDIR)/%.c | $(OBJDIR)/
$(CC) -flto $(CFLAGS) -c "$<" -o "$@"
$(OBJDIR)/%.lto.o: $(TESTDIR)/%.c $(OBJDIR)/
$(OBJDIR)/%.lto.o: $(TESTDIR)/%.c | $(OBJDIR)/
$(CC) -flto $(CFLAGS) -c "$<" -o "$@"
$(OBJDIR)/%.o: $(SRCDIR)/%.c $(OBJDIR)/
$(OBJDIR)/%.start.o: $(OBJDIR)/%.lto.o $(OBJDIR)/crt1.lto.o
$(CC) $(LDFLAGS) -r -o "$@" $^
$(OBJDIR)/%.o: $(SRCDIR)/%.c | $(OBJDIR)/
$(CC) $(CFLAGS) -c "$<" -o "$@"
$(OBJDIR)/%.o: $(TESTDIR)/%.c $(OBJDIR)/
$(OBJDIR)/%.o: $(TESTDIR)/%.c | $(OBJDIR)/
$(CC) $(CFLAGS) -c "$<" -o "$@"
$(OBJDIR)/%.start.o: $(OBJDIR)/%.lto.o $(OBJDIR)/crt1.lto.o
$(CC) $(LDFLAGS) -r -o "$@" $^
$(OBJDIR)/symbols.%.asm: $(OBJDIR)/%.o
$(PYTHON3) ./smol.py $(SMOLFLAGS) $(LIBS) "$<" "$@"
$(BINDIR)/%: $(OBJDIR)/%.o | $(BINDIR)/
./smold $(SMOLFLAGS) $(LIBS) $^ -o $@
./smoltrunc "$@" "$(OBJDIR)/$(notdir $@)" && mv "$(OBJDIR)/$(notdir $@)" "$@" && chmod +x "$@"
$(OBJDIR)/stub.%.o: $(OBJDIR)/symbols.%.asm $(SRCDIR)/header32.asm \
$(SRCDIR)/loader32.asm
$(NASM) $(ASFLAGS) $< -o $@
$(OBJDIR)/stub.%.start.o: $(OBJDIR)/symbols.%.start.asm $(SRCDIR)/header32.asm \
$(SRCDIR)/loader32.asm
$(NASM) $(ASFLAGS) $< -o $@
$(BINDIR)/%: $(OBJDIR)/%.o $(OBJDIR)/stub.%.o $(BINDIR)/
$(CC) -Wl,-Map=$(BINDIR)/$*.map $(LDFLAGS_) $(OBJDIR)/$*.o $(OBJDIR)/stub.$*.o -o "$@"
./rmtrailzero.py "$@" "$(OBJDIR)/$(notdir $@)" && mv "$(OBJDIR)/$(notdir $@)" "$@" && chmod +x "$@"
$(BINDIR)/%-crt: $(OBJDIR)/%.start.o $(OBJDIR)/stub.%.start.o $(BINDIR)/
$(CC) -Wl,-Map=$(BINDIR)/$*-crt.map $(LDFLAGS_) $(OBJDIR)/$*.start.o $(OBJDIR)/stub.$*.start.o -o "$@"
$(BINDIR)/%-crt: $(OBJDIR)/%.start.o | $(BINDIR)/
./smold $(SMOLFLAGS) $(LIBS) $^ -o $@
.PHONY: all clean

View File

@ -2,47 +2,49 @@
Shoddy minsize-oriented linker
PoC by Shiz, bugfixing and 64-bit version by PoroCYon.
PoC and rewrite by Shiz, bugfixing and 64-bit version by PoroCYon.
## Requirements
A functional toolchain, Python 3 and `pax-utils`.
## Usage
```sh
./smol.py -lfoo -lbar input.o... smol-output.asm
nasm -I src/ [-Doption ...] -o nasm-output.o smol-output.asm
ld -T ld/link.ld --oformat=binary -o output.elf nasm-output.o input.o...
# or cc -T ld/link.ld -Wl,--oformat=binary -o output.elf nasm-output.o input.o...
./smold [LDFLAGS...] [--smol-opt=...] -lfoo -lbar input.o... -o smol-output
```
* `USE_INTERP`: Include an interp segment in the output ELF file. If not, the
dynamic linker **must** be invoked *explicitly*! (You probably want to
enable this.) Costs the size of a phdr plus the size of the interp string.
* `ALIGN_STACK`: *64-bit only*: realign the stack so that SSE instructions
Possible `smol-opt`s:
* `use_interp` (default): Include an interp segment in the output ELF file.
If not, the dynamic linker **must** be invoked *explicitly*!
Costs the size of a phdr plus the size of the interp string.
* `align_stack`: *64-bit only*: realign the stack so that SSE instructions
won't segfault. Costs 1 byte.
* `USE_NX`: Don't use `RWE` segments at all. Not very well tested. Costs the
* `use_nx`: Don't use `RWE` segments at all. Not very well tested. Costs the
size of 1 phdr.
* `USE_DL_FINI`: keep track of the `_dl_fini` function and pass it to your
* `use_dl_fini`: keep track of the `_dl_fini` function and pass it to your
`_start`. Costs 2 bytes, plus maybe a few more depending on how it's passed
to `__libc_start_main`.
* `USE_DT_DEBUG`: retrieve the `struct link_map` from the `r_debug` linker
* `use_dt_debug`: retrieve the `struct link_map` from the `r_debug` linker
data (which is placed at `DT_DEBUG` at startup) instead of exploiting data
leakage from `_dt_start_user`. Might be more compatible and compressible, but
strictly worse size-wise by 10 (i386) or 3 (x86_64) bytes.
* `SKIP_ENTRIES`: skip the first two entries of the `struct link_map`, which
* `skip_entries`: skip the first two entries of the `struct link_map`, which
represent the main binary and the vDSO. Costs around 5 bytes.
* `USE_DNLOAD_LOADER`: *64-bit only*: use the symbol loading mechanism as used
* `use_dnload_loader`: *64-bit only*: use the symbol loading mechanism as used
in dnload (i.e. traverse the symtab of the imported libraries). Slightly
larger, but probably better compressible.
* `NO_START_ARG`: *don't* pass the stack pointer to `_start` as the first arg.
* `no_start_arg`: *don't* pass the stack pointer to `_start` as the first arg.
Will make it unable to read argc/argv/environ, but gives you 3 bytes.
```
usage: smol.py [-h] [-m TARGET] [-l LIB] [-L DIR] [--nasm NASM] [--cc CC]
[--scanelf SCANELF] [--readelf READELF]
input [input ...] output
usage: smold [-h] [-m TARGET] [-l LIB] [-L DIR] [--smol-opt OPT]
[--loader-dir LOADER_DIR] [--nasm NASM] [--ld LD] [--cc CC]
[--scanelf SCANELF] [--readelf READELF] [-o OUT]
input [input ...]
positional arguments:
input input object file
output output nasm file
input input object file(s)
optional arguments:
-h, --help show this help message and exit
@ -51,27 +53,32 @@ optional arguments:
-l LIB, --library LIB
libraries to link against
-L DIR, --libdir DIR directories to search libraries in
--smol-opt OPT optimization flags for smol
--loader-dir LOADER_DIR
path to loader files
--nasm NASM which nasm binary to use
--ld LD which ld binary to use
--cc CC which cc binary to use
--scanelf SCANELF which scanelf binary to use
--readelf READELF which readelf binary to use
-o OUT, --output OUT output binary
```
A minimal crt (and `_start` function) are provided in case you want to use `main`.
## smoldd
`smoldd.py` is a script that tries to resolve all symbols from the hashes when
`smoldd` is a script that tries to resolve all symbols from the hashes when
imported by a `smol`-ified binary. This can thus be used to detect user mistakes
during dynamic linking. (Think of it as an equivalent of `ldd`, except that it
also checks whether the imported functions are present as well.)
***NOTE***: `smoldd.py` currently doesn't support 64-bit binaries anymore, as
***NOTE***: `smoldd` no longer supports 64-bit binaries, as
there is currently no (good) way of retrieving the symbol hash table.
## Internal workings
`smol.py` inspects the input object files for needed library files and symbols.
`smold` inspects the input object files for needed library files and symbols.
It then outputs the list of needed libraries, hashes of the needed symbols and
provides stubs for the external functions. This is then combined with a
custom-made, small ELF header and 'runtime linker' which resolves the symbols
@ -129,9 +136,12 @@ and 0, and will thus pretty much never change.)
## Greets
auld alrj blackle breadbox faemiyah gib3&tix0 las leblane parcelshit unlord
```
auld / alrj / blackle / breadbox / Calodox
faemiyah / gib3 & tix0 / las / leblane
parcelshit / PWP / Team210 / unlord / yx
```
## License
[WTFPL](/LICENSE)

View File

@ -1,5 +1,3 @@
; vim: set ft=nasm:
%if __BITS__ == 32
%define EI_CLASS (1) ; 1 == 32-bit
%else

View File

@ -1,5 +1,3 @@
; vim: set ft=nasm:
%include "linkscr.inc"
[section .header]

View File

@ -1,5 +1,3 @@
; vim: set ft=nasm:
%include "linkscr.inc"
[section .header]

View File

@ -1,5 +1,3 @@
; vim: set ft=nasm:
extern _smol_origin
extern _smol_total_size
extern _smol_text_start

View File

@ -1,5 +1,3 @@
; vim: set ft=nasm ts=8:
%include "rtld.inc"
%ifdef ELF_TYPE

View File

@ -1,5 +1,3 @@
; vim: set ft=nasm:
;%define R10_BIAS (0x2B4)
%define R10_BIAS (0x2B4+0x40)

View File

@ -1,5 +1,3 @@
; vim: set ft=nasm:
%if __BITS__ == 32
%define LM_NAME_OFFSET 0x4
%define LM_NEXT_OFFSET 0xC

73
smol.py
View File

@ -1,73 +0,0 @@
#!/usr/bin/env python3
import argparse
import glob
import itertools
import os.path
import shutil
import subprocess
import sys
from smolshared import *
from smolparse import *
from smolemit import *
def main():
    """Generate the nasm symbol/stub table for a set of input object files.

    Parses the command line, determines the target architecture (from
    ``--target`` or from the input objects), resolves every needed symbol to
    the library that provides it and writes the resulting table to the
    ``output`` file. Exits with status 1 on an unknown architecture or an
    unresolvable symbol.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-m', '--target', default='', \
        help='architecture to generate asm code for (default: auto)')
    parser.add_argument('-l', '--library', metavar='LIB', action='append', \
        help='libraries to link against')
    parser.add_argument('-L', '--libdir', metavar='DIR', action='append', \
        help="directories to search libraries in")

    parser.add_argument('--nasm', default=shutil.which('nasm'), \
        help="which nasm binary to use")
    parser.add_argument('--cc', default=shutil.which('cc'), \
        help="which cc binary to use")
    parser.add_argument('--scanelf', default=shutil.which('scanelf'), \
        help="which scanelf binary to use")
    parser.add_argument('--readelf', default=shutil.which('readelf'), \
        help="which readelf binary to use")

#   parser.add_argument('-d', '--dnload', default=False, action='store_true', \
#       help="Use dnload's mechanism of importing functions. Slightly larger, but usually better compressable.")
#   parser.add_argument('--libsep', default=False, action='store_true', \
#       help="Separate import symbols per library, instead of looking at every library when resolving a symbol.")

    parser.add_argument('input', nargs='+', help="input object file")
    parser.add_argument('output', type=argparse.FileType('w'), \
        help="output nasm file", default=sys.stdout)

    args = parser.parse_args()

    # 'append' actions leave the attribute at None when the flag never
    # appears; normalise both so the code below can iterate unconditionally.
    if args.libdir is None: args.libdir = []
    if args.library is None: args.library = []

    # str has no tolower(); lower() is the method that was intended.
    arch = args.target.lower() if len(args.target) != 0 \
        else decide_arch(args.input)
    if arch not in archmagic:
        eprintf("Unknown architecture '" + str(arch) + "'")
        sys.exit(1)

    syms = get_needed_syms(args.readelf, args.input)

    paths = get_cc_paths(args.cc)

    # user-supplied -L directories take precedence over the compiler's own
    spaths = args.libdir + paths['libraries']

    libnames = args.library
    libs = list(find_libs(spaths, libnames))
    symbols = {}

    for symbol, reloc in syms:
        library = find_symbol(args.scanelf, libs, libnames, symbol)
        if not library:
            eprintf("could not find symbol: {}".format(symbol))
            sys.exit(1)
        symbols.setdefault(library, [])
        symbols[library].append((symbol, reloc))

    output(arch, symbols, args.output)


if __name__ == '__main__':
    main()

0
smol/__init__.py Normal file
View File

12
smol/elf.py Normal file
View File

@ -0,0 +1,12 @@
import enum
class ELFMachine(enum.IntEnum):
    """ELF ``e_machine`` identifiers of the supported target architectures."""

    i386 = 3
    x86_64 = 62


# Word size, in bits, associated with each supported machine.
ELF_DEFAULT_BITS = {
    ELFMachine.i386: 32,
    ELFMachine.x86_64: 64,
}

View File

@ -1,39 +1,31 @@
import sys
from smolshared import *
from .util import error, hash_djb2
from .elf import ELFMachine
def output_x86(libraries, outf):
outf.write('; vim: set ft=nasm:\n') # be friendly
outf.write('bits 32\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
outf.write('%include "header32.asm"\n')
outf.write('%include "header-x86.asm"\n')
outf.write('dynamic.needed:\n')
for library in libraries:
outf.write('dd 1;DT_NEEDED\n')
outf.write('dd (_symbols.{} - _strtab)\n'.format(shorts[library]))
outf.write('dynamic.end:\n')
# if needgot:
# outf.write('global _GLOBAL_OFFSET_TABLE_\n')
# outf.write('_GLOBAL_OFFSET_TABLE_:\n')
# outf.write('dd dynamic\n')
outf.write('_strtab:\n')
# if not libsep:
# for library, symrels in libraries.items():
# outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
outf.write('_symbols:\n')
for library, symrels in libraries.items():
# if libsep:
outf.write('\t_symbols.{}: db "{}",0\n'.format(shorts[library], library))
for sym, reloc in symrels:
# meh
if reloc != 'R_386_PC32':
eprintf('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!')
error('Relocation type ' + reloc + ' of symbol ' + sym + ' unsupported!')
sys.exit(1)
hash = hash_djb2(sym)
@ -47,17 +39,15 @@ def output_x86(libraries, outf):
outf.write('db 0\n')
outf.write('_symbols.end:\n')
outf.write('%include "loader32.asm"\n')
# end output_x86
outf.write('%include "loader-x86.asm"\n')
def output_amd64(libraries, outf):
outf.write('; vim: set ft=nasm:\n')
def output_x86_64(libraries, outf):
outf.write('bits 64\n')
shorts = { l: l.split('.', 1)[0].lower().replace('-', '_') for l in libraries }
outf.write('%include "header64.asm"\n')
outf.write('%include "header-x86_64.asm"\n')
outf.write('dynamic.needed:\n')
for library in libraries:
outf.write(' dq 1;DT_NEEDED\n')
@ -112,14 +102,16 @@ global {name}
""".format(lib=shorts[library],name=sym).lstrip('\n'))
outf.write('_smolplt.end:\n')
outf.write('%include "loader64.asm"\n')
# end output_amd64
outf.write('%include "loader-x86_64.asm"\n')
def output(arch, libraries, outf):
if arch == 'i386': output_x86(libraries, outf)
elif arch == 'x86_64': output_amd64(libraries, outf)
def output_table(arch, libraries, outf):
if arch == ELFMachine.i386:
return output_x86(libraries, outf)
elif arch == ELFMachine.x86_64:
return output_x86_64(libraries, outf)
else:
error('')
eprintf("E: cannot emit for arch '" + str(arch) + "'")
sys.exit(1)

103
smol/parse.py Normal file
View File

@ -0,0 +1,103 @@
import glob
import os.path
import subprocess
import struct
import sys
from .elf import ELFMachine
from .util import warn, error
def decide_arch(files):
    """Determine the common machine type and word size of the input objects.

    Reads the first 20 bytes of every file (``e_ident``, ``e_type`` and
    ``e_machine`` of the ELF header) and checks that all files agree on the
    machine.

    Returns ``(ELFMachine, bits)`` on success, where ``bits`` is
    ``EI_CLASS * 32`` of the last file read. Returns ``(None, None)`` after
    printing a warning when a file is not a readable ELF header, when the
    files disagree on the machine, or when the machine is not supported.
    """
    machine = None
    clas = None

    for fn in files:
        with open(fn, 'rb') as f:
            header = f.read(20) # e_ident (16) + e_type (2) + e_machine (2)

        # A short read or a missing magic would otherwise surface as an
        # IndexError/struct.error traceback further down.
        if len(header) < 20 or header[:4] != b'\x7fELF':
            warn('Not a valid ELF file: {}'.format(fn))
            return None, None

        clas = header[4] * 32 # EI_CLASS: 1 -> 32-bit, 2 -> 64-bit
        machid = struct.unpack('<H', header[18:20])[0]

        if machine is not None and machine != machid:
            warn('Input files have multiple architectures, can\'t link this...')
            return None, None

        machine = machid

    if machine not in set(item.value for item in ELFMachine):
        warn('Unsupported machine ID: {}'.format(machine))
        warn('If you are sure this is correct, contact us to add support!')
        return None, None

    return ELFMachine(machine), clas
def build_reloc_typ_table(reo):
    """Build a ``{symbol name: relocation type}`` table from relocation output.

    ``reo`` is a bytes object holding whitespace-separated relocation listing
    lines; the third column is taken as the relocation type and the fifth as
    the symbol name. Lines with fewer than five columns are ignored.
    """
    table = {}

    for line in reo.decode('utf-8').splitlines():
        fields = line.split()

        # Anything shorter is most likely a header or separator line.
        if len(fields) >= 5:
            # Every reference to a given symbol is assumed to use the same
            # relocation type, so a later line simply overwrites an earlier
            # one for the same name.
            table[fields[4]] = fields[2]

    return table
def get_needed_syms(readelf_bin, inpfiles):
    """Collect the undefined global symbols the input objects need resolved.

    Runs ``readelf_bin`` twice on ``inpfiles`` (``-s -W`` for the symbol
    tables, ``-r -W`` for the relocations) and returns a set of
    ``(symbol name, relocation type)`` tuples for every symbol listed as
    ``GLOBAL`` / ``UND`` that also appears in the relocation table.
    """
    symtab = subprocess.check_output([readelf_bin, '-s', '-W'] + inpfiles,
                                     stderr=subprocess.DEVNULL)
    reltab = subprocess.check_output([readelf_bin, '-r', '-W'] + inpfiles,
                                     stderr=subprocess.DEVNULL)

    reloc_types = build_reloc_typ_table(reltab)

    needed = set()
    for line in symtab.decode('utf-8').splitlines():
        fields = line.split()
        if len(fields) < 8:
            continue

        bind, section, name = fields[4], fields[6], fields[7]
        if bind != "GLOBAL" or section != "UND":
            continue
        if name and name in reloc_types:
            needed.add((name, reloc_types[name]))

    return needed
def get_cc_paths(cc_bin):
    """Ask the compiler for its search directories.

    Runs ``cc_bin -print-search-dirs`` and returns a dict mapping each
    reported category to a list of the real paths of those entries that
    exist as directories. Duplicates are removed; list order is unspecified.
    """
    raw = subprocess.check_output([cc_bin, '-print-search-dirs'],
                                  stderr=subprocess.DEVNULL)

    result = {}
    for line in raw.decode('utf-8').splitlines():
        category, dirs = line.split(': ', 1)
        dirs = dirs.lstrip('=')

        existing = set()
        for entry in dirs.split(':'):
            if os.path.isdir(entry):
                existing.add(os.path.realpath(entry))
        result[category] = list(existing)

    return result
def is_valid_elf(fn):
    """Return True when the file at ``fn`` starts with the ELF magic bytes."""
    with open(fn, 'rb') as handle:
        magic = handle.read(4)
    return magic == b'\x7FELF'
def find_lib(spaths, wanted):
    """Locate the shared object for library ``wanted`` in ``spaths``.

    Each directory is searched in order, first for ``lib<wanted>.so*`` and
    then for ``<wanted>.so*``. The first match that is a regular file and
    passes ``is_valid_elf`` is returned; ``None`` if nothing matches.
    """
    for directory in spaths:
        for stem in ('/lib' + wanted, '/' + wanted):
            # Only the fixed part is escaped; the trailing '*' must stay a
            # wildcard so versioned names match.
            pattern = glob.escape(directory + stem) + '.so*'
            for candidate in glob.glob(pattern):
                if os.path.isfile(candidate) and is_valid_elf(candidate):
                    return candidate

    return None
def find_symbol(scanelf_bin, libraries, libnames, symbol):
    """Find which of ``libraries`` provides ``symbol``.

    Runs ``scanelf_bin`` over ``libraries`` and returns the soname of the
    first reported match whose soname starts with ``lib`` followed by one of
    ``libnames``. Returns ``None`` when no such library is found.
    """
    # The format is passed attached to its flag, as a single argv entry.
    command = [scanelf_bin, '-B', '-F%s %S', '-s', '+{}'.format(symbol)]
    listing = subprocess.check_output(command + libraries,
                                      stderr=subprocess.DEVNULL)

    for line in listing.decode('utf-8').splitlines():
        found, soname, _path = line.split(' ', 2)
        if symbol not in found.split(','):
            continue
        if any(soname.startswith('lib' + name) for name in libnames):
            return soname

    return None

42
smol/util.py Normal file
View File

@ -0,0 +1,42 @@
import sys
import struct
def log(msg, file=sys.stdout):
    """Write ``msg`` followed by a newline to ``file``.

    The default stream is the ``sys.stdout`` object in place when this
    function was defined.
    """
    file.write(msg + '\n')


def warn(msg):
    """Write ``msg`` and a newline to the current ``sys.stderr``."""
    log(msg, file=sys.stderr)


def error(msg):
    """Write ``msg`` to ``sys.stderr``, then exit the process with status 1."""
    warn(msg)
    sys.exit(1)
def hash_djb2(s):
    """Return the 32-bit djb2 hash of the string ``s``.

    Starts from 5381 and, per character, multiplies the running value by 33
    and adds the character's code point, truncating to 32 bits each step.
    """
    digest = 5381
    for code in map(ord, s):
        # (digest << 5) + digest == digest * 33
        digest = ((digest << 5) + digest + code) & 0xFFFFFFFF
    return digest
def readbyte(blob, off):
    """Read one unsigned byte at ``off``; return ``(value, off + 1)``."""
    end = off + 1
    (value,) = struct.unpack('<B', blob[off:end])
    return value, end


def readint(blob, off):
    """Read a little-endian unsigned 32-bit integer; return ``(value, off + 4)``."""
    end = off + 4
    (value,) = struct.unpack('<I', blob[off:end])
    return value, end


def readlong(blob, off):
    """Read a little-endian unsigned 64-bit integer; return ``(value, off + 8)``."""
    end = off + 8
    (value,) = struct.unpack('<Q', blob[off:end])
    return value, end


def readstr(blob, off):
    """Read a NUL-terminated UTF-8 string starting at ``off``.

    Returns ``(text, offset just past the terminating NUL)``. Running off the
    end of ``blob`` before a NUL is found raises ``struct.error``.
    """
    raw = bytearray()
    char, off = readbyte(blob, off)
    while char != 0:
        raw.append(char)
        char, off = readbyte(blob, off)
    return raw.decode('utf-8'), off

101
smold Executable file
View File

@ -0,0 +1,101 @@
#!/usr/bin/env python3
import os
from os import path
import sys
import argparse
import shutil
import tempfile
import subprocess
from smol.util import error
from smol.elf import ELFMachine, ELF_DEFAULT_BITS
from smol.parse import decide_arch, find_lib, find_symbol, get_cc_paths, get_needed_syms
from smol.emit import output_table
def main():
    """Link the input object files into a smol binary in one invocation.

    Steps: parse the command line (unrecognised arguments are forwarded to
    ld), work out the target architecture, resolve every needed symbol to a
    library, emit the symbol table as nasm source into a temporary file,
    assemble it and finally link it together with the inputs using the
    loader's linker script.

    Exits through ``parser.error`` / ``error`` on bad input and with status 1
    when nasm or ld fail.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-m', '--target',
        help='architecture to generate asm code for (default: auto)')
    parser.add_argument('-l', '--library', metavar='LIB', action='append',
        help='libraries to link against')
    parser.add_argument('-L', '--libdir', metavar='DIR', action='append',
        help='directories to search libraries in')

    parser.add_argument('--smol-opt', metavar='OPT', default=['use_interp'], action='append',
        help='optimization flags for smol')
    parser.add_argument('--loader-dir', default=path.join(path.dirname(__file__), 'ldr'),
        help='path to loader files')

    parser.add_argument('--nasm', default=os.getenv('NASM') or shutil.which('nasm'),
        help='which nasm binary to use')
    parser.add_argument('--ld', default=os.getenv('LD') or shutil.which('ld'),
        help='which ld binary to use')
    parser.add_argument('--cc', default=os.getenv('CC') or shutil.which('cc'),
        help='which cc binary to use')
    parser.add_argument('--scanelf', default=os.getenv('SCANELF') or shutil.which('scanelf'),
        help='which scanelf binary to use')
    parser.add_argument('--readelf', default=os.getenv('READELF') or shutil.which('readelf'),
        help='which readelf binary to use')

    parser.add_argument('input', nargs='+', help='input object file(s)')
    parser.add_argument('-o', '--output', metavar='OUT', default='smol.out', help='output binary')

    # Anything argparse does not recognise is passed through to ld verbatim.
    args, ld_args = parser.parse_known_args()

    for util in ['nasm', 'cc', 'scanelf', 'readelf', 'ld']:
        if not getattr(args, util):
            # The template uses the named field {u}, so the value has to be
            # passed by keyword; a positional argument raises KeyError.
            parser.error('utility "{u}" could not be found! either install it or pass the path with --{u}'
                .format(u=util))

    # Options are applied in command-line order; a leading '-' removes an
    # option again (e.g. '-use_interp' drops the default).
    opts = set()
    for opt in args.smol_opt:
        if opt.startswith('-'):
            opts.discard(opt[1:])
            continue
        opts.add(opt)

    args.library = args.library or []
    args.libdir = args.libdir or []

    if args.target:
        # Accept ld-style emulation names ('elf_i386', 'elf_x86_64') as well.
        arch = args.target.lower().replace('elf_', '')
        # Look the architecture up by member *name*; ELFMachine(...) and the
        # `in` operator on the enum class work on member values instead.
        if arch not in ELFMachine.__members__:
            parser.error('unknown architecture: {}'.format(arch))
        arch = ELFMachine[arch]
        bits = ELF_DEFAULT_BITS[arch]
    else:
        arch, bits = decide_arch(args.input)
        if not arch or not bits:
            error('Invalid architecture!')

    syms = get_needed_syms(args.readelf, args.input)
    paths = get_cc_paths(args.cc)

    # user-supplied -L directories are searched before the compiler's own
    libdirs = args.libdir + paths['libraries']
    libs = []
    libnames = args.library
    for libname in libnames:
        lib = find_lib(libdirs, libname)
        if not lib:
            error('could not find library: {}'.format(libname))
        libs.append(lib)

    symbols = {}
    for symbol, reloc in syms:
        library = find_symbol(args.scanelf, libs, libnames, symbol)
        if not library:
            error("could not find symbol: {}".format(symbol))
        symbols.setdefault(library, [])
        symbols[library].append((symbol, reloc))

    # Each smol option becomes an upper-case nasm define; sorted so the
    # assembler command line is reproducible between runs.
    as_args = ['-D' + opt.upper() for opt in sorted(opts)]

    with tempfile.NamedTemporaryFile('w', suffix='.s') as table, tempfile.NamedTemporaryFile('w', suffix='.o') as tableobj:
        output_table(arch, symbols, table)
        # make sure nasm sees the complete file
        table.flush()

        try:
            subprocess.check_call([args.nasm] + as_args + ['-I', args.loader_dir + '/', '-f', 'elf{}'.format(bits), table.name, '-o', tableobj.name])
            subprocess.check_call([args.ld, '-T', os.path.join(args.loader_dir, 'link.ld'), '--oformat=binary', '-o', args.output, tableobj.name] + ld_args + args.input)
        except subprocess.CalledProcessError:
            # the failing tool has already printed its own diagnostics
            sys.exit(1)


if __name__ == '__main__':
    main()

View File

@ -8,27 +8,9 @@ import subprocess
import struct
import sys
from smolshared import *
from smol.util import hash_djb2, readint, readstr, error
from smol.elf import ELFMachine
def readbyte(blob, off):
return struct.unpack('<B', blob[off:off+1])[0], (off+1)
def readint(blob, off):
return struct.unpack('<I', blob[off:off+4])[0], (off+4)
def readlong(blob, off):
return struct.unpack('<Q', blob[off:off+8])[0], (off+8)
def readstr(blob, off):
text = bytearray()
while True:
char, off = readbyte(blob, off)
if char == 0:
break
text.append(char)
return text.decode('utf-8'), off
def get_def_libpaths(cc_bin, is32bit):
if is32bit:
@ -37,10 +19,10 @@ def get_def_libpaths(cc_bin, is32bit):
out = subprocess.check_output([cc_bin, '-print-search-dirs'],
stderr=subprocess.DEVNULL)
stuff = dict({})
stuff = {}
for l in out.decode('utf-8').splitlines():
blah = l.split(': ')
stuff[blah[0]] = blah[1].lstrip('=').split(':')
cols = l.split(': ')
stuff[cols[0]] = cols[1].lstrip('=').split(':')
return stuff["libraries"]
@ -55,14 +37,14 @@ def build_hashtab(scanelf_bin, lib):
out = subprocess.check_output([scanelf_bin, '-B', '-F', '%s', '-s', '%pd%*', lib],
stderr=subprocess.DEVNULL)
blah = set(out.decode('utf-8').split('\n'))
ret = dict({})
lines = set(out.decode('utf-8').split('\n'))
ret = {}
for x in blah:
y = x.split()
if len(y) != 7:
for line in lines:
cols = line.split()
if len(cols) != 7:
continue
ret[hash_djb2(y[6])] = y[6]
ret[hash_djb2(cols[6])] = cols[6]
return ret
@ -79,31 +61,30 @@ def main():
blob = args.input.read()
bits = blob[4] * 32
machnum = struct.unpack('<H', blob[18:18+2])[0]
try:
machine = ELFMachine(machnum)
except:
error('unknown architecture: {}'.format(machnum))
is32bit = machnum == archmagic['i386']
deflibs = get_def_libpaths(args.cc, is32bit)
deflibs = get_def_libpaths(args.cc, bits == 32)
phoff, phsz, phnum = 0, 0, 0
if is32bit:
if bits == 32:
phoff = struct.unpack('<I', blob[28:28+4])[0]
phsz = struct.unpack('<H', blob[42:42+2])[0]
# e_phnum is the 2-byte field at offset 44 of the 32-bit ELF header; the slice
# must be exactly two bytes long or struct.unpack('<H', ...) raises struct.error.
phnum = struct.unpack('<H', blob[44:44+2])[0]
elif machnum == archmagic['x86_64']:
elif bits == 64:
phoff = struct.unpack('<Q', blob[32:32+8])[0]
phsz = struct.unpack('<H', blob[54:54+2])[0]
phnum = struct.unpack('<H', blob[56:56+2])[0]
else:
eprintf("Unknown architecture " + str(machnum))
sys.exit(1)
for i in range(phnum):
off = phoff + i * phsz
#print(hex(off))
ptyp, poff, pva, ppa, pfsz, pmsz, pfl, pal = 0,0,0,0,0,0,0,0
if is32bit:
if bits == 32:
ptyp, poff, pva, ppa, pfsz, pmsz, pfl, pal = \
struct.unpack('<ILLLIIII', blob[off:off+phsz])
else:
@ -113,15 +94,13 @@ def main():
if ptyp != 2: # PT_DYNAMIC
continue
#print(hex(poff))
# right after the dynamic section, the smol 'symtab'/'hashtab' is found
#
# note that on i386, every lib name is followed by an E9 byte
# if the next libname/first byte of the hash is null, the table has
# come to an end.
if is32bit:
if bits == 32:
j = poff
strtaboff = 0
while j < poff + pfsz:
@ -159,8 +138,7 @@ def main():
break
else: # 64-bit
eprintf("Currently unsuppored, sorry.")
sys.exit(1)
error('Currently unsupported, sorry.')
if __name__ == '__main__':
main()

View File

@ -1,110 +0,0 @@
import glob
import os.path
import subprocess
import struct
import sys
from smolshared import *
def decide_arch(inpfiles):
    """Return the architecture name shared by all input object files.

    Reads the ``e_machine`` field of every file's ELF header and maps the
    single machine number found to its name via ``archmagic``. Exits with
    status 1 when the files disagree on the machine or when the machine
    number is unknown.
    """
    archs = set()
    for fp in inpfiles:
        with open(fp, 'rb') as ff:
            _ = ff.read(16) # ei_ident
            _ = ff.read( 2) # ei_type
            machine = ff.read(2) # ei_machine

            machnum = struct.unpack('<H', machine)[0]
            archs.add(machnum)

    if len(archs) != 1:
        eprintf("Input files have multiple architectures, can't link this...")
        sys.exit(1)

    archn = list(archs)[0]

    if archn not in archmagic:
        eprintf("Unknown architecture number " + str(archn) + \
            ". Consult elf.h and rebuild your object files.")
        # Stop here: falling through would raise KeyError on the lookup below.
        sys.exit(1)

    return archmagic[archn]
def build_reloc_typ_table(reo):
    """Return a ``{symbol name: relocation type}`` dict parsed from ``reo``.

    ``reo`` is a bytes object of whitespace-separated relocation listing
    lines; column three is used as the type and column five as the symbol
    name. Lines with fewer than five columns are skipped.
    """
    table = dict()

    for line in reo.decode('utf-8').splitlines():
        fields = line.split()

        # Shorter lines are most likely headers.
        if len(fields) >= 5:
            # All references to one symbol are assumed to share a relocation
            # type, so the last line seen for a name wins.
            table[fields[4]] = fields[2]

    return table
def get_needed_syms(readelf_bin, inpfiles):
    """Return the set of ``(symbol, relocation type)`` pairs the inputs need.

    Runs ``readelf_bin`` with ``-s -W`` and with ``-r -W`` on ``inpfiles``
    and keeps every symbol listed as ``GLOBAL`` / ``UND`` that also has an
    entry in the relocation table.
    """
    symtab = subprocess.check_output([readelf_bin, '-s', '-W'] + inpfiles,
                                     stderr=subprocess.DEVNULL)
    reltab = subprocess.check_output([readelf_bin, '-r', '-W'] + inpfiles,
                                     stderr=subprocess.DEVNULL)

    reloc_types = build_reloc_typ_table(reltab)

    needed = set()
    for line in symtab.decode('utf-8').splitlines():
        fields = line.split()
        if len(fields) < 8:
            continue

        bind, section, name = fields[4], fields[6], fields[7]
        if bind != "GLOBAL" or section != "UND":
            continue
        if len(name) > 0 and name in reloc_types:
            needed.add((name, reloc_types[name]))

    # _GLOBAL_OFFSET_TABLE_ receives no special handling; only the symbol set
    # is returned.
    return needed
def get_cc_paths(cc_bin):
    """Return the search directories reported by ``cc_bin -print-search-dirs``.

    The result maps each category printed by the compiler to a list of the
    real paths of the entries that exist as directories, without duplicates
    and in unspecified order.
    """
    raw = subprocess.check_output([cc_bin, '-print-search-dirs'],
                                  stderr=subprocess.DEVNULL)

    found = {}
    for line in raw.decode('utf-8').splitlines():
        category, dirs = line.split(': ', 1)
        dirs = dirs.lstrip('=')

        unique = set()
        for entry in dirs.split(':'):
            if os.path.isdir(entry):
                unique.add(os.path.realpath(entry))
        found[category] = list(unique)

    return found
def is_valid_elf(f):
    """Return True when the file at path ``f`` begins with the ELF magic."""
    with open(f, 'rb') as handle:
        magic = handle.read(4)
    return magic == b'\x7FELF'
def find_lib(spaths, wanted):
    """Return the path of the shared object for library ``wanted``.

    Directories in ``spaths`` are searched in order, first for
    ``lib<wanted>.so*`` and then for ``<wanted>.so*``; the first regular file
    accepted by ``is_valid_elf`` is returned. Static archives are not
    considered. When nothing is found an error is printed and the process
    exits with status 1.
    """
    for directory in spaths:
        for stem in ('/lib' + wanted, '/' + wanted):
            pattern = glob.escape(directory + stem) + '.so*'
            for candidate in glob.glob(pattern):
                if os.path.isfile(candidate) and is_valid_elf(candidate):
                    return candidate

    eprintf("E: couldn't find library '" + wanted + "'.")
    sys.exit(1)
def find_libs(spaths, wanted):
    """Lazily resolve every library name in ``wanted`` through ``find_lib``.

    Returns a ``map`` iterator; nothing is looked up until it is consumed.
    """
    def locate(name):
        return find_lib(spaths, name)

    return map(locate, wanted)
def find_symbol(scanelf_bin, libraries, libnames, symbol):
    """Return the soname of the library in ``libraries`` defining ``symbol``.

    Runs ``scanelf_bin`` over ``libraries`` and returns the first reported
    soname that starts with ``lib`` followed by one of ``libnames``.
    Returns ``None`` when there is no such match.
    """
    # The format is passed attached to its flag, as a single argv entry.
    command = [scanelf_bin, '-B', '-F%s %S', '-s', '+{}'.format(symbol)]
    listing = subprocess.check_output(command + libraries,
                                      stderr=subprocess.DEVNULL)

    for line in listing.decode('utf-8').splitlines():
        found, soname, _path = line.split(' ', 2)
        if symbol not in found.split(','):
            continue
        if any(soname.startswith('lib' + name) for name in libnames):
            return soname

    return None

View File

@ -1,16 +0,0 @@
import sys
# Two-way lookup between architecture names and their ELF e_machine numbers.
archmagic = {
    'i386': 3,
    3: 'i386',
    'x86_64': 62,
    62: 'x86_64',
}


def hash_djb2(s):
    """Return the 32-bit djb2 hash of the string ``s``."""
    digest = 5381
    for code in map(ord, s):
        # (digest << 5) + digest == digest * 33
        digest = ((digest << 5) + digest + code) & 0xFFFFFFFF
    return digest


def eprintf(*args, **kwargs):
    """``print`` wrapper that always writes to the current ``sys.stderr``."""
    print(*args, file=sys.stderr, **kwargs)

View File

@ -16,11 +16,11 @@ def main(argv):
i = 0
while data[-i - 1] == 0:
i = i + 1
i += 1
args.output.write(data[0:len(data)-i])
args.output.write(data[:-i])
if __name__ == '__main__':
rv = main(sys.argv)
exit(0 if rv is None else rv)
rv = main(sys.argv) or 0
sys.exit(rv)