def readstr(data: bytes, off: int) -> str:
    """Read a NUL-terminated UTF-8 string out of ``data`` starting at ``off``.

    The string ends at the first zero byte at or after ``off``.  If no zero
    byte is found before the end of ``data``, everything from ``off`` to the
    end of the buffer is returned instead of raising ``IndexError``.

    :param data: buffer to read from (e.g. the raw bytes of an ELF file).
    :param off: byte offset of the first character of the string.
    :returns: the decoded string, without the terminating NUL; empty when
        ``off`` points at a NUL byte or at/past the end of ``data``.
    :raises UnicodeDecodeError: if the bytes are not valid UTF-8.
    """
    end = data.find(b'\0', off)
    if end < 0:
        # Unterminated string: stop at the end of the buffer.  The previous
        # loop indexed data[off] *before* checking off < len(data), so this
        # case crashed instead of terminating.
        end = len(data)
    return data[off:end].decode('utf-8')
size, link, info, align, entsz = \ + unpack(' Sequence[Sym]: + ss = [] + for off in range(sym.offset, sym.offset+sym.size, sym.entsize): + noff, val, sz, info, other, shndx = \ + unpack('> 4), other, shndx) + ss.append(s) + return sorted(ss, key=lambda x:x.value) + def parse_32(data: bytes) -> ELF: ident = data[:16] eclass = data[4] @@ -81,8 +177,12 @@ def parse_32(data: bytes) -> ELF: entry = unpack(' ELF: dyn = parse_dyn32(data, p) break - return ELF(data, ident, eclass, mach, entry, phdrs, dyn, True) + shdrs = parse_shdr32(data, shoff, shentsz, shnum, shstrndx) + #print("shdrs",shdrs) + + symtabsh = [s for s in shdrs if s.type == SHT_SYMTAB and s.name == ".symtab"] + strtabsh = [s for s in shdrs if s.type == SHT_STRTAB and s.name == ".strtab"] + dynsymsh = [s for s in shdrs if s.type == SHT_SYMTAB and s.name == ".dynsym"] + dynstrsh = [s for s in shdrs if s.type == SHT_STRTAB and s.name == ".dynstr"] + + #print("symtab",symtabsh) + #print("strtab",strtabsh) + + assert len(symtabsh) < 2 + assert len(strtabsh) < 2 + assert len(dynsymsh) < 2 + assert len(dynstrsh) < 2 + + symtab, dynsym = None, None + if len(symtabsh) and len(strtabsh): + symtab = parse_sym32(data, symtabsh[0], strtabsh[0]) \ + if len(shdrs) > 0 else [] + if len(dynsymsh) and len(dynstrsh): + dynsym = parse_sym32(data, symtabsh[0], strtabsh[0]) \ + if len(shdrs) > 0 else [] + + return ELF(data, ident, eclass, mach, entry, phdrs, dyn, shdrs, + symtab, dynsym, True) def parse_phdr64(data: bytes, phoff:int, phentsz:int, phnum:int) -> Sequence[Phdr]: ps = [] for off in range(phoff, phoff+phentsz*phnum, phentsz): - # TODO + # TODO # what is TODO exactly?? 
ptype, flags, off, vaddr, paddr, filesz, memsz, align = \ unpack(' Dyn: return ds +def parse_shdr64(data: bytes, shoff: int, shentsz: int, shnum: int, + shstrndx: int) -> Sequence[Shdr]: + if shnum*shentsz+shoff >= len(data) or shentsz==0 or shnum==0 or shoff==0: + return [] + + ss = [] + for off in range(shoff, shoff+shentsz*shnum, shentsz): + noff, typ, flags, addr, off, size, link, info, align, entsz = \ + unpack(' Sequence[Sym]: + ss = [] + for off in range(sym.offset, sym.offset+sym.size, sym.entsize): + noff, info, other, shndx, value, sz = \ + unpack('> 4), other, shndx) + ss.append(s) + return sorted(ss, key=lambda x:x.value) + def parse_64(data: bytes) -> ELF: ident = data[:16] eclass = data[4] @@ -125,8 +285,12 @@ def parse_64(data: bytes) -> ELF: entry = unpack(' ELF: dyn = parse_dyn64(data, p) break - return ELF(data, ident, eclass, mach, entry, phdrs, dyn, False) + shdrs = parse_shdr64(data, shoff, shentsz, shnum, shstrndx) + + symtabsh = [s for s in shdrs if s.type == SHT_SYMTAB and s.name == ".symtab"] + strtabsh = [s for s in shdrs if s.type == SHT_STRTAB and s.name == ".strtab"] + dynsymsh = [s for s in shdrs if s.type == SHT_SYMTAB and s.name == ".dynsym"] + dynstrsh = [s for s in shdrs if s.type == SHT_STRTAB and s.name == ".dynstr"] + + assert len(symtabsh) < 2 + assert len(strtabsh) < 2 + assert len(dynsymsh) < 2 + assert len(dynstrsh) < 2 + + symtab, dynsym = None, None + if len(symtabsh) and len(strtabsh): + symtab = parse_sym64(data, symtabsh[0], strtabsh[0]) \ + if len(shdrs) > 0 else [] + if len(dynsymsh) and len(dynstrsh): + dynsym = parse_sym64(data, symtabsh[0], strtabsh[0]) \ + if len(shdrs) > 0 else [] + + return ELF(data, ident, eclass, mach, entry, phdrs, dyn, shdrs, + symtab, dynsym, False) def parse(data: bytes) -> ELF: assert data[:4] == b'\x7FELF', "Not a valid ELF file" # good enough diff --git a/smol/linkmap.py b/smol/linkmap.py index 81ea94c..c849d49 100644 --- a/smol/linkmap.py +++ b/smol/linkmap.py @@ -27,6 +27,17 @@ class 
XRef(NamedTuple): name: str deff: str reff: Sequence[str] +class ImpObj(NamedTuple): + ar: str + obj: str +class ImpRsnObj(NamedTuple): + sym: str + obj: ImpObj +class ImpRsnSym(NamedTuple): + sym: str +class ArImp(NamedTuple): + impobj: ImpObj + reason: Union[ImpRsnSym, ImpRsnObj] class LinkMap(NamedTuple): common : Sequence[CommonSym] @@ -34,11 +45,13 @@ class LinkMap(NamedTuple): memcfg : Sequence[MemCfg] mmap : Sequence[MMap] xref : Sequence[XRef] + arimp : Sequence[ArImp] def parse_common( ls: Sequence[str]) -> Sequence[CommonSym]: return [] # TODO def parse_discard(ls: Sequence[str]) -> Sequence[Discard ]: return [] # TODO def parse_memcfg( ls: Sequence[str]) -> Sequence[MemCfg ]: return [] # TODO def parse_xref( ls: Sequence[str]) -> Sequence[XRef ]: return [] # TODO +def parse_arimp( ls: Sequence[str]) -> Sequence[ArImp ]: return [] # TODO def parse_mmap(ls: Sequence[str]) -> Sequence[MMap]: rrr = [] @@ -55,7 +68,9 @@ def parse_mmap(ls: Sequence[str]) -> Sequence[MMap]: #print(repr(l)) s = l.strip(); w = s.split() - if s.startswith('LOAD ') or s.startswith('OUTPUT('): continue#break + if s.startswith('LOAD ') or s.startswith('OUTPUT(') or \ + s.startswith('START GROUP') or s.startswith('END GROUP'): + continue#break if l[0] != ' ': bigsect = w[0] @@ -68,7 +83,7 @@ def parse_mmap(ls: Sequence[str]) -> Sequence[MMap]: continue # addr placed on next line for prettyprinting reasons #print(repr(l), w[0]) - assert w[0].startswith("0x"), "welp, bad symbol addr" + assert w[0].startswith("0x"), "welp, bad symbol addr %s"%w[0] addr = int(w[0], 16) @@ -90,10 +105,11 @@ def parse(s: str) -> LinkMap: MEMCFG = 2 MMAP = 3 XREF = 4 + ARIMP = 5 curpt = -1 - commonl, discardl, memcfgl, mmapl, xrefl = [], [], [], [], [] + commonl, discardl, memcfgl, mmapl, xrefl, arimpl = [], [], [], [], [], [] for l in s.split('\n'): if len(l.strip()) == 0: continue @@ -103,15 +119,18 @@ def parse(s: str) -> LinkMap: elif ls == "Discarded input sections": curpt = DISCARD elif ls == "Memory 
Configuration": curpt = MEMCFG elif ls == "Linker script and memory map": curpt = MMAP - elif ls == 'Cross Reference Table': curpt = XREF + elif ls == "Cross Reference Table": curpt = XREF + elif ls == "Archive member included to satisfy reference by file (symbol)": curpt = ARIMP elif curpt == COMMON : commonl.append(l) elif curpt == DISCARD: discardl.append(l) elif curpt == MEMCFG : memcfgl.append(l) elif curpt == MMAP : mmapl.append(l) elif curpt == XREF : xrefl.append(l) + elif curpt == ARIMP : arimpl.append(l) else: assert False, "bad line %s" % ls - return LinkMap(parse_common(commonl), parse_discard(discardl), \ - parse_memcfg(memcfgl), parse_mmap(mmapl), parse_xref(xrefl)) + return LinkMap(parse_common(commonl), parse_discard(discardl), + parse_memcfg(memcfgl), parse_mmap(mmapl), parse_xref(xrefl), + parse_arimp(arimpl))