mirror of https://github.com/Shizmob/smol
shdr parsing in hackyelf, linkmap parsing bug fixes
This commit is contained in:
parent
6ad8eed5b2
commit
e37a0d8e50
191
smol/hackyelf.py
191
smol/hackyelf.py
|
@ -8,6 +8,7 @@
|
||||||
from struct import unpack
|
from struct import unpack
|
||||||
from typing import *
|
from typing import *
|
||||||
|
|
||||||
|
|
||||||
ELFCLASS32 = 1
|
ELFCLASS32 = 1
|
||||||
ELFCLASS64 = 2
|
ELFCLASS64 = 2
|
||||||
|
|
||||||
|
@ -24,6 +25,30 @@ DT_NEEDED = 1
|
||||||
DT_STRTAB = 5
|
DT_STRTAB = 5
|
||||||
DT_SYMTAB = 6
|
DT_SYMTAB = 6
|
||||||
|
|
||||||
|
SHT_NULL = 0
|
||||||
|
SHT_PROGBITS = 1
|
||||||
|
SHT_SYMTAB = 2
|
||||||
|
SHT_STRTAB = 3
|
||||||
|
SHT_DYNSYM = 11
|
||||||
|
|
||||||
|
STB_LOCAL = 0
|
||||||
|
STB_GLOBAL = 1
|
||||||
|
STB_WEAK = 2
|
||||||
|
|
||||||
|
STT_NOTYPE = 0
|
||||||
|
STT_OBJECT = 1
|
||||||
|
STT_FUNC = 2
|
||||||
|
STT_SECTION= 3
|
||||||
|
STT_FILE = 4
|
||||||
|
STT_COMMON = 5
|
||||||
|
STT_TLS = 6
|
||||||
|
STT_GNU_IFUNC = 10
|
||||||
|
|
||||||
|
STV_DEFAULT = 0
|
||||||
|
STV_INTERNAL = 1
|
||||||
|
STV_HIDDEN = 2
|
||||||
|
STV_PROTECTED = 3
|
||||||
|
|
||||||
class Phdr(NamedTuple):
|
class Phdr(NamedTuple):
|
||||||
ptype: int
|
ptype: int
|
||||||
off : int
|
off : int
|
||||||
|
@ -38,6 +63,27 @@ class Dyn(NamedTuple):
|
||||||
tag: int
|
tag: int
|
||||||
val: int
|
val: int
|
||||||
|
|
||||||
|
class Shdr(NamedTuple):
    """One entry of the ELF section header table.

    The field order matches the order in which the section header parsers
    in this file unpack an entry, so a freshly unpacked tuple can be passed
    straight to the constructor.
    """
    # Straight after unpacking this is the integer offset of the name inside
    # the section-name string table.  The parsers then replace it with the
    # resolved string, or with None when that offset lies outside the table.
    name: Union[int, str, None]
    type: int       # compared against the SHT_* constants
    flags: int
    addr: int
    offset: int     # position of the section contents within the file data
    size: int       # length of the section contents in bytes
    link: int
    info: int
    addralign: int
    entsize: int    # per-entry size for table-like sections (e.g. symbols)
|
||||||
|
|
||||||
|
class Sym(NamedTuple):
    """One symbol table entry as produced by the symbol table parsers."""
    # Resolved symbol name; None when the entry's name offset lies outside
    # the string table it was looked up in.
    name: Optional[str]
    value: int
    size: int
    type: int        # low four bits of the raw info byte (cf. STT_*)
    binding: int     # high four bits of the raw info byte (cf. STB_*)
    visibility: int  # the raw 'other' byte, stored unmasked (cf. STV_*)
    shndx: int
|
||||||
|
|
||||||
class ELF(NamedTuple):
|
class ELF(NamedTuple):
|
||||||
data : bytes
|
data : bytes
|
||||||
ident : bytes
|
ident : bytes
|
||||||
|
@ -46,8 +92,18 @@ class ELF(NamedTuple):
|
||||||
entry : int
|
entry : int
|
||||||
phdrs : Sequence[Phdr]
|
phdrs : Sequence[Phdr]
|
||||||
dyn : Sequence[Dyn]
|
dyn : Sequence[Dyn]
|
||||||
|
shdrs : Sequence[Shdr]
|
||||||
|
symtab: Sequence[Sym]
|
||||||
|
dynsym: Sequence[Sym]
|
||||||
is32bit: bool
|
is32bit: bool
|
||||||
|
|
||||||
|
def readstr(data: bytes, off: int) -> str:
    """Read a NUL-terminated UTF-8 string from *data* starting at *off*.

    Reading stops at the first zero byte or at the end of *data*, whichever
    comes first.  The terminator is not part of the result.

    Raises:
        UnicodeDecodeError: if the bytes read are not valid UTF-8.
    """
    # bytes.find() returns -1 when there is no terminator; in that case the
    # string runs to the end of the buffer instead of indexing past it.
    end = data.find(0, off)
    if end < 0:
        end = len(data)
    return bytes(data[off:end]).decode('utf-8')
|
||||||
|
|
||||||
# yeah, there's some code duplication here
|
# yeah, there's some code duplication here
|
||||||
# idgaf
|
# idgaf
|
||||||
|
|
||||||
|
@ -74,6 +130,46 @@ def parse_dyn32(data: bytes, dynp: Phdr) -> Dyn:
|
||||||
|
|
||||||
return ds
|
return ds
|
||||||
|
|
||||||
|
def parse_shdr32(data: bytes, shoff: int, shentsz: int, shnum: int,
                 shstrndx: int) -> Sequence[Shdr]:
    """Parse the section header table of a little-endian 32-bit ELF image.

    Args:
        data: the complete file contents.
        shoff: file offset of the section header table.
        shentsz: size in bytes of one table entry.
        shnum: number of table entries.
        shstrndx: index of the entry describing the section-name string
            table.

    Returns:
        A list of Shdr records.  When ``shstrndx`` is a valid index, each
        record's ``name`` is the resolved string (or None if its name offset
        lies outside the string table); otherwise ``name`` stays the raw
        integer offset.  An empty list is returned when there is no table or
        it does not fit inside *data*.
    """
    # Zero values simply mean "no section headers", so return quietly
    # instead of printing diagnostics to stdout.
    if shentsz == 0 or shnum == 0 or shoff == 0 \
            or shoff + shnum*shentsz > len(data):
        return []

    ss = []
    for hdroff in range(shoff, shoff + shentsz*shnum, shentsz):
        # Ten consecutive 32-bit words, already in Shdr field order.
        ss.append(Shdr(*unpack('<IIIIIIIIII', data[hdroff:hdroff+10*4])))

    if shstrndx < shnum:
        # Taken before any name is rewritten, so its offset/size are the
        # values unpacked from the file.
        shstr = ss[shstrndx]
        ss = [s._replace(name=(readstr(data, shstr.offset + s.name)
                               if s.name < shstr.size else None))
              for s in ss]

    return ss
|
||||||
|
|
||||||
|
def parse_sym32(data: bytes, sym: Shdr, strt: Shdr) -> Sequence[Sym]:
    """Parse a little-endian 32-bit symbol table section.

    Args:
        data: the complete file contents.
        sym: section header of the symbol table; its ``offset``, ``size``
            and ``entsize`` select the entries to read.
        strt: section header of the string table holding the symbol names.

    Returns:
        A list of Sym records sorted by ascending ``value``.  A symbol whose
        name offset lies outside *strt* gets ``name=None``.
    """
    # A zero entsize would make range() raise ValueError; fall back to the
    # 16 bytes that the unpack format below consumes per entry.
    entsize = sym.entsize or 16

    ss = []
    for off in range(sym.offset, sym.offset + sym.size, entsize):
        noff, val, sz, info, other, shndx = \
            unpack('<IIIBBH', data[off:off+3*4+2+2])

        sn = readstr(data, strt.offset + noff) \
            if noff < strt.size else None
        # info packs the type in its low nibble and the binding in its high one
        ss.append(Sym(sn, val, sz, (info & 15), (info >> 4), other, shndx))
    return sorted(ss, key=lambda x: x.value)
|
||||||
|
|
||||||
def parse_32(data: bytes) -> ELF:
|
def parse_32(data: bytes) -> ELF:
|
||||||
ident = data[:16]
|
ident = data[:16]
|
||||||
eclass = data[4]
|
eclass = data[4]
|
||||||
|
@ -81,8 +177,12 @@ def parse_32(data: bytes) -> ELF:
|
||||||
entry = unpack('<I', data[24:24+4])[0]
|
entry = unpack('<I', data[24:24+4])[0]
|
||||||
|
|
||||||
phoff = unpack('<I', data[28:28+4])[0]
|
phoff = unpack('<I', data[28:28+4])[0]
|
||||||
|
shoff = unpack('<I', data[32:32+4])[0]
|
||||||
phentsz = unpack('<H', data[42:42+2])[0]
|
phentsz = unpack('<H', data[42:42+2])[0]
|
||||||
phnum = unpack('<H', data[44:44+2])[0]
|
phnum = unpack('<H', data[44:44+2])[0]
|
||||||
|
shentsz = unpack('<H', data[46:46+2])[0]
|
||||||
|
shnum = unpack('<H', data[48:48+2])[0]
|
||||||
|
shstrndx= unpack('<H', data[50:50+2])[0]
|
||||||
|
|
||||||
phdrs = parse_phdr32(data, phoff, phentsz, phnum)
|
phdrs = parse_phdr32(data, phoff, phentsz, phnum)
|
||||||
dyn = None
|
dyn = None
|
||||||
|
@ -92,12 +192,37 @@ def parse_32(data: bytes) -> ELF:
|
||||||
dyn = parse_dyn32(data, p)
|
dyn = parse_dyn32(data, p)
|
||||||
break
|
break
|
||||||
|
|
||||||
return ELF(data, ident, eclass, mach, entry, phdrs, dyn, True)
|
shdrs = parse_shdr32(data, shoff, shentsz, shnum, shstrndx)
|
||||||
|
#print("shdrs",shdrs)
|
||||||
|
|
||||||
|
symtabsh = [s for s in shdrs if s.type == SHT_SYMTAB and s.name == ".symtab"]
|
||||||
|
strtabsh = [s for s in shdrs if s.type == SHT_STRTAB and s.name == ".strtab"]
|
||||||
|
dynsymsh = [s for s in shdrs if s.type == SHT_SYMTAB and s.name == ".dynsym"]
|
||||||
|
dynstrsh = [s for s in shdrs if s.type == SHT_STRTAB and s.name == ".dynstr"]
|
||||||
|
|
||||||
|
#print("symtab",symtabsh)
|
||||||
|
#print("strtab",strtabsh)
|
||||||
|
|
||||||
|
assert len(symtabsh) < 2
|
||||||
|
assert len(strtabsh) < 2
|
||||||
|
assert len(dynsymsh) < 2
|
||||||
|
assert len(dynstrsh) < 2
|
||||||
|
|
||||||
|
symtab, dynsym = None, None
|
||||||
|
if len(symtabsh) and len(strtabsh):
|
||||||
|
symtab = parse_sym32(data, symtabsh[0], strtabsh[0]) \
|
||||||
|
if len(shdrs) > 0 else []
|
||||||
|
if len(dynsymsh) and len(dynstrsh):
|
||||||
|
dynsym = parse_sym32(data, symtabsh[0], strtabsh[0]) \
|
||||||
|
if len(shdrs) > 0 else []
|
||||||
|
|
||||||
|
return ELF(data, ident, eclass, mach, entry, phdrs, dyn, shdrs,
|
||||||
|
symtab, dynsym, True)
|
||||||
|
|
||||||
def parse_phdr64(data: bytes, phoff:int, phentsz:int, phnum:int) -> Sequence[Phdr]:
|
def parse_phdr64(data: bytes, phoff:int, phentsz:int, phnum:int) -> Sequence[Phdr]:
|
||||||
ps = []
|
ps = []
|
||||||
for off in range(phoff, phoff+phentsz*phnum, phentsz):
|
for off in range(phoff, phoff+phentsz*phnum, phentsz):
|
||||||
# TODO
|
# TODO # what is TODO exactly??
|
||||||
ptype, flags, off, vaddr, paddr, filesz, memsz, align = \
|
ptype, flags, off, vaddr, paddr, filesz, memsz, align = \
|
||||||
unpack('<IIQQQQQQ', data[off:off+2*4+6*8])
|
unpack('<IIQQQQQQ', data[off:off+2*4+6*8])
|
||||||
p = Phdr(ptype, off, vaddr, paddr, filesz, memsz, flags, align)
|
p = Phdr(ptype, off, vaddr, paddr, filesz, memsz, flags, align)
|
||||||
|
@ -118,6 +243,41 @@ def parse_dyn64(data: bytes, dynp: Phdr) -> Dyn:
|
||||||
|
|
||||||
return ds
|
return ds
|
||||||
|
|
||||||
|
def parse_shdr64(data: bytes, shoff: int, shentsz: int, shnum: int,
                 shstrndx: int) -> Sequence[Shdr]:
    """Parse the section header table of a little-endian 64-bit ELF image.

    Args:
        data: the complete file contents.
        shoff: file offset of the section header table.
        shentsz: size in bytes of one table entry.
        shnum: number of table entries.
        shstrndx: index of the entry describing the section-name string
            table.

    Returns:
        A list of Shdr records.  When ``shstrndx`` is a valid index, each
        record's ``name`` is the resolved string (or None if its name offset
        lies outside the string table); otherwise ``name`` stays the raw
        integer offset.  An empty list is returned when there is no table or
        it does not fit inside *data*.
    """
    # '>' rather than '>=': a table whose last byte is the last byte of the
    # file still fits, and must not be rejected.
    if shentsz == 0 or shnum == 0 or shoff == 0 \
            or shoff + shnum*shentsz > len(data):
        return []

    ss = []
    for hdroff in range(shoff, shoff + shentsz*shnum, shentsz):
        # Four 32-bit and six 64-bit fields, already in Shdr field order.
        ss.append(Shdr(*unpack('<IIQQQQIIQQ', data[hdroff:hdroff+4*4+6*8])))

    if shstrndx < shnum:
        # Taken before any name is rewritten, so its offset/size are the
        # values unpacked from the file.
        shstr = ss[shstrndx]
        ss = [s._replace(name=(readstr(data, shstr.offset + s.name)
                               if s.name < shstr.size else None))
              for s in ss]

    return ss
|
||||||
|
|
||||||
|
def parse_sym64(data: bytes, sym: Shdr, strt: Shdr) -> Sequence[Sym]:
    """Parse a little-endian 64-bit symbol table section.

    Args:
        data: the complete file contents.
        sym: section header of the symbol table; its ``offset``, ``size``
            and ``entsize`` select the entries to read.
        strt: section header of the string table holding the symbol names.

    Returns:
        A list of Sym records sorted by ascending ``value``.  A symbol whose
        name offset lies outside *strt* gets ``name=None``.
    """
    # A zero entsize would make range() raise ValueError; fall back to the
    # 24 bytes that the unpack format below consumes per entry.
    entsize = sym.entsize or 24

    ss = []
    for off in range(sym.offset, sym.offset + sym.size, entsize):
        # Unlike the 32-bit layout, info/other/shndx come before value/size.
        noff, info, other, shndx, value, sz = \
            unpack('<IBBHQQ', data[off:off+4+2+2+8*2])

        sn = readstr(data, strt.offset + noff) \
            if noff < strt.size else None
        # info packs the type in its low nibble and the binding in its high one
        ss.append(Sym(sn, value, sz, (info & 15), (info >> 4), other, shndx))
    return sorted(ss, key=lambda x: x.value)
|
||||||
|
|
||||||
def parse_64(data: bytes) -> ELF:
|
def parse_64(data: bytes) -> ELF:
|
||||||
ident = data[:16]
|
ident = data[:16]
|
||||||
eclass = data[4]
|
eclass = data[4]
|
||||||
|
@ -125,8 +285,12 @@ def parse_64(data: bytes) -> ELF:
|
||||||
entry = unpack('<Q', data[24:24+8])[0]
|
entry = unpack('<Q', data[24:24+8])[0]
|
||||||
|
|
||||||
phoff = unpack('<Q', data[32:32+8])[0]
|
phoff = unpack('<Q', data[32:32+8])[0]
|
||||||
|
shoff = unpack('<Q', data[40:40+8])[0]
|
||||||
phentsz = unpack('<H', data[54:54+2])[0]
|
phentsz = unpack('<H', data[54:54+2])[0]
|
||||||
phnum = unpack('<H', data[56:56+2])[0]
|
phnum = unpack('<H', data[56:56+2])[0]
|
||||||
|
shentsz = unpack('<H', data[58:58+2])[0]
|
||||||
|
shnum = unpack('<H', data[60:60+2])[0]
|
||||||
|
shstrndx= unpack('<H', data[62:62+2])[0]
|
||||||
|
|
||||||
phdrs = parse_phdr64(data, phoff, phentsz, phnum)
|
phdrs = parse_phdr64(data, phoff, phentsz, phnum)
|
||||||
dyn = None
|
dyn = None
|
||||||
|
@ -136,7 +300,28 @@ def parse_64(data: bytes) -> ELF:
|
||||||
dyn = parse_dyn64(data, p)
|
dyn = parse_dyn64(data, p)
|
||||||
break
|
break
|
||||||
|
|
||||||
return ELF(data, ident, eclass, mach, entry, phdrs, dyn, False)
|
shdrs = parse_shdr64(data, shoff, shentsz, shnum, shstrndx)
|
||||||
|
|
||||||
|
symtabsh = [s for s in shdrs if s.type == SHT_SYMTAB and s.name == ".symtab"]
|
||||||
|
strtabsh = [s for s in shdrs if s.type == SHT_STRTAB and s.name == ".strtab"]
|
||||||
|
dynsymsh = [s for s in shdrs if s.type == SHT_SYMTAB and s.name == ".dynsym"]
|
||||||
|
dynstrsh = [s for s in shdrs if s.type == SHT_STRTAB and s.name == ".dynstr"]
|
||||||
|
|
||||||
|
assert len(symtabsh) < 2
|
||||||
|
assert len(strtabsh) < 2
|
||||||
|
assert len(dynsymsh) < 2
|
||||||
|
assert len(dynstrsh) < 2
|
||||||
|
|
||||||
|
symtab, dynsym = None, None
|
||||||
|
if len(symtabsh) and len(strtabsh):
|
||||||
|
symtab = parse_sym64(data, symtabsh[0], strtabsh[0]) \
|
||||||
|
if len(shdrs) > 0 else []
|
||||||
|
if len(dynsymsh) and len(dynstrsh):
|
||||||
|
dynsym = parse_sym64(data, symtabsh[0], strtabsh[0]) \
|
||||||
|
if len(shdrs) > 0 else []
|
||||||
|
|
||||||
|
return ELF(data, ident, eclass, mach, entry, phdrs, dyn, shdrs,
|
||||||
|
symtab, dynsym, False)
|
||||||
|
|
||||||
def parse(data: bytes) -> ELF:
|
def parse(data: bytes) -> ELF:
|
||||||
assert data[:4] == b'\x7FELF', "Not a valid ELF file" # good enough
|
assert data[:4] == b'\x7FELF', "Not a valid ELF file" # good enough
|
||||||
|
|
|
@ -27,6 +27,17 @@ class XRef(NamedTuple):
|
||||||
name: str
|
name: str
|
||||||
deff: str
|
deff: str
|
||||||
reff: Sequence[str]
|
reff: Sequence[str]
|
||||||
|
class ImpObj(NamedTuple):
    """An object referenced from the archive-import part of a link map.

    NOTE(review): presumably ``ar`` is the archive path and ``obj`` the
    member inside it -- nothing in this file constructs the type yet, so
    confirm once parse_arimp is implemented.
    """
    ar: str
    obj: str
|
||||||
|
class ImpRsnObj(NamedTuple):
    """Archive-import reason that names both a symbol and an object.

    NOTE(review): presumably the object that referenced ``sym`` -- confirm
    once parse_arimp is implemented.
    """
    sym: str
    obj: ImpObj
|
||||||
|
class ImpRsnSym(NamedTuple):
    """Archive-import reason that consists of a symbol name only."""
    sym: str
|
||||||
|
class ArImp(NamedTuple):
    """One record of the link map's archive-import section.

    Pairs the imported object with the reason it was pulled in, which is
    either a bare symbol (ImpRsnSym) or a symbol plus object (ImpRsnObj).
    """
    impobj: ImpObj
    reason: Union[ImpRsnSym, ImpRsnObj]
|
||||||
|
|
||||||
class LinkMap(NamedTuple):
|
class LinkMap(NamedTuple):
|
||||||
common : Sequence[CommonSym]
|
common : Sequence[CommonSym]
|
||||||
|
@ -34,11 +45,13 @@ class LinkMap(NamedTuple):
|
||||||
memcfg : Sequence[MemCfg]
|
memcfg : Sequence[MemCfg]
|
||||||
mmap : Sequence[MMap]
|
mmap : Sequence[MMap]
|
||||||
xref : Sequence[XRef]
|
xref : Sequence[XRef]
|
||||||
|
arimp : Sequence[ArImp]
|
||||||
|
|
||||||
def parse_common(ls: Sequence[str]) -> Sequence[CommonSym]:
    """Placeholder for the COMMON part of the link map; always returns []."""
    return []  # TODO
|
||||||
def parse_discard(ls: Sequence[str]) -> Sequence[Discard]:
    """Placeholder for "Discarded input sections"; always returns []."""
    return []  # TODO
|
||||||
def parse_memcfg(ls: Sequence[str]) -> Sequence[MemCfg]:
    """Placeholder for "Memory Configuration"; always returns []."""
    return []  # TODO
|
||||||
def parse_xref(ls: Sequence[str]) -> Sequence[XRef]:
    """Placeholder for "Cross Reference Table"; always returns []."""
    return []  # TODO
|
||||||
|
def parse_arimp(ls: Sequence[str]) -> Sequence[ArImp]:
    """Placeholder for the archive-member import section; always returns []."""
    return []  # TODO
|
||||||
|
|
||||||
def parse_mmap(ls: Sequence[str]) -> Sequence[MMap]:
|
def parse_mmap(ls: Sequence[str]) -> Sequence[MMap]:
|
||||||
rrr = []
|
rrr = []
|
||||||
|
@ -55,7 +68,9 @@ def parse_mmap(ls: Sequence[str]) -> Sequence[MMap]:
|
||||||
#print(repr(l))
|
#print(repr(l))
|
||||||
s = l.strip(); w = s.split()
|
s = l.strip(); w = s.split()
|
||||||
|
|
||||||
if s.startswith('LOAD ') or s.startswith('OUTPUT('): continue#break
|
if s.startswith('LOAD ') or s.startswith('OUTPUT(') or \
|
||||||
|
s.startswith('START GROUP') or s.startswith('END GROUP'):
|
||||||
|
continue#break
|
||||||
|
|
||||||
if l[0] != ' ':
|
if l[0] != ' ':
|
||||||
bigsect = w[0]
|
bigsect = w[0]
|
||||||
|
@ -68,7 +83,7 @@ def parse_mmap(ls: Sequence[str]) -> Sequence[MMap]:
|
||||||
continue # addr placed on next line for prettyprinting reasons
|
continue # addr placed on next line for prettyprinting reasons
|
||||||
|
|
||||||
#print(repr(l), w[0])
|
#print(repr(l), w[0])
|
||||||
assert w[0].startswith("0x"), "welp, bad symbol addr"
|
assert w[0].startswith("0x"), "welp, bad symbol addr %s"%w[0]
|
||||||
|
|
||||||
addr = int(w[0], 16)
|
addr = int(w[0], 16)
|
||||||
|
|
||||||
|
@ -90,10 +105,11 @@ def parse(s: str) -> LinkMap:
|
||||||
MEMCFG = 2
|
MEMCFG = 2
|
||||||
MMAP = 3
|
MMAP = 3
|
||||||
XREF = 4
|
XREF = 4
|
||||||
|
ARIMP = 5
|
||||||
|
|
||||||
curpt = -1
|
curpt = -1
|
||||||
|
|
||||||
commonl, discardl, memcfgl, mmapl, xrefl = [], [], [], [], []
|
commonl, discardl, memcfgl, mmapl, xrefl, arimpl = [], [], [], [], [], []
|
||||||
|
|
||||||
for l in s.split('\n'):
|
for l in s.split('\n'):
|
||||||
if len(l.strip()) == 0: continue
|
if len(l.strip()) == 0: continue
|
||||||
|
@ -103,15 +119,18 @@ def parse(s: str) -> LinkMap:
|
||||||
elif ls == "Discarded input sections": curpt = DISCARD
|
elif ls == "Discarded input sections": curpt = DISCARD
|
||||||
elif ls == "Memory Configuration": curpt = MEMCFG
|
elif ls == "Memory Configuration": curpt = MEMCFG
|
||||||
elif ls == "Linker script and memory map": curpt = MMAP
|
elif ls == "Linker script and memory map": curpt = MMAP
|
||||||
elif ls == 'Cross Reference Table': curpt = XREF
|
elif ls == "Cross Reference Table": curpt = XREF
|
||||||
|
elif ls == "Archive member included to satisfy reference by file (symbol)": curpt = ARIMP
|
||||||
elif curpt == COMMON : commonl.append(l)
|
elif curpt == COMMON : commonl.append(l)
|
||||||
elif curpt == DISCARD: discardl.append(l)
|
elif curpt == DISCARD: discardl.append(l)
|
||||||
elif curpt == MEMCFG : memcfgl.append(l)
|
elif curpt == MEMCFG : memcfgl.append(l)
|
||||||
elif curpt == MMAP : mmapl.append(l)
|
elif curpt == MMAP : mmapl.append(l)
|
||||||
elif curpt == XREF : xrefl.append(l)
|
elif curpt == XREF : xrefl.append(l)
|
||||||
|
elif curpt == ARIMP : arimpl.append(l)
|
||||||
else:
|
else:
|
||||||
assert False, "bad line %s" % ls
|
assert False, "bad line %s" % ls
|
||||||
|
|
||||||
return LinkMap(parse_common(commonl), parse_discard(discardl), \
|
return LinkMap(parse_common(commonl), parse_discard(discardl),
|
||||||
parse_memcfg(memcfgl), parse_mmap(mmapl), parse_xref(xrefl))
|
parse_memcfg(memcfgl), parse_mmap(mmapl), parse_xref(xrefl),
|
||||||
|
parse_arimp(arimpl))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue