1
0
mirror of synced 2024-11-24 14:30:11 +01:00
bemaniutils/bemani/utils/struct.py

308 lines
11 KiB
Python

import argparse
import pefile # type: ignore
import struct
import sys
from typing import Optional, Tuple, List, Any
"""
Some examples of valid format specifiers and what they do are as follows:

*z&+0x200# = Decodes an array of string pointers, and includes the count
             alongside the string, starting at 0x200, and displayed in
             hex. Broken down, it has the following parts:
    *z = Dereference the current value (*) and treat that integer
         as a pointer to a null-terminated string (z).
    &+0x200# = Print the current line number (#), offset by the
               value 0x200 (+0x200) as a hex number (&).
"""
class LineNumber:
    """Placeholder for an entry number that is rendered once the row index is known.

    The stored ``offset`` is added to the row index passed to ``toStr``; the
    ``hex`` flag selects hexadecimal or decimal rendering of that sum.
    """

    def __init__(self, offset: int, hex: bool) -> None:
        # The ``hex`` parameter shadows the builtin only within this method;
        # the name is part of the constructor's signature, so it is kept.
        self.offset = offset
        self.hex = hex

    def toStr(self, lineno: int) -> str:
        """Return ``offset + lineno`` formatted as hex (``0x...``) or decimal text."""
        value = self.offset + lineno
        return hex(value) if self.hex else str(value)
class StructPrinter:
    """Decode repeated binary structures out of a PE image held in memory.

    The raw file bytes are kept in ``self.data`` and the parsed PE headers
    (via ``pefile``) in ``self.pe``; the headers are used to translate
    virtual addresses into file offsets.
    """

    def __init__(self, data: bytes) -> None:
        """Store the raw bytes and parse the PE headers from them."""
        self.data = data
        self.pe = pefile.PE(data=data, fast_load=True)

    def parse_format_spec(self, fmt: str) -> Tuple[str, List[Any]]:
        """Split a format string into a byte-order prefix and a list of specs.

        Each returned spec is either a string (one specifier, optionally
        carrying a leading ``&``, a repeat count, or a ``+N``/``-N`` offset
        followed by ``#``) or a nested list, which represents a dereference
        (``*``) whose target is described by the nested specs.

        Raises:
            Exception: when ``*``, parentheses or ``&`` appear in a position
                the grammar does not allow.
        """
        prefix: str = ""
        cur_accum: str = ""
        specs: List[Any] = []
        in_prefix: bool = True
        in_dereference: bool = False
        parens: int = 0

        for c in fmt:
            if in_prefix:
                # Remember byte ordering prefix.
                if c in ["@", "=", "<", ">", "!"]:
                    prefix += c
                    continue
                else:
                    in_prefix = False

            if c == "*":
                if parens == 0:
                    # Track if we're in a dereference section.
                    if not in_dereference:
                        in_dereference = True
                        if cur_accum:
                            raise Exception("Cannot have dereference marker in middle of specifier!")
                    else:
                        # Double-indirect dereference; resolved by the recursive parse.
                        cur_accum += c
                else:
                    # Just add it, it's part of a parenthesized subsection.
                    cur_accum += c
                continue

            if c == "(":
                # Clump together format specs inside parens.
                if not in_dereference:
                    raise Exception("Cannot have parenthesis in middle of specifier!")

                # Only nested parens are kept verbatim; the outermost pair is consumed here.
                if parens > 0:
                    cur_accum += c

                parens += 1
                continue

            if c == ")":
                # If we hit the end of a paren, we have to recursively parse.
                if not in_dereference:
                    raise Exception("Cannot have parenthesis in middle of specifier!")

                parens -= 1
                if parens > 0:
                    cur_accum += c
                else:
                    # Parse the accumulated data as its own format spec.
                    _, subspec = self.parse_format_spec(cur_accum)
                    cur_accum = ""
                    in_dereference = False
                    specs.append(subspec)

                continue

            # If we have either an integer prefix, or an offset prefix, accumulate here.
            # Hex digits and 'x' only count as part of a number once a sign has been seen,
            # so that plain struct codes such as 'b', 'd' or 'x' still end a specifier.
            if c.isdigit() or c in '+-' or (c in 'xabcdefABCDEF' and ('+' in cur_accum or '-' in cur_accum)):
                cur_accum += c
                continue

            if c == "&":
                if cur_accum:
                    raise Exception("Hex specifier should be at beginning of specifier!")
                cur_accum += c
                continue

            cur_accum += c

            # If we're dereferencing, still do the subparse even though it's only one thing.
            if parens == 0:
                if in_dereference:
                    _, subspec = self.parse_format_spec(cur_accum)
                    specs.append(subspec)
                    in_dereference = False
                else:
                    specs.append(cur_accum)

                cur_accum = ""

        return prefix, specs

    def virtual_to_physical(self, offset: int) -> int:
        """Translate a virtual address into an offset within ``self.data``.

        Raises:
            Exception: when no section's raw data range contains ``offset``.
        """
        for section in self.pe.sections:
            start = section.VirtualAddress + self.pe.OPTIONAL_HEADER.ImageBase
            end = start + section.SizeOfRawData

            if start <= offset < end:
                return (offset - start) + section.PointerToRawData

        raise Exception(f'Couldn\'t find raw offset for virtual offset 0x{offset:08x}')

    def parse_struct(self, startaddr: str, endaddr: Optional[str], countstr: Optional[str], fmt: str) -> List[Any]:
        """Decode entries described by ``fmt`` starting at ``startaddr``.

        Args:
            startaddr: Hex string; values at or above the image base are
                treated as virtual addresses and translated to file offsets.
            endaddr: Hex string at which to stop, or None when ``countstr`` is given.
            countstr: Number of entries (hex if it contains ``0x``, otherwise
                decimal), or None when ``endaddr`` is given.
            fmt: Format string as accepted by ``parse_format_spec``.

        Returns:
            One list per decoded entry.

        Raises:
            Exception: when neither or both of ``endaddr``/``countstr`` are given.
        """
        start: int = int(startaddr, 16)
        end: Optional[int] = int(endaddr, 16) if endaddr is not None else None
        count: Optional[int] = int(countstr, 16 if "0x" in countstr else 10) if countstr is not None else None

        if end is None and count is None:
            raise Exception("Can't handle endless structures!")
        if end is not None and count is not None:
            raise Exception("Can't handle providing two ends!")

        if start >= self.pe.OPTIONAL_HEADER.ImageBase:
            # Assume this is virtual
            start = self.virtual_to_physical(start)

        if end is not None and end >= self.pe.OPTIONAL_HEADER.ImageBase:
            # Assume this is virtual
            end = self.virtual_to_physical(end)

        # Parse out any dereference instructions.
        prefix, specs = self.parse_format_spec(fmt)

        return self.__parse_struct(start, end, count, prefix, specs)

    def _read_cstring(self, offset: int) -> Tuple[str, int]:
        """Read a NUL-terminated ASCII string; return it and the offset past the NUL."""
        terminator = self.data.find(b"\x00", offset)
        if terminator < 0:
            # Without this check a missing terminator would never end the scan.
            raise Exception(f"Unterminated string at offset 0x{offset:08x}")
        return self.data[offset:terminator].decode('ascii'), terminator + 1

    def __parse_struct(self, start: int, end: Optional[int], count: Optional[int], prefix: str, specs: List[Any]) -> List[Any]:
        """Decode entries from ``start`` until ``end`` is reached or ``count`` runs out.

        String specs are decoded in place; nested list specs read a 4-byte
        pointer (unpacked as ``I``), translate it with ``virtual_to_physical``
        and decode exactly one entry at the target. A zero pointer yields None.

        Raises:
            Exception: on a malformed line-number spec, a hex request for a
                string, an unterminated string, or a format that consumes no
                data while parsing up to an end offset.
        """
        output: List[Any] = []
        offset = start
        while True:
            if end is not None and offset >= end:
                break
            if count is not None:
                if count <= 0:
                    break
                count -= 1

            line_start = offset
            line: List[Any] = []
            for spec in specs:
                if isinstance(spec, str):
                    if spec[0] == "&":
                        dohex = True
                        spec = spec[1:]
                    else:
                        dohex = False

                    if spec[-1] == "#":
                        # Entry number; resolved later, once the row index is known.
                        if len(spec) > 1:
                            if spec[0] not in "+-":
                                raise Exception("Line number offsets must include a '+' or '-' prefix!")
                            val = int(spec[:-1], 16 if "0x" in spec else 10)
                        else:
                            val = 0
                        line.append(LineNumber(val, dohex))
                    elif spec == "z":
                        # Hex makes no sense here
                        if dohex:
                            raise Exception("Cannot display string as hex!")
                        # Null-terminated string; the offset advances past the null byte.
                        text, offset = self._read_cstring(offset)
                        line.append(text)
                    else:
                        size = struct.calcsize(prefix + spec)
                        chunk = self.data[offset:(offset + size)]
                        # Pad bytes ('x', or counted forms such as '4x') produce no
                        # value, so they are skipped rather than unpacked.
                        if spec[-1] != 'x':
                            value = struct.unpack(prefix + spec, chunk)[0]
                            line.append(hex(value) if dohex else value)
                        offset += size
                else:
                    chunk = self.data[offset:(offset + 4)]
                    pointer = struct.unpack(prefix + "I", chunk)[0]
                    offset += 4

                    # Resolve the physical address of this pointer, trick the substructure into
                    # parsing only one iteration.
                    if pointer == 0x0:
                        # Null pointer
                        line.append(None)
                    else:
                        pointer = self.virtual_to_physical(pointer)
                        subparse = self.__parse_struct(pointer, pointer + 1, None, prefix, spec)
                        if len(subparse) != 1:
                            raise Exception("Logic error!")
                        line.append(subparse[0])

            if end is not None and offset == line_start:
                # Nothing was consumed, so the end offset can never be reached.
                raise Exception("Format specifier consumes no data; cannot parse up to an end offset!")

            output.append(line)

        return output
def main() -> int:
    """Command-line entry point: decode structures from a file and print them.

    Reads the options from ``sys.argv``, decodes the requested entries with
    ``StructPrinter`` and prints one comma-separated line per entry.

    Returns:
        0 on success, 1 when neither or both of ``--end``/``--count`` were given.
    """
    parser = argparse.ArgumentParser(description="A utility to print structs out of a DLL.")
    parser.add_argument(
        "--file",
        help="DLL file to extract from.",
        type=str,
        default=None,
        required=True,
    )
    parser.add_argument(
        "--start",
        help="Hex offset into the file we should start at.",
        type=str,
        default=None,
        required=True,
    )
    parser.add_argument(
        "--end",
        help="Hex offset into the file we should go until. Alternatively you can use --count",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--count",
        help="Number of entries to parse, as a decimal or hex integer. Alternatively you can use --end",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--format",
        help=(
            "Python struct format we should print using. See https://docs.python.org/3/library/struct.html "
            "for details. Additionally, prefixing a format specifier with * allows dereferencing pointers. "
            "Surround a chunk of format specifiers with parenthesis to dereference complex structures. For "
            "ease of unpacking C string pointers, the specifier \"z\" is recognized to mean null-terminated "
            "string. A & preceding a format specifier means that we should convert to hex before displaying. "
            "For the ease of decoding enumerations, the specifier \"#\" is recognized to mean entry number. "
            "You can provide it an offset value such as \"+20#\" to start at a certain number."
        ),
        type=str,
        default=None,
        required=True,
    )
    args = parser.parse_args()

    if args.end is None and args.count is None:
        print("You must specify either an --end or a --count!", file=sys.stderr)
        return 1
    if args.end is not None and args.count is not None:
        print("You cannot specify both an --end and a --count!", file=sys.stderr)
        return 1

    # The context manager closes the handle even if the read fails.
    with open(args.file, 'rb') as fp:
        data = fp.read()

    def __str(obj: object, lineno: int) -> str:
        """Render one decoded value; ``lineno`` resolves LineNumber placeholders."""
        if obj is None:
            return "NULL"
        elif isinstance(obj, LineNumber):
            return obj.toStr(lineno)
        elif isinstance(obj, list):
            # A single-element list is shown bare; longer ones are parenthesized.
            if len(obj) == 1:
                return __str(obj[0], lineno)
            else:
                return f"({', '.join(__str(o, lineno) for o in obj)})"
        else:
            return repr(obj)

    printer = StructPrinter(data)
    lines = printer.parse_struct(args.start, args.end, args.count, args.format)
    for i, line in enumerate(lines):
        print(", ".join(__str(entry, i) for entry in line))

    return 0
# Run the command-line tool only when executed as a script, and hand
# main()'s return value to the interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())