# Reconstructed from the patch "Far more powerful struct printer that can handle
# complex structures with pointers." (bemani/utils/struct.py).
import argparse
import struct
from typing import Any, List, Tuple


class StructPrinter:
    """
    Decode runs of fixed-layout records out of a PE image.

    Records are described with Python ``struct`` format characters plus a few
    extensions understood by :meth:`parse_format_spec`:

    * Leading ``@ = < > !`` characters are collected as a byte-order prefix that
      is applied to every field.
    * ``*`` before a field marks it as a pointer: a 4-byte unsigned integer is
      read, translated from a virtual address to a file offset, and the target
      is decoded with the field's own spec. ``*`` may be repeated for
      pointers-to-pointers.
    * ``( ... )`` after ``*`` groups several fields into the pointed-to record.
    * ``z`` is a null-terminated ASCII string.
    * ``&`` before a field renders its value(s) with ``hex()``.
    """

    def __init__(self, data: bytes) -> None:
        """Keep the raw file bytes and parse their PE headers."""
        # Imported here so the format-spec parser stays usable (and testable)
        # on machines where pefile is not installed.
        import pefile  # type: ignore

        self.data = data
        self.pe = pefile.PE(data=data, fast_load=True)

    def parse_format_spec(self, fmt: str) -> Tuple[str, List[Any]]:
        """
        Split a format string into a byte-order prefix and a list of field specs.

        Each entry of the returned list is either a ``str`` (one plain field,
        optionally starting with ``&`` and/or a repeat count) or a nested list
        (the spec of the record a pointer field points at).

        Raises:
            Exception: if a ``*``, ``&`` or parenthesis is misplaced, if
                parentheses are unbalanced, or if the format ends in the middle
                of a specifier.
        """
        prefix: str = ""
        cur_accum: str = ""
        specs: List[Any] = []
        in_prefix: bool = True
        in_dereference: bool = False
        # True when the group being accumulated must keep its own parentheses
        # because extra "*" markers precede it (e.g. "**(II)").
        wrap_group: bool = False
        parens: int = 0

        for c in fmt:
            if in_prefix:
                # Remember byte ordering prefix.
                if c in ("@", "=", "<", ">", "!"):
                    prefix += c
                    continue
                in_prefix = False

            if parens > 0:
                # Inside a group everything is accumulated verbatim and parsed
                # recursively once the group closes; only nesting is tracked.
                if c == "(":
                    parens += 1
                elif c == ")":
                    parens -= 1
                    if parens == 0:
                        if wrap_group:
                            cur_accum += c
                        _, subspec = self.parse_format_spec(cur_accum)
                        specs.append(subspec)
                        cur_accum = ""
                        in_dereference = False
                        continue
                cur_accum += c
                continue

            if c == "*":
                if in_dereference:
                    # Double-indirect dereference; handled by the recursive parse.
                    cur_accum += c
                else:
                    if cur_accum:
                        raise Exception("Cannot have dereference marker in middle of specifier!")
                    in_dereference = True
                continue

            if c == "(":
                if not in_dereference:
                    raise Exception("Cannot have parenthesis in middle of specifier!")
                # With pending "*" markers the parentheses must survive into the
                # sub-spec, otherwise the inner dereference would only cover the
                # first field of the group.
                wrap_group = bool(cur_accum) and not cur_accum.strip("*")
                if wrap_group:
                    cur_accum += c
                parens = 1
                continue

            if c == ")":
                if not in_dereference:
                    raise Exception("Cannot have parenthesis in middle of specifier!")
                raise Exception("Unbalanced closing parenthesis in specifier!")

            if c.isdigit():
                # Repeat count belonging to the next format character.
                cur_accum += c
                continue

            if c == "&":
                # Pending "*" markers are fine; anything else means the hex
                # marker is not at the start of its field.
                if cur_accum.strip("*"):
                    raise Exception("Hex specifier should be at beginning of specifier!")
                cur_accum += c
                continue

            # Any other character terminates the current field.
            cur_accum += c
            if in_dereference:
                # Still do the subparse even though it is only one thing.
                _, subspec = self.parse_format_spec(cur_accum)
                specs.append(subspec)
                in_dereference = False
            else:
                specs.append(cur_accum)
            cur_accum = ""

        if parens != 0:
            raise Exception("Unbalanced opening parenthesis in specifier!")
        if in_dereference or cur_accum:
            raise Exception("Format ends in the middle of a specifier!")

        return prefix, specs

    def virtual_to_physical(self, offset: int) -> int:
        """
        Translate a virtual address into an offset into ``self.data``.

        Raises:
            Exception: if no section of the PE covers the address.
        """
        image_base = self.pe.OPTIONAL_HEADER.ImageBase
        for section in self.pe.sections:
            start = section.VirtualAddress + image_base
            end = start + section.SizeOfRawData
            if start <= offset < end:
                return (offset - start) + section.PointerToRawData

        raise Exception(f"Couldn't find raw offset for virtual offset 0x{offset:08x}")

    def parse_struct(self, startaddr: str, endaddr: str, fmt: str) -> List[Any]:
        """
        Decode every record between two addresses.

        Args:
            startaddr: Hex string; first byte of the first record.
            endaddr: Hex string; decoding stops once the cursor reaches it.
            fmt: Format string, see the class docstring.

        Returns:
            One list per record, holding the decoded fields in order. Pointer
            fields appear as a nested list with the pointed-to record's fields.
        """
        start: int = int(startaddr, 16)
        end: int = int(endaddr, 16)

        # Addresses at or above the image base are assumed to be virtual.
        if start >= self.pe.OPTIONAL_HEADER.ImageBase:
            start = self.virtual_to_physical(start)
        if end >= self.pe.OPTIONAL_HEADER.ImageBase:
            end = self.virtual_to_physical(end)

        # Parse out any dereference instructions.
        prefix, specs = self.parse_format_spec(fmt)

        return self.__parse_struct(start, end, prefix, specs)

    def __parse_struct(self, start: int, end: int, prefix: str, specs: List[Any]) -> List[Any]:
        """
        Decode records laid out back to back in ``self.data[start:end]``.

        Raises:
            Exception: on ``&z``, on a string with no terminating null byte, or
                when ``specs`` consumes no bytes (which would never terminate).
            struct.error: when the data is too short for a field.
        """
        output: List[Any] = []
        offset = start
        while offset < end:
            record_start = offset
            line: List[Any] = []
            for spec in specs:
                if not isinstance(spec, str):
                    # Pointer field: pointers are read as 4-byte unsigned ints.
                    pointer = struct.unpack_from(prefix + "I", self.data, offset)[0]
                    offset += 4

                    # Resolve the physical address of this pointer and trick the
                    # substructure into parsing exactly one iteration.
                    target = self.virtual_to_physical(pointer)
                    subparse = self.__parse_struct(target, target + 1, prefix, spec)
                    if len(subparse) != 1:
                        raise Exception("Logic error!")
                    line.append(subparse[0])
                    continue

                dohex = spec.startswith("&")
                if dohex:
                    spec = spec[1:]

                if spec == "z":
                    # Hex makes no sense for a string.
                    if dohex:
                        raise Exception("Cannot display string as hex!")
                    # Locate the terminator in one scan; a byte-at-a-time loop
                    # never ends when the terminator is missing.
                    terminator = self.data.find(b"\x00", offset)
                    if terminator < 0:
                        raise Exception(f"Unterminated string at offset 0x{offset:08x}!")
                    line.append(self.data[offset:terminator].decode('ascii'))
                    # Advance past null byte.
                    offset = terminator + 1
                else:
                    field_fmt = prefix + spec
                    values = struct.unpack_from(field_fmt, self.data, offset)
                    # Keep every value: counted formats such as "2I" yield
                    # several, pad bytes ("x") yield none.
                    if dohex:
                        line.extend(hex(value) for value in values)
                    else:
                        line.extend(values)
                    offset += struct.calcsize(field_fmt)

            if offset == record_start:
                raise Exception("Format specifier does not consume any data!")
            output.append(line)

        return output
Additionally, prefixing a format specifier with * allows dereferencing pointers. " + "Surround a chunk of format specifiers with parentheses to dereference complex structures. For " + "ease of unpacking C string pointers, the specifier \"z\" is recognized to mean null-terminated " + "string. A & preceding a format specifier means that we should convert to hex before displaying." + ), type=str, default=None, required=True, @@ -70,7 +213,10 @@ def main() -> None: data = fp.read() fp.close() - parse_struct(data, args.start, args.end, args.format) + printer = StructPrinter(data) + lines = printer.parse_struct(args.start, args.end, args.format) + for line in lines: + print(line) if __name__ == '__main__':