367 lines
14 KiB
Python
367 lines
14 KiB
Python
import argparse
|
|
import struct
|
|
import sys
|
|
from typing import Optional, Tuple, List, Any
|
|
|
|
from bemani.common import PEFile
|
|
|
|
|
|
class LineNumber:
    """Deferred entry number that is rendered once the row index is known.

    An instance stores a fixed offset and a display mode; the final text is
    produced by :meth:`toStr` when the caller supplies the row index.
    """

    def __init__(self, offset: int, hex: bool) -> None:
        # Amount added to the row index when the number is rendered.
        self.offset = offset
        # When true the number is rendered in hexadecimal, otherwise decimal.
        self.hex = hex

    def toStr(self, lineno: int) -> str:
        """Return ``offset + lineno`` as text, in hex if ``self.hex`` is set."""
        value = self.offset + lineno
        # ``hex`` here is the builtin; the constructor parameter of the same
        # name is only visible inside ``__init__``.
        return hex(value) if self.hex else str(value)
|
|
|
|
|
|
class StructPrinter:
    """Decode arrays of packed structures out of a PE image.

    The row layout is described with a ``struct``-style format string that is
    extended with a few extra markers:

    * ``*`` dereferences a pointer; the specifier (or parenthesised group of
      specifiers) that follows describes what the pointer points at.
    * ``z`` reads a null-terminated string.
    * ``&`` in front of a specifier renders the value in hexadecimal.
    * ``#`` emits the entry number, optionally offset as in ``+20#``.

    The ``pe`` object is only used through its ``data`` attribute and its
    ``is_virtual``, ``virtual_to_physical`` and ``is_64bit`` methods.
    """

    def __init__(self, pe: PEFile, default_encoding: str = "ascii") -> None:
        # Encoding used to decode ``z`` (null-terminated string) values.
        self.default_encoding = default_encoding
        self.pe = pe

    def parse_format_spec(self, fmt: str) -> Tuple[str, List[Any]]:
        """Split a format string into a byte-order prefix and a spec tree.

        Args:
            fmt: The extended struct format string.

        Returns:
            A ``(prefix, specs)`` tuple. ``prefix`` holds any leading
            byte-order characters (``@ = < > !``). ``specs`` is a list whose
            entries are either plain specifier strings, or nested lists
            describing the target of a pointer dereference.

        Raises:
            Exception: If a marker appears in an invalid position, the
                parentheses are unbalanced, or the format ends in the middle
                of a specifier.
        """
        prefix: str = ""
        cur_accum: str = ""
        specs: List[Any] = []
        in_prefix: bool = True
        in_dereference: bool = False
        parens: int = 0

        for c in fmt:
            if in_prefix:
                # Remember byte ordering prefix.
                if c in "@=<>!":
                    prefix += c
                    continue
                in_prefix = False

            if c == "*":
                if parens > 0 or in_dereference:
                    # Either part of a parenthesised subsection, or a
                    # double-indirect dereference; the recursive parse of the
                    # accumulated text handles it.
                    cur_accum += c
                else:
                    if cur_accum:
                        raise Exception("Cannot have dereference marker in middle of specifier!")
                    in_dereference = True
                continue

            if c == "(":
                # Clump together format specs inside parens.
                if not in_dereference:
                    raise Exception("Cannot have parenthesis in middle of specifier!")
                if parens > 0:
                    cur_accum += c
                parens += 1
                continue

            if c == ")":
                if not in_dereference:
                    raise Exception("Cannot have parenthesis in middle of specifier!")
                if parens == 0:
                    # A close paren with nothing open would otherwise drive the
                    # depth negative and silently swallow the rest of the format.
                    raise Exception("Unbalanced parenthesis in format specifier!")
                parens -= 1
                if parens > 0:
                    cur_accum += c
                else:
                    # Outermost paren closed: parse the group as its own format.
                    _, subspec = self.parse_format_spec(cur_accum)
                    cur_accum = ""
                    in_dereference = False
                    specs.append(subspec)
                continue

            # If we have either an integer prefix, or an offset prefix, accumulate here.
            # Hex digits (and the "x" of "0x") only count once a sign has been seen,
            # so that plain format characters such as "b" or "x" are not swallowed.
            if c.isdigit() or c in "+-" or (c in "xabcdefABCDEF" and ("+" in cur_accum or "-" in cur_accum)):
                cur_accum += c
                continue

            if c == "&":
                # Inside parentheses the accumulated text is a whole sub-format
                # that is validated by the recursive parse, so the "must come
                # first" rule only applies at the top level.
                if parens == 0 and cur_accum:
                    raise Exception("Hex specifier should be at beginning of specifier!")
                cur_accum += c
                continue

            cur_accum += c

            # A regular character completes a specifier unless we are inside parens.
            if parens == 0:
                if in_dereference:
                    # Still do the subparse even though it is only one thing, so
                    # that dereference targets are always represented as lists.
                    _, subspec = self.parse_format_spec(cur_accum)
                    specs.append(subspec)
                    in_dereference = False
                else:
                    specs.append(cur_accum)

                cur_accum = ""

        if parens > 0:
            raise Exception("Unterminated parenthesis in format specifier!")
        if in_dereference or cur_accum:
            # E.g. a trailing "*", "&" or count with nothing after it.
            raise Exception("Incomplete specifier at end of format!")

        return prefix, specs

    def parse_struct(self, startaddr: str, endaddr: Optional[str], countstr: Optional[str], fmt: str) -> List[Any]:
        """Decode rows of ``fmt`` starting at ``startaddr``.

        Args:
            startaddr: Hex string giving the start offset.
            endaddr: Hex string giving the offset to stop at, or ``None``.
            countstr: Number of rows to decode, or ``None``. Parsed as hex
                when it contains ``0x``, otherwise as decimal.
            fmt: The extended struct format string describing one row.

        Returns:
            A list of rows, each row being a list of decoded values.

        Raises:
            Exception: If neither or both of ``endaddr`` and ``countstr`` are
                given, or if the format is invalid.
        """
        start: int = int(startaddr, 16)
        end: Optional[int] = int(endaddr, 16) if endaddr is not None else None
        count: Optional[int] = int(countstr, 16 if "0x" in countstr else 10) if countstr is not None else None

        if end is None and count is None:
            raise Exception("Can't handle endless structures!")
        if end is not None and count is not None:
            raise Exception("Can't handle providing two ends!")

        # Addresses the PE reports as virtual are translated to file offsets.
        if self.pe.is_virtual(start):
            start = self.pe.virtual_to_physical(start)
        if end is not None and self.pe.is_virtual(end):
            end = self.pe.virtual_to_physical(end)

        # Parse out any dereference instructions.
        prefix, specs = self.parse_format_spec(fmt)

        return self.__parse_struct(start, end, count, prefix, specs)

    def __parse_struct(self, start: int, end: Optional[int], count: Optional[int], prefix: str, specs: List[Any]) -> List[Any]:
        """Decode rows from ``start`` until ``end`` is reached or ``count`` rows are read.

        Raises:
            Exception: If rows are bounded by ``end`` but the format consumes
                no bytes, since the end offset could then never be reached.
        """
        output: List[Any] = []
        offset = start
        while True:
            if end is not None and offset >= end:
                break
            if count is not None:
                if count <= 0:
                    break
                count -= 1

            row_start = offset
            line: List[Any] = []
            for spec in specs:
                if isinstance(spec, str):
                    offset = self._parse_value(spec, prefix, offset, line)
                else:
                    offset = self._parse_pointer(spec, prefix, offset, line)
            output.append(line)

            if end is not None and offset == row_start:
                # Without progress the end offset is unreachable and the loop
                # would grow the output forever.
                raise Exception("Format does not consume any data, cannot parse until an end offset!")

        return output

    def _parse_value(self, spec: str, prefix: str, offset: int, line: List[Any]) -> int:
        """Decode one non-pointer specifier into ``line`` and return the new offset."""
        dohex = spec.startswith("&")
        if dohex:
            spec = spec[1:]

        if spec.endswith("#"):
            # Entry number; consumes no data.
            if len(spec) > 1:
                if spec[0] not in "+-":
                    raise Exception("Line number offsets must include a '+' or '-' prefix!")
                val = int(spec[:-1], 16 if "0x" in spec else 10)
            else:
                val = 0
            line.append(LineNumber(val, dohex))
            return offset

        if spec == "z":
            # Hex makes no sense here
            if dohex:
                raise Exception("Cannot display string as hex!")
            raw, offset = self._read_cstring(offset)
            line.append(raw.decode(self.default_encoding))
            return offset

        fmt = prefix + spec
        size = struct.calcsize(fmt)
        # Padding (optionally with a repeat count, e.g. "4x") yields no value.
        is_padding = spec.lstrip("0123456789") == "x"
        if not is_padding:
            values = struct.unpack(fmt, self.pe.data[offset:(offset + size)])
            # Only the first unpacked value is displayed; formats that unpack
            # to nothing (such as a zero repeat count) contribute no value.
            if values:
                line.append(hex(values[0]) if dohex else values[0])
        return offset + size

    def _read_cstring(self, offset: int) -> Tuple[bytes, int]:
        """Read bytes up to a null terminator; return them and the offset past it."""
        data = self.pe.data
        pieces: List[bytes] = []
        while True:
            byte = data[offset:(offset + 1)]
            if not byte:
                # Slicing past the end yields an empty chunk, which never equals
                # the terminator; bail out instead of scanning forever.
                raise Exception("Unterminated string runs past the end of the data!")
            offset += 1
            if byte == b"\x00":
                return b"".join(pieces), offset
            pieces.append(byte)

    def _parse_pointer(self, spec: List[Any], prefix: str, offset: int, line: List[Any]) -> int:
        """Read a pointer, decode what it points at into ``line``, and return the new offset."""
        if self.pe.is_64bit():
            width, code = 8, "Q"
        else:
            width, code = 4, "I"

        chunk = self.pe.data[offset:(offset + width)]
        pointer = struct.unpack(prefix + code, chunk)[0]
        offset += width

        if pointer == 0x0:
            # Null pointer
            line.append(None)
        else:
            physical = self.pe.virtual_to_physical(pointer)
            # Decode exactly one row of the target structure. Bounding by count
            # (rather than an end offset) also works for targets that consume
            # no bytes.
            subparse = self.__parse_struct(physical, None, 1, prefix, spec)
            if len(subparse) != 1:
                raise Exception("Logic error!")
            line.append(subparse[0])

        return offset
|
|
|
|
|
|
def main() -> int:
    """Parse command-line arguments, decode the requested structures and print them.

    Reads the file named by ``--file``, optionally runs the requested code
    emulation on it, then decodes rows according to ``--format`` and prints
    one comma-separated line per row to standard output.

    Returns:
        The process exit status: 0 on success, 1 when the arguments are
        inconsistent (a message is written to standard error).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="A utility to print structs out of a DLL.",
        epilog=("""
Some examples of valid format specifiers and what they do are as follows:

*h = Decodes an array of short pointers, decoding the resulting shorts for each pointer in the array.

*(hbb) = Decodes an array of pointers to a structure containing a short and two bytes, decoding that short and both bytes for each entry in the array.

*z = Decodes an array of null-terminated string pointers.

Ih&h = Decodes an array of structures containing an unsigned integer and two shorts, displaying the second short in hex instead of decimal.

#I = Decodes an array of unsigned integers, displaying the array entry number and the integer.

+64#h = Decodes an array of shorts, displaying the array entry number starting at 64 and the integer.

*z&+0x200# = Decodes an array of null-terminated string pointers, displaying the array entry number in hex starting at 0x200 and string. Broken down, it has the following parts:
    *z = Dereference the current value (*) and treat that integer as a pointer to a null-terminated string (z).
    &+0x200# = Print the current line number (#), offset by the value 0x200 (+0x200) as a hex number (&).
"""),
    )
    parser.add_argument(
        "--file",
        help="DLL file to extract from.",
        type=str,
        default=None,
        required=True,
    )
    parser.add_argument(
        "--start",
        help="Hex offset into the file we should start at. This can be specified as either a raw offset into the DLL or as a virtual offset.",
        type=str,
        default=None,
        required=True,
    )
    parser.add_argument(
        "--end",
        help="Hex offset into the file we should go until. Alternatively you can use --count and the end offset will be calculated based on the start and format size.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--count",
        help="Number of entries to parse, as a decimal or hex integer. Alternatively you can use --end and the count will be calculated based on the start, end and format size.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--encoding",
        help="Encoding to use for strings, such as 'ascii', 'utf-8' or 'shift-jis'.",
        default='ascii',
        type=str,
    )
    parser.add_argument(
        "--format",
        help=(
            "Python struct format we should print using. See https://docs.python.org/3/library/struct.html "
            "for details. Additionally, prefixing a format specifier with * allows dereferencing pointers. "
            "Surround a chunk of format specifiers with parenthesis to dereference structures. Note that "
            "structures can be arbitrarily nested to decode complex data types. For ease of unpacking C string "
            "pointers, the specifier \"z\" is recognized to mean null-terminated string. A & preceding a "
            "format specifier means that we should convert to hex before displaying. For the ease of decoding "
            "enumerations, the specifier \"#\" is recognized to mean entry number. You can provide it an "
            "offset value such as \"+20#\" to start at a certain number."
        ),
        type=str,
        default=None,
        required=True,
    )
    parser.add_argument(
        "--emulate-code",
        help=(
            "Hex offset pair of addresses where we should emulate x86/x64 code to "
            "reconstruct a dynamic psmap structure, separated by a colon. This can "
            "be specified as either a raw offset into the DLL or as a virtual offset. "
            "If multiple sections must be emulated you can specify this multiple times."
        ),
        type=str,
        action='append',
        default=[],
    )
    parser.add_argument(
        "--emulate-function",
        help=(
            "Hex offset address of a function that we should emulate to reconstruct a "
            "dynamic psmap structure. This can be specified as either a raw offset into "
            "the DLL or as a virtual offset. If multiple functions must be emulated you "
            "can specify this multiple times."
        ),
        type=str,
        action='append',
        default=[],
    )
    parser.add_argument(
        "--verbose",
        "-v",
        help="Display verbose parsing info.",
        action="store_true",
        default=False,
    )
    args = parser.parse_args()

    # Exactly one way of bounding the array must be given.
    if args.end is None and args.count is None:
        print("You must specify either an --end or a --count!", file=sys.stderr)
        return 1
    if args.end is not None and args.count is not None:
        print("You cannot specify both an --end and a --count!", file=sys.stderr)
        return 1

    # The context manager closes the file even if reading fails.
    with open(args.file, 'rb') as fp:
        data = fp.read()

    def __str(obj: object, lineno: int) -> str:
        # Render one decoded value; ``lineno`` is the row index, needed by
        # LineNumber placeholders.
        if obj is None:
            return "NULL"
        elif isinstance(obj, LineNumber):
            return obj.toStr(lineno)
        elif isinstance(obj, list):
            # Single-element structures are shown without parentheses.
            if len(obj) == 1:
                return __str(obj[0], lineno)
            else:
                return f"({', '.join(__str(o, lineno) for o in obj)})"
        else:
            return repr(obj)

    pe = PEFile(data)

    # If asked, attempt to emulate code which dynamically constructs the structure
    # we're about to parse.
    for chunk in args.emulate_code:
        emulate_start, separator, emulate_end = chunk.partition(':')
        if not separator:
            # Report a usage error instead of crashing on tuple unpacking.
            print(f"--emulate-code value {chunk!r} must be a pair of hex offsets separated by a colon!", file=sys.stderr)
            return 1
        start = int(emulate_start, 16)
        end = int(emulate_end, 16)
        pe.emulate_code(start, end, verbose=args.verbose)

    for function_address in args.emulate_function:
        fun = int(function_address, 16)
        pe.emulate_function(fun, verbose=args.verbose)

    printer = StructPrinter(pe, default_encoding=args.encoding)
    lines = printer.parse_struct(args.start, args.end, args.count, args.format)
    for i, line in enumerate(lines):
        print(", ".join(__str(entry, i) for entry in line))

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the command-line tool and use main()'s return value as the exit status.
    raise SystemExit(main())
|