2019-12-08 22:43:49 +01:00
import argparse
import struct
2021-04-07 00:24:00 +02:00
import sys
from typing import Optional , Tuple , List , Any
2019-12-08 22:43:49 +01:00
2021-08-15 02:41:37 +02:00
from bemani . common import PEFile
2019-12-08 22:43:49 +01:00
2021-04-07 00:24:16 +02:00
class LineNumber:
    """Deferred entry number: rendered only once the row index is known."""

    def __init__(self, offset: int, hex: bool) -> None:
        # Amount added to the row index before it is displayed.
        self.offset = offset
        # When true, the number is rendered in hexadecimal.
        self.hex = hex

    def toStr(self, lineno: int) -> str:
        """Return ``offset + lineno`` as text, in hex when ``self.hex`` is set."""
        value = self.offset + lineno
        # The builtin hex() is visible here; only __init__'s parameter shadows it.
        return hex(value) if self.hex else str(value)
2021-01-15 02:03:29 +01:00
class StructPrinter:
    """Decode repeated structures out of a PEFile's data using an extended struct format.

    The format language is Python's ``struct`` syntax plus extras handled here:

    * ``*X`` / ``*(...)`` - read a pointer, then decode ``X`` (or the
      parenthesised group) at the location it points to.
    * ``z`` - a null-terminated string, decoded with ``default_encoding``.
    * ``&X`` - display the decoded value of ``X`` through ``hex()``.
    * ``#`` - the entry number, optionally offset as in ``+20#`` or ``+0x200#``.
    """

    def __init__(self, pe: "PEFile", default_encoding: str = "ascii") -> None:
        # Encoding applied to every "z" (null-terminated string) field.
        self.default_encoding = default_encoding
        # Source of the raw bytes (pe.data) and of address translation helpers.
        self.pe = pe

    def parse_format_spec(self, fmt: str) -> Tuple[str, List[Any]]:
        """Split a format string into a byte-order prefix and a list of specs.

        Returns:
            ``(prefix, specs)``. ``prefix`` holds any leading ``@=<>!``
            characters. Each entry of ``specs`` is either a string (one plain
            specifier such as ``"h"``, ``"&h"``, ``"4s"`` or ``"+64#"``) or a
            nested list (the specs to decode behind a dereferenced pointer).

        Raises:
            Exception: if the format is malformed (misplaced ``*``, ``&`` or
                parenthesis, unbalanced parentheses, or a dangling partial
                specifier at the end of the string).
        """
        prefix: str = ""
        cur_accum: str = ""
        specs: List[Any] = []
        in_prefix: bool = True
        in_dereference: bool = False
        parens: int = 0

        for c in fmt:
            if in_prefix:
                # Remember byte ordering prefix.
                if c in ["@", "=", "<", ">", "!"]:
                    prefix += c
                    continue
                in_prefix = False

            if c == "*":
                if parens == 0:
                    # Track if we're in a dereference section.
                    if not in_dereference:
                        in_dereference = True
                        if cur_accum:
                            raise Exception("Cannot have dereference marker in middle of specifier!")
                    else:
                        # Double-indirect dereference, resolved by the recursive parse.
                        cur_accum += c
                else:
                    # Just add it, it's part of a parenthesised subsection.
                    cur_accum += c
                continue

            if c == "(":
                # Clump together format specs inside parens.
                if not in_dereference:
                    raise Exception("Cannot have parenthesis in middle of specifier!")
                if parens > 0:
                    cur_accum += c
                parens += 1
                continue

            if c == ")":
                # A closing paren is only legal while a group is actually open;
                # otherwise the depth counter would go negative and every later
                # specifier would be swallowed.
                if not in_dereference or parens == 0:
                    raise Exception("Cannot have parenthesis in middle of specifier!")
                parens -= 1
                if parens > 0:
                    cur_accum += c
                else:
                    # Parse the accumulated data as its own format spec.
                    _, subspec = self.parse_format_spec(cur_accum)
                    cur_accum = ""
                    in_dereference = False
                    specs.append(subspec)
                continue

            # If we have either an integer prefix, or an offset prefix, accumulate here.
            if c.isdigit() or c in '+-' or (c in 'xabcdefABCDEF' and ('+' in cur_accum or '-' in cur_accum)):
                cur_accum += c
                continue

            if c == "&":
                # Inside parentheses the accumulator holds earlier members of the
                # group, so the "must come first" rule only applies at depth 0;
                # the recursive parse validates placement within the group.
                if cur_accum and parens == 0:
                    raise Exception("Hex specifier should be at beginning of specifier!")
                cur_accum += c
                continue

            cur_accum += c

            # If we're dereferencing, still do the subparse even though it's only one thing.
            if parens == 0:
                if in_dereference:
                    _, subspec = self.parse_format_spec(cur_accum)
                    specs.append(subspec)
                    in_dereference = False
                else:
                    specs.append(cur_accum)
                cur_accum = ""

        # Anything still pending here would otherwise be dropped without a word.
        if parens != 0:
            raise Exception("Unbalanced parenthesis in format specifier!")
        if in_dereference or cur_accum:
            raise Exception("Incomplete format specifier!")

        return prefix, specs

    def parse_struct(self, startaddr: str, endaddr: Optional[str], countstr: Optional[str], fmt: str) -> List[Any]:
        """Decode structures starting at ``startaddr`` according to ``fmt``.

        Args:
            startaddr: Start address as a hex string.
            endaddr: End address as a hex string, or None when ``countstr`` is used.
            countstr: Number of entries; parsed as hex when it contains ``0x``,
                otherwise as decimal. None when ``endaddr`` is used.
            fmt: Extended format string (see the class docstring).

        Returns:
            One list per decoded entry; each holds the decoded fields in order.

        Raises:
            Exception: if neither or both of ``endaddr``/``countstr`` are given,
                or if the format is malformed.
        """
        start: int = int(startaddr, 16)
        end: Optional[int] = int(endaddr, 16) if endaddr is not None else None
        count: Optional[int] = int(countstr, 16 if "0x" in countstr else 10) if countstr is not None else None

        if end is None and count is None:
            raise Exception("Can't handle endless structures!")
        if end is not None and count is not None:
            raise Exception("Can't handle providing two ends!")

        if self.pe.is_virtual(start):
            # Assume this is virtual
            start = self.pe.virtual_to_physical(start)

        if end is not None and self.pe.is_virtual(end):
            # Assume this is virtual
            end = self.pe.virtual_to_physical(end)

        # Parse out any dereference instructions.
        prefix, specs = self.parse_format_spec(fmt)
        return self.__parse_struct(start, end, count, prefix, specs)

    def __parse_struct(self, start: int, end: Optional[int], count: Optional[int], prefix: str, specs: List[Any]) -> List[Any]:
        """Decode entries from ``start`` until ``end`` is reached or ``count`` runs out.

        ``specs`` is the list produced by ``parse_format_spec``; nested lists
        are decoded recursively at the address read from a pointer field.
        """
        output = []
        offset = start

        while True:
            if end is not None and offset >= end:
                break
            if count is not None:
                if count <= 0:
                    break
                count -= 1

            row_start = offset
            line: List[Any] = []
            for spec in specs:
                if isinstance(spec, str):
                    if spec[0] == "&":
                        dohex = True
                        spec = spec[1:]
                    else:
                        dohex = False

                    if spec[-1] == "#":
                        # Entry-number placeholder; consumes no data.
                        if len(spec) > 1:
                            if spec[0] not in "+-":
                                raise Exception("Line number offsets must include a '+' or '-' prefix!")
                            val = int(spec[:-1], 16 if "0x" in spec else 10)
                        else:
                            val = 0
                        line.append(LineNumber(val, dohex))
                    elif spec == "z":
                        # Hex makes no sense here
                        if dohex:
                            raise Exception("Cannot display string as hex!")

                        # Null-terminated string. An empty slice means we walked off
                        # the end of the data; without this check the loop would
                        # never terminate.
                        pieces = []
                        while True:
                            byte = self.pe.data[offset:(offset + 1)]
                            if not byte:
                                raise Exception("Ran off the end of the data while reading a null-terminated string!")
                            # Advance past every byte read, including the null.
                            offset += 1
                            if byte == b"\x00":
                                break
                            pieces.append(byte)

                        line.append(b"".join(pieces).decode(self.default_encoding))
                    else:
                        size = struct.calcsize(prefix + spec)
                        # Pad bytes ("x", "4x", ...) are skipped without unpacking:
                        # struct yields no value for them.
                        if spec[-1] != "x":
                            chunk = self.pe.data[offset:(offset + size)]
                            values = struct.unpack(prefix + spec, chunk)
                            # Only the first unpacked value is displayed.
                            if values:
                                line.append(hex(values[0]) if dohex else values[0])
                        offset += size
                else:
                    # A nested list means "dereference": read a pointer whose width
                    # follows pe.is_64bit().
                    if self.pe.is_64bit():
                        ptr_size, ptr_code = 8, "Q"
                    else:
                        ptr_size, ptr_code = 4, "I"
                    chunk = self.pe.data[offset:(offset + ptr_size)]
                    pointer = struct.unpack(prefix + ptr_code, chunk)[0]
                    offset += ptr_size

                    if pointer == 0x0:
                        # Null pointer
                        line.append(None)
                    else:
                        # Resolve the physical address of this pointer, trick the
                        # substructure into parsing only one iteration.
                        pointer = self.pe.virtual_to_physical(pointer)
                        subparse = self.__parse_struct(pointer, pointer + 1, None, prefix, spec)
                        if len(subparse) != 1:
                            raise Exception("Logic error!")
                        line.append(subparse[0])

            # When bounded by an end address, an entry that consumes nothing can
            # never reach that address; fail instead of looping forever.
            if end is not None and offset == row_start:
                raise Exception("Format specifier consumes no data, cannot parse up to an end address!")

            output.append(line)

        return output
2019-12-08 22:43:49 +01:00
2021-04-07 00:24:00 +02:00
def main() -> int:
    """Command-line entry point: parse arguments, decode the structures, print them.

    Returns:
        0 on success, 1 when the command-line arguments are inconsistent
        (neither or both of --end/--count, or a malformed --emulate-code pair).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="A utility to print structs out of a DLL.",
        epilog=("""
Some examples of valid format specifiers and what they do are as follows:
*h = Decodes an array of short pointers, decoding the resulting shorts for each pointer in the array.
*(hbb) = Decodes an array of pointers to a structure containing a short and two bytes, decoding that short and both bytes for each entry in the array.
*z = Decodes an array of null-terminated string pointers.
Ih&h = Decodes an array of structures containing an unsigned integer and two shorts, displaying the second short in hex instead of decimal.
#I = Decodes an array of unsigned integers, displaying the array entry number and the integer.
+64#h = Decodes an array of shorts, displaying the array entry number starting at 64 and the integer.
*z&+0x200# = Decodes an array of null-terminated string pointers, displaying the array entry number in hex starting at 0x200 and string. Broken down, it has the following parts:
    *z = Dereference the current value (*) and treat that integer as a pointer to a null-terminated string (z).
    &+0x200# = Print the current line number (#), offset by the value 0x200 (+0x200) as a hex number (&).
"""),
    )
    parser.add_argument(
        "--file",
        help="DLL file to extract from.",
        type=str,
        default=None,
        required=True,
    )
    parser.add_argument(
        "--start",
        help="Hex offset into the file we should start at. This can be specified as either a raw offset into the DLL or as a virtual offset.",
        type=str,
        default=None,
        required=True,
    )
    parser.add_argument(
        "--end",
        help="Hex offset into the file we should go until. Alternatively you can use --count and the end offset will be calculated based on the start and format size.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--count",
        help="Number of entries to parse, as a decimal or hex integer. Alternatively you can use --end and the count will be calculated based on the start, end and format size.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--encoding",
        help="Encoding to use for strings, such as 'ascii', 'utf-8' or 'shift-jis'.",
        default='ascii',
        type=str,
    )
    parser.add_argument(
        "--format",
        help=(
            "Python struct format we should print using. See https://docs.python.org/3/library/struct.html "
            "for details. Additionally, prefixing a format specifier with * allows dereferencing pointers. "
            "Surround a chunk of format specifiers with parenthesis to dereference structures. Note that "
            "structures can be arbitrarily nested to decode complex data types. For ease of unpacking C string "
            "pointers, the specifier \"z\" is recognized to mean null-terminated string. A & preceding a "
            "format specifier means that we should convert to hex before displaying. For the ease of decoding "
            "enumerations, the specifier \"#\" is recognized to mean entry number. You can provide it an "
            "offset value such as \"+20#\" to start at a certain number."
        ),
        type=str,
        default=None,
        required=True,
    )
    parser.add_argument(
        "--emulate-code",
        help=(
            "Hex offset pair of addresses where we should emulate x86/x64 code to "
            "reconstruct a dynamic psmap structure, separated by a colon. This can "
            "be specified as either a raw offset into the DLL or as a virtual offset. "
            "If multiple sections must be emulated you can specify this multiple times."
        ),
        type=str,
        action='append',
        default=[],
    )
    parser.add_argument(
        "--emulate-function",
        help=(
            "Hex offset address of a function that we should emulate to reconstruct a "
            "dynamic psmap structure. This can be specified as either a raw offset into "
            "the DLL or as a virtual offset. If multiple functions must be emulated you "
            "can specify this multiple times."
        ),
        type=str,
        action='append',
        default=[],
    )
    parser.add_argument(
        "--verbose",
        help="Display verbose parsing info.",
        action="store_true",
        default=False,
    )
    args = parser.parse_args()

    # Exactly one way of bounding the array must be supplied.
    if args.end is None and args.count is None:
        print("You must specify either an --end or a --count!", file=sys.stderr)
        return 1
    if args.end is not None and args.count is not None:
        print("You cannot specify both an --end and a --count!", file=sys.stderr)
        return 1

    # The context manager closes the handle even if read() raises.
    with open(args.file, 'rb') as fp:
        data = fp.read()

    def __str(obj: object, lineno: int) -> str:
        """Render one decoded field; ``lineno`` resolves LineNumber placeholders."""
        if obj is None:
            return "NULL"
        elif isinstance(obj, LineNumber):
            return obj.toStr(lineno)
        elif isinstance(obj, list):
            # A single-member sub-structure is shown bare, larger ones in parens.
            if len(obj) == 1:
                return __str(obj[0], lineno)
            else:
                return f"({', '.join(__str(o, lineno) for o in obj)})"
        else:
            return repr(obj)

    pe = PEFile(data)

    # If asked, attempt to emulate code which dynamically constructs the structure
    # we're about to parse.
    for chunk in args.emulate_code:
        emulate_start, separator, emulate_end = chunk.partition(':')
        if not separator:
            # Report a usage error instead of dying on a tuple-unpack ValueError.
            print("--emulate-code must be a pair of hex offsets separated by a colon!", file=sys.stderr)
            return 1
        start = int(emulate_start, 16)
        end = int(emulate_end, 16)
        pe.emulate_code(start, end, verbose=args.verbose)

    for function_address in args.emulate_function:
        fun = int(function_address, 16)
        pe.emulate_function(fun, verbose=args.verbose)

    printer = StructPrinter(pe, default_encoding=args.encoding)
    lines = printer.parse_struct(args.start, args.end, args.count, args.format)
    for i, line in enumerate(lines):
        print(", ".join(__str(entry, i) for entry in line))

    return 0
2019-12-08 22:43:49 +01:00
# Run the tool only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)