1
0
mirror of synced 2025-01-18 22:24:04 +01:00

Fix "pop" instruction, fix N^2 slowdown in PE emulation, add support for length prefix for "z", support correct encoding for "s".
This commit is contained in:
Jennifer Taylor 2024-12-31 23:18:28 +00:00
parent 480d0f5baf
commit 61a2b19c71
2 changed files with 31 additions and 10 deletions

View File

@ -85,6 +85,10 @@ class PEFile:
def __init__(self, data: bytes) -> None: def __init__(self, data: bytes) -> None:
self.data = data self.data = data
self.__pe = pefile.PE(data=data, fast_load=True) self.__pe = pefile.PE(data=data, fast_load=True)
# Mapping of ad-hoc virtual addresses, which get added to during runtime. For the purpose
# of our emulation, we just tack values to the end of the physical binary and add an ad-hoc
# mapping. The mapping is indexed by virtual address and points to a physical binary offset.
self.__adhoc_mapping: Dict[int, int] = {} self.__adhoc_mapping: Dict[int, int] = {}
def virtual_to_physical(self, offset: int) -> int: def virtual_to_physical(self, offset: int) -> int:
@ -95,9 +99,8 @@ class PEFile:
if offset >= start and offset < end: if offset >= start and offset < end:
return (offset - start) + section.PointerToRawData return (offset - start) + section.PointerToRawData
for virtual, physical in self.__adhoc_mapping.items(): if offset in self.__adhoc_mapping:
if offset == virtual: return self.__adhoc_mapping[offset]
return physical
raise InvalidVirtualOffsetException(f"Couldn't find physical offset for virtual offset 0x{offset:08x}") raise InvalidVirtualOffsetException(f"Couldn't find physical offset for virtual offset 0x{offset:08x}")
@ -360,7 +363,7 @@ class PEFile:
vprint(f"pop {dest}") vprint(f"pop {dest}")
size = get_size(src) size = get_size(dest)
if size is None: if size is None:
raise Exception(f"Could not determine size of {mnemonic} operation!") raise Exception(f"Could not determine size of {mnemonic} operation!")
result = fetch(registers, memory, size, "[rsp]" if self.is_64bit() else "[esp]") result = fetch(registers, memory, size, "[rsp]" if self.is_64bit() else "[esp]")

View File

@ -181,9 +181,26 @@ class StructPrinter:
raise Exception("Cannot display string as hex!") raise Exception("Cannot display string as hex!")
line.append(bs.decode(self.default_encoding)) line.append(bs.decode(self.default_encoding))
else: else:
# Trick python into supporting our "z" format if it has length numbers on it.
nullTerminated = False
if spec[-1] == "z":
nullTerminated = True
spec = spec[:-1] + "s"
size = struct.calcsize(prefix + spec) size = struct.calcsize(prefix + spec)
chunk = self.pe.data[offset : (offset + size)] chunk = self.pe.data[offset : (offset + size)]
if spec != "x":
if spec[-1] == "s":
# Support length for s/z with proper decoding.
if nullTerminated:
# Null-terminated so we should remove any nulls.
while chunk and chunk[-1:] == b"\x00":
chunk = chunk[:-1]
if dohex:
raise Exception("Cannot display string as hex!")
line.append(chunk.decode(self.default_encoding))
elif spec != "x":
if dohex: if dohex:
line.append(hex(struct.unpack(prefix + spec, chunk)[0])) line.append(hex(struct.unpack(prefix + spec, chunk)[0]))
else: else:
@ -228,7 +245,7 @@ Some examples of valid format specifiers and what they do are as follows:
*(hbb) = Decodes an array of pointers to a structure containing a short and two bytes, decoding that short and both bytes for each entry in the array. *(hbb) = Decodes an array of pointers to a structure containing a short and two bytes, decoding that short and both bytes for each entry in the array.
*z = Decodes an array null-terminated string pointers. *z = Decodes an array of null-terminated string pointers.
Ih&h = Decodes an array of structures containing an unsigned integer and two shorts, displaying the second short in hex instead of decimal. Ih&h = Decodes an array of structures containing an unsigned integer and two shorts, displaying the second short in hex instead of decimal.
@ -281,10 +298,11 @@ Ih&h = Decodes an array of structures containing an unsigned integer and two sho
"for details. Additionally, prefixing a format specifier with * allows dereferencing pointers. " "for details. Additionally, prefixing a format specifier with * allows dereferencing pointers. "
"Surround a chunk of format specifiers with parenthesis to dereference structures. Note that " "Surround a chunk of format specifiers with parenthesis to dereference structures. Note that "
"structures can be arbitrarily nested to decode complex data types. For ease of unpacking C string " "structures can be arbitrarily nested to decode complex data types. For ease of unpacking C string "
'pointers, the specifier "z" is recognzied to mean null-terminated string. A & preceeding a ' 'pointers, the specifier "z" is recognzied to mean null-terminated string. Much like the "s" specifier '
"format specifier means that we should convert to hex before displaying. For the ease of decoding " 'the "z" specifier is allowed an integer prefix for inline length. Both "s" and "z" respect the '
'enumerations, the specifier "#" is recognized to mean entry number. You can provide it an ' "specified encoding. A & preceeding a format specifier means that we should convert to hex before "
'offset value such as "+20#" to start at a certain number.' 'displaying. For the ease of decoding enumerations, the specifier "#" is recognized to mean entry '
'number. You can provide it an offset value such as "+20#" to start at a certain number.'
), ),
type=str, type=str,
default=None, default=None,