1
0
mirror of synced 2024-12-01 00:57:18 +01:00

Restructure string decoding, fix non-ascii strings, at least parse out the last remaining tag bytes we don't understand.

This commit is contained in:
Jennifer Taylor 2021-04-03 05:25:27 +00:00
parent 7beb518484
commit d688314d1f

View File

@ -190,6 +190,10 @@ class SWF:
# sections that we aren't parsing correctly. # sections that we aren't parsing correctly.
self.coverage: List[bool] = [False] * len(data) self.coverage: List[bool] = [False] * len(data)
# Initialize string table. This is used for faster lookup of strings
# as well as tracking which strings in the table have been parsed correctly.
self.strings: Dict[int, Tuple[str, bool]] = {}
def add_coverage(self, offset: int, length: int, unique: bool = True) -> None: def add_coverage(self, offset: int, length: int, unique: bool = True) -> None:
for i in range(offset, offset + length): for i in range(offset, offset + length):
if self.coverage[i] and unique: if self.coverage[i] and unique:
@ -203,7 +207,7 @@ class SWF:
for offset, covered in enumerate(self.coverage): for offset, covered in enumerate(self.coverage):
if covered: if covered:
if start is not None: if start is not None:
print(f"Uncovered: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr) print(f"Uncovered bytes: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
start = None start = None
else: else:
if start is None: if start is None:
@ -211,7 +215,14 @@ class SWF:
if start is not None: if start is not None:
# Print final range # Print final range
offset = len(self.coverage) offset = len(self.coverage)
print(f"Uncovered: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr) print(f"Uncovered bytes: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
# Now, print uncovered strings
for offset, (string, covered) in self.strings.items():
if covered:
continue
print(f"Uncovered string: {hex(offset)} - {string}", file=sys.stderr)
def as_dict(self) -> Dict[str, Any]: def as_dict(self) -> Dict[str, Any]:
return { return {
@ -224,60 +235,60 @@ class SWF:
resources: Dict[int, str] = { resources: Dict[int, str] = {
self.END: 'END', self.END: 'END',
self.SHOW_FRAME: 'SHOW_FRAME', self.SHOW_FRAME: 'SHOW_FRAME',
0x2: 'DEFINE_SHAPE', self.DEFINE_SHAPE: 'DEFINE_SHAPE',
0x4: 'PLACE_OBJECT', self.PLACE_OBJECT: 'PLACE_OBJECT',
0x5: 'REMOVE_OBJECT', self.REMOVE_OBJECT: 'REMOVE_OBJECT',
0x6: 'DEFINE_BITS', self.DEFINE_BITS: 'DEFINE_BITS',
0x7: 'DEFINE_BUTTON', self.DEFINE_BUTTON: 'DEFINE_BUTTON',
0x8: 'JPEG_TABLES', self.JPEG_TABLES: 'JPEG_TABLES',
0x9: 'BACKGROUND_COLOR', self.BACKGROUND_COLOR: 'BACKGROUND_COLOR',
0xa: 'DEFINE_FONT', self.DEFINE_FONT: 'DEFINE_FONT',
0xb: 'DEFINE_TEXT', self.DEFINE_TEXT: 'DEFINE_TEXT',
0xc: 'DO_ACTION', self.DO_ACTION: 'DO_ACTION',
0xd: 'DEFINE_FONT_INFO', self.DEFINE_FONT_INFO: 'DEFINE_FONT_INFO',
0xe: 'DEFINE_SOUND', self.DEFINE_SOUND: 'DEFINE_SOUND',
0xf: 'START_SOUND', self.START_SOUND: 'START_SOUND',
0x11: 'DEFINE_BUTTON_SOUND', self.DEFINE_BUTTON_SOUND: 'DEFINE_BUTTON_SOUND',
0x12: 'SOUND_STREAM_HEAD', self.SOUND_STREAM_HEAD: 'SOUND_STREAM_HEAD',
0x13: 'SOUND_STREAM_BLOCK', self.SOUND_STREAM_BLOCK: 'SOUND_STREAM_BLOCK',
0x14: 'DEFINE_BITS_LOSSLESS', self.DEFINE_BITS_LOSSLESS: 'DEFINE_BITS_LOSSLESS',
0x15: 'DEFINE_BITS_JPEG2', self.DEFINE_BITS_JPEG2: 'DEFINE_BITS_JPEG2',
0x16: 'DEFINE_SHAPE2', self.DEFINE_SHAPE2: 'DEFINE_SHAPE2',
0x17: 'DEFINE_BUTTON_CXFORM', self.DEFINE_BUTTON_CXFORM: 'DEFINE_BUTTON_CXFORM',
0x18: 'PROTECT', self.PROTECT: 'PROTECT',
0x1a: 'PLACE_OBJECT2', self.PLACE_OBJECT2: 'PLACE_OBJECT2',
0x1c: 'REMOVE_OBJECT2', self.REMOVE_OBJECT2: 'REMOVE_OBJECT2',
0x20: 'DEFINE_SHAPE3', self.DEFINE_SHAPE3: 'DEFINE_SHAPE3',
0x21: 'DEFINE_TEXT2', self.DEFINE_TEXT2: 'DEFINE_TEXT2',
0x22: 'DEFINE_BUTTON2', self.DEFINE_BUTTON2: 'DEFINE_BUTTON2',
0x23: 'DEFINE_BITS_JPEG3', self.DEFINE_BITS_JPEG3: 'DEFINE_BITS_JPEG3',
0x24: 'DEFINE_BITS_LOSSLESS2', self.DEFINE_BITS_LOSSLESS2: 'DEFINE_BITS_LOSSLESS2',
0x25: 'DEFINE_EDIT_TEXT', self.DEFINE_EDIT_TEXT: 'DEFINE_EDIT_TEXT',
0x27: 'DEFINE_SPRITE', self.DEFINE_SPRITE: 'DEFINE_SPRITE',
0x2b: 'FRAME_LABEL', self.FRAME_LABEL: 'FRAME_LABEL',
0x2d: 'SOUND_STREAM_HEAD2', self.SOUND_STREAM_HEAD2: 'SOUND_STREAM_HEAD2',
0x2e: 'DEFINE_MORPH_SHAPE', self.DEFINE_MORPH_SHAPE: 'DEFINE_MORPH_SHAPE',
0x30: 'DEFINE_FONT2', self.DEFINE_FONT2: 'DEFINE_FONT2',
0x38: 'EXPORT_ASSETS', self.EXPORT_ASSETS: 'EXPORT_ASSETS',
0x39: 'IMPORT_ASSETS', self.IMPORT_ASSETS: 'IMPORT_ASSETS',
0x3b: 'DO_INIT_ACTION', self.DO_INIT_ACTION: 'DO_INIT_ACTION',
0x3c: 'DEFINE_VIDEO_STREAM', self.DEFINE_VIDEO_STREAM: 'DEFINE_VIDEO_STREAM',
0x3d: 'VIDEO_FRAME', self.VIDEO_FRAME: 'VIDEO_FRAME',
0x3e: 'DEFINE_FONT_INFO2', self.DEFINE_FONT_INFO2: 'DEFINE_FONT_INFO2',
0x40: 'ENABLE_DEBUGGER2', self.ENABLE_DEBUGGER2: 'ENABLE_DEBUGGER2',
0x41: 'SCRIPT_LIMITS', self.SCRIPT_LIMITS: 'SCRIPT_LIMITS',
0x42: 'SET_TAB_INDEX', self.SET_TAB_INDEX: 'SET_TAB_INDEX',
0x46: 'PLACE_OBJECT3', self.PLACE_OBJECT3: 'PLACE_OBJECT3',
0x47: 'IMPORT_ASSETS2', self.IMPORT_ASSETS2: 'IMPORT_ASSETS2',
0x4b: 'DEFINE_FONT3', self.DEFINE_FONT3: 'DEFINE_FONT3',
0x4e: 'DEFINE_SCALING_GRID', self.DEFINE_SCALING_GRID: 'DEFINE_SCALING_GRID',
0x4d: 'METADATA', self.METADATA: 'METADATA',
0x53: 'DEFINE_SHAPE4', self.DEFINE_SHAPE4: 'DEFINE_SHAPE4',
0x54: 'DEFINE_MORPH_SHAPE2', self.DEFINE_MORPH_SHAPE2: 'DEFINE_MORPH_SHAPE2',
0x56: 'SCENE_LABEL', self.SCENE_LABEL: 'SCENE_LABEL',
0x64: 'AFP_IMAGE', self.AFP_IMAGE: 'AFP_IMAGE',
0x65: 'AFP_DEFINE_SOUND', self.AFP_DEFINE_SOUND: 'AFP_DEFINE_SOUND',
0x66: 'AFP_SOUND_STREAM_BLOCK', self.AFP_SOUND_STREAM_BLOCK: 'AFP_SOUND_STREAM_BLOCK',
self.AFP_DEFINE_FONT: 'AFP_DEFINE_FONT', self.AFP_DEFINE_FONT: 'AFP_DEFINE_FONT',
self.AFP_DEFINE_SHAPE: 'AFP_DEFINE_SHAPE', self.AFP_DEFINE_SHAPE: 'AFP_DEFINE_SHAPE',
self.AEP_PLACE_OBJECT: 'AEP_PLACE_OBJECT', self.AEP_PLACE_OBJECT: 'AEP_PLACE_OBJECT',
@ -347,13 +358,15 @@ class SWF:
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
pass pass
tags_unknown1, tags_unknown2, tags_count = struct.unpack("<IIi", ap2data[tags_base_offset:(tags_base_offset + 12)]) tags_unknown1, tags_unknown2, tags_count, tags_unknown3, tags_unknown4, tags_offset, tags_unknown5 = struct.unpack(
tags_offset = struct.unpack("<i", ap2data[(tags_base_offset + 20):(tags_base_offset + 24)])[0] + tags_base_offset "<IIiIIiI",
add_coverage(tags_base_offset, 12) ap2data[tags_base_offset:(tags_base_offset + 28)]
add_coverage(tags_base_offset + 20, 4) )
tags_offset += tags_base_offset
add_coverage(tags_base_offset, 28)
# TODO: Seems that tags_unknown2 has something to do with end of movie stuff? # TODO: Seems that tags_unknown2 has something to do with end of movie stuff?
vprint(f"UNKNOWN: {hex(tags_unknown1)}, {hex(tags_unknown2)}") vprint(f"UNKNOWN: {hex(tags_unknown1)}, {hex(tags_unknown2)}, {hex(tags_unknown3)}, {hex(tags_unknown4)}, {hex(tags_unknown5)}")
vprint(f"Number of Tags: {tags_count}") vprint(f"Number of Tags: {tags_count}")
for i in range(tags_count): for i in range(tags_count):
@ -404,14 +417,34 @@ class SWF:
def __descramble_stringtable(self, scrambled_data: bytes, stringtable_offset: int, stringtable_size: int) -> bytes:
    """
    Descramble the string table inside ``scrambled_data`` and index its strings.

    Byte ``i`` of the table (counting from ``stringtable_offset``) is replaced
    with ``(byte - (128 + i)) & 0xFF``. While walking the table, every
    non-empty NUL-terminated run of bytes is decoded as UTF-8 and stored in
    ``self.strings``, keyed by the offset of its first byte relative to the
    start of the table, paired with a ``False`` "covered" flag. Empty strings
    (a NUL with no preceding bytes) are not recorded.

    Parameters:
        scrambled_data - The full buffer containing the scrambled table.
        stringtable_offset - Absolute offset of the table within the buffer.
        stringtable_size - Length of the table in bytes.

    Returns:
        A copy of ``scrambled_data`` with only the table region descrambled.

    Raises:
        IndexError - The table extends past the end of ``scrambled_data``.
        UnicodeDecodeError - A string in the table is not valid UTF-8.
        Exception - The table does not end with a NUL terminator.
    """
    data = bytearray(scrambled_data)

    # Fail before touching self.strings so a bad offset/size cannot leave a
    # half-populated string index behind.
    if stringtable_size > 0 and stringtable_offset + stringtable_size > len(data):
        raise IndexError(
            f"String table {hex(stringtable_offset)} + {stringtable_size} bytes "
            f"overruns data of length {len(data)}!"
        )

    pending = bytearray()
    # Table-relative offset at which the string currently being read began.
    string_start = 0

    for index in range(stringtable_size):
        position = stringtable_offset + index
        # The subtracted key starts at 128 and grows by one per table byte.
        byte = (data[position] - (128 + index)) & 0xFF
        data[position] = byte

        if byte != 0:
            pending.append(byte)
            continue

        if pending:
            # We found a string!
            self.strings[string_start] = (pending.decode('utf8'), False)
            pending = bytearray()
        # Whatever follows this NUL is the start of the next candidate string.
        string_start = index + 1

    if pending:
        raise Exception(
            f"String table is not NUL-terminated, string at offset {hex(string_start)} runs off the end!"
        )

    return bytes(data)
def __get_string(self, offset: int) -> str:
    """
    Look up a string by its table-relative offset and mark it as covered.

    Parameters:
        offset - Key into ``self.strings``.

    Returns:
        The text stored under ``offset``.

    Raises:
        KeyError - ``self.strings`` has no entry for ``offset``.
    """
    try:
        text, _ = self.strings[offset]
    except KeyError:
        # Same exception type as a bare dictionary miss, but say which offset
        # was requested so a bad reference can be traced.
        raise KeyError(f"No string starts at string table offset {hex(offset)}!") from None

    # Flip the flag stored next to the text so this entry counts as referenced.
    self.strings[offset] = (text, True)
    return text
def parse(self, verbose: bool = False) -> None: def parse(self, verbose: bool = False) -> None:
# Suppress debug text unless asked # Suppress debug text unless asked
if verbose: if verbose:
@ -422,6 +455,7 @@ class SWF:
# Reinitialize coverage. # Reinitialize coverage.
self.coverage = [False] * len(self.data) self.coverage = [False] * len(self.data)
self.strings = {}
else: else:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
pass pass
@ -429,27 +463,21 @@ class SWF:
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
pass pass
# First, use the byteswap header to descramble the data.
data = self.__descramble(self.data, self.descramble_info) data = self.__descramble(self.data, self.descramble_info)
def get_until_null(offset: int) -> bytes:
out = b""
while data[offset] != 0:
out += data[offset:(offset + 1)]
offset += 1
return out
# Start with the basic file header. # Start with the basic file header.
magic, length, version, nameoffset, flags, left, right, top, bottom = struct.unpack("<4sIHHIHHHH", data[0:24]) magic, length, version, nameoffset, flags, left, right, top, bottom = struct.unpack("<4sIHHIHHHH", data[0:24])
width = right - left width = right - left
height = bottom - top height = bottom - top
add_coverage(0, 24) add_coverage(0, 24)
ap2_data_version = magic[0] & 0xFF
magic = bytes([magic[3] & 0x7F, magic[2] & 0x7F, magic[1] & 0x7F, 0x0]) magic = bytes([magic[3] & 0x7F, magic[2] & 0x7F, magic[1] & 0x7F, 0x0])
if magic != b'AP2\x00': if magic != b'AP2\x00':
raise Exception(f"Unrecognzied magic {magic}!") raise Exception(f"Unrecognzied magic {magic}!")
if length != len(data): if length != len(data):
raise Exception(f"Unexpected length in AFP header, {length} != {len(data)}!") raise Exception(f"Unexpected length in AFP header, {length} != {len(data)}!")
ap2_data_version = magic[0] & 0xFF
if flags & 0x1: if flags & 0x1:
# I have no idea what this is, but it's treated as 4 bytes and something # I have no idea what this is, but it's treated as 4 bytes and something
@ -480,11 +508,13 @@ class SWF:
# Descramble string table. # Descramble string table.
data = self.__descramble_stringtable(data, stringtable_offset, stringtable_size) data = self.__descramble_stringtable(data, stringtable_offset, stringtable_size)
add_coverage(stringtable_offset, stringtable_size)
# Get exported SWF name. # Get exported SWF name.
self.exported_name = get_until_null(nameoffset + stringtable_offset).decode('ascii') self.exported_name = self.__get_string(nameoffset)
add_coverage(nameoffset + stringtable_offset, len(self.exported_name) + 1, unique=False) add_coverage(nameoffset + stringtable_offset, len(self.exported_name) + 1, unique=False)
vprint(f"\nAFP name: {self.name}") vprint(f"\nAFP name: {self.name}")
vprint(f"Container Version: {hex(ap2_data_version)}")
vprint(f"Version: {hex(version)}") vprint(f"Version: {hex(version)}")
vprint(f"Exported Name: {self.exported_name}") vprint(f"Exported Name: {self.exported_name}")
vprint(f"SWF Flags: {hex(flags)}") vprint(f"SWF Flags: {hex(flags)}")
@ -515,7 +545,7 @@ class SWF:
add_coverage(asset_offset, 4) add_coverage(asset_offset, 4)
asset_offset += 4 asset_offset += 4
asset_name = get_until_null(asset_string_offset + stringtable_offset).decode('ascii') asset_name = self.__get_string(asset_string_offset)
add_coverage(asset_string_offset + stringtable_offset, len(asset_name) + 1, unique=False) add_coverage(asset_string_offset + stringtable_offset, len(asset_name) + 1, unique=False)
vprint(f" {assetno}: {asset_name}") vprint(f" {assetno}: {asset_name}")
@ -537,7 +567,7 @@ class SWF:
swf_name_offset, count = struct.unpack("<HH", data[imported_tags_offset:(imported_tags_offset + 4)]) swf_name_offset, count = struct.unpack("<HH", data[imported_tags_offset:(imported_tags_offset + 4)])
add_coverage(imported_tags_offset, 4) add_coverage(imported_tags_offset, 4)
swf_name = get_until_null(swf_name_offset + stringtable_offset).decode('ascii') swf_name = self.__get_string(swf_name_offset)
add_coverage(swf_name_offset + stringtable_offset, len(swf_name) + 1, unique=False) add_coverage(swf_name_offset + stringtable_offset, len(swf_name) + 1, unique=False)
vprint(f" Source SWF: {swf_name}") vprint(f" Source SWF: {swf_name}")
@ -546,7 +576,7 @@ class SWF:
asset_id_no, asset_name_offset = struct.unpack("<HH", data[imported_tags_data_offset:(imported_tags_data_offset + 4)]) asset_id_no, asset_name_offset = struct.unpack("<HH", data[imported_tags_data_offset:(imported_tags_data_offset + 4)])
add_coverage(imported_tags_data_offset, 4) add_coverage(imported_tags_data_offset, 4)
asset_name = get_until_null(asset_name_offset + stringtable_offset).decode('ascii') asset_name = self.__get_string(asset_name_offset)
add_coverage(asset_name_offset + stringtable_offset, len(asset_name) + 1, unique=False) add_coverage(asset_name_offset + stringtable_offset, len(asset_name) + 1, unique=False)
vprint(f" Tag ID: {asset_id_no}, Requested Asset: {asset_name}") vprint(f" Tag ID: {asset_id_no}, Requested Asset: {asset_name}")
@ -1604,14 +1634,13 @@ class AFPFile:
if header_offset != header_length: if header_offset != header_length:
raise Exception("Failed to parse bitfield of header correctly!") raise Exception("Failed to parse bitfield of header correctly!")
if verbose:
self.print_coverage()
# Now, parse out the SWF data in each of the SWF structures we found. # Now, parse out the SWF data in each of the SWF structures we found.
for swf in self.swfdata: for swf in self.swfdata:
swf.parse(verbose) swf.parse(verbose)
if verbose:
self.print_coverage()
@staticmethod
def align(val: int) -> int:
    """
    Round ``val`` up to the nearest multiple of 4.

    Values that are already a multiple of 4 are returned unchanged.
    """
    # Clearing the two low bits with ~0x3 works for integers of any size; a
    # fixed-width hex mask would silently drop high bits of large values.
    return (val + 3) & ~0x3