Restructure string decoding, fix non-ASCII strings, and at least parse out the last remaining tag bytes we don't understand.
This commit is contained in:
parent
7beb518484
commit
d688314d1f
@ -190,6 +190,10 @@ class SWF:
|
|||||||
# sections that we aren't parsing correctly.
|
# sections that we aren't parsing correctly.
|
||||||
self.coverage: List[bool] = [False] * len(data)
|
self.coverage: List[bool] = [False] * len(data)
|
||||||
|
|
||||||
|
# Initialize string table. This is used for faster lookup of strings
|
||||||
|
# as well as tracking which strings in the table have been parsed correctly.
|
||||||
|
self.strings: Dict[int, Tuple[str, bool]] = {}
|
||||||
|
|
||||||
def add_coverage(self, offset: int, length: int, unique: bool = True) -> None:
|
def add_coverage(self, offset: int, length: int, unique: bool = True) -> None:
|
||||||
for i in range(offset, offset + length):
|
for i in range(offset, offset + length):
|
||||||
if self.coverage[i] and unique:
|
if self.coverage[i] and unique:
|
||||||
@ -203,7 +207,7 @@ class SWF:
|
|||||||
for offset, covered in enumerate(self.coverage):
|
for offset, covered in enumerate(self.coverage):
|
||||||
if covered:
|
if covered:
|
||||||
if start is not None:
|
if start is not None:
|
||||||
print(f"Uncovered: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
|
print(f"Uncovered bytes: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
|
||||||
start = None
|
start = None
|
||||||
else:
|
else:
|
||||||
if start is None:
|
if start is None:
|
||||||
@ -211,7 +215,14 @@ class SWF:
|
|||||||
if start is not None:
|
if start is not None:
|
||||||
# Print final range
|
# Print final range
|
||||||
offset = len(self.coverage)
|
offset = len(self.coverage)
|
||||||
print(f"Uncovered: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
|
print(f"Uncovered bytes: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
|
||||||
|
|
||||||
|
# Now, print uncovered strings
|
||||||
|
for offset, (string, covered) in self.strings.items():
|
||||||
|
if covered:
|
||||||
|
continue
|
||||||
|
|
||||||
|
print(f"Uncovered string: {hex(offset)} - {string}", file=sys.stderr)
|
||||||
|
|
||||||
def as_dict(self) -> Dict[str, Any]:
|
def as_dict(self) -> Dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
@ -224,60 +235,60 @@ class SWF:
|
|||||||
resources: Dict[int, str] = {
|
resources: Dict[int, str] = {
|
||||||
self.END: 'END',
|
self.END: 'END',
|
||||||
self.SHOW_FRAME: 'SHOW_FRAME',
|
self.SHOW_FRAME: 'SHOW_FRAME',
|
||||||
0x2: 'DEFINE_SHAPE',
|
self.DEFINE_SHAPE: 'DEFINE_SHAPE',
|
||||||
0x4: 'PLACE_OBJECT',
|
self.PLACE_OBJECT: 'PLACE_OBJECT',
|
||||||
0x5: 'REMOVE_OBJECT',
|
self.REMOVE_OBJECT: 'REMOVE_OBJECT',
|
||||||
0x6: 'DEFINE_BITS',
|
self.DEFINE_BITS: 'DEFINE_BITS',
|
||||||
0x7: 'DEFINE_BUTTON',
|
self.DEFINE_BUTTON: 'DEFINE_BUTTON',
|
||||||
0x8: 'JPEG_TABLES',
|
self.JPEG_TABLES: 'JPEG_TABLES',
|
||||||
0x9: 'BACKGROUND_COLOR',
|
self.BACKGROUND_COLOR: 'BACKGROUND_COLOR',
|
||||||
0xa: 'DEFINE_FONT',
|
self.DEFINE_FONT: 'DEFINE_FONT',
|
||||||
0xb: 'DEFINE_TEXT',
|
self.DEFINE_TEXT: 'DEFINE_TEXT',
|
||||||
0xc: 'DO_ACTION',
|
self.DO_ACTION: 'DO_ACTION',
|
||||||
0xd: 'DEFINE_FONT_INFO',
|
self.DEFINE_FONT_INFO: 'DEFINE_FONT_INFO',
|
||||||
0xe: 'DEFINE_SOUND',
|
self.DEFINE_SOUND: 'DEFINE_SOUND',
|
||||||
0xf: 'START_SOUND',
|
self.START_SOUND: 'START_SOUND',
|
||||||
0x11: 'DEFINE_BUTTON_SOUND',
|
self.DEFINE_BUTTON_SOUND: 'DEFINE_BUTTON_SOUND',
|
||||||
0x12: 'SOUND_STREAM_HEAD',
|
self.SOUND_STREAM_HEAD: 'SOUND_STREAM_HEAD',
|
||||||
0x13: 'SOUND_STREAM_BLOCK',
|
self.SOUND_STREAM_BLOCK: 'SOUND_STREAM_BLOCK',
|
||||||
0x14: 'DEFINE_BITS_LOSSLESS',
|
self.DEFINE_BITS_LOSSLESS: 'DEFINE_BITS_LOSSLESS',
|
||||||
0x15: 'DEFINE_BITS_JPEG2',
|
self.DEFINE_BITS_JPEG2: 'DEFINE_BITS_JPEG2',
|
||||||
0x16: 'DEFINE_SHAPE2',
|
self.DEFINE_SHAPE2: 'DEFINE_SHAPE2',
|
||||||
0x17: 'DEFINE_BUTTON_CXFORM',
|
self.DEFINE_BUTTON_CXFORM: 'DEFINE_BUTTON_CXFORM',
|
||||||
0x18: 'PROTECT',
|
self.PROTECT: 'PROTECT',
|
||||||
0x1a: 'PLACE_OBJECT2',
|
self.PLACE_OBJECT2: 'PLACE_OBJECT2',
|
||||||
0x1c: 'REMOVE_OBJECT2',
|
self.REMOVE_OBJECT2: 'REMOVE_OBJECT2',
|
||||||
0x20: 'DEFINE_SHAPE3',
|
self.DEFINE_SHAPE3: 'DEFINE_SHAPE3',
|
||||||
0x21: 'DEFINE_TEXT2',
|
self.DEFINE_TEXT2: 'DEFINE_TEXT2',
|
||||||
0x22: 'DEFINE_BUTTON2',
|
self.DEFINE_BUTTON2: 'DEFINE_BUTTON2',
|
||||||
0x23: 'DEFINE_BITS_JPEG3',
|
self.DEFINE_BITS_JPEG3: 'DEFINE_BITS_JPEG3',
|
||||||
0x24: 'DEFINE_BITS_LOSSLESS2',
|
self.DEFINE_BITS_LOSSLESS2: 'DEFINE_BITS_LOSSLESS2',
|
||||||
0x25: 'DEFINE_EDIT_TEXT',
|
self.DEFINE_EDIT_TEXT: 'DEFINE_EDIT_TEXT',
|
||||||
0x27: 'DEFINE_SPRITE',
|
self.DEFINE_SPRITE: 'DEFINE_SPRITE',
|
||||||
0x2b: 'FRAME_LABEL',
|
self.FRAME_LABEL: 'FRAME_LABEL',
|
||||||
0x2d: 'SOUND_STREAM_HEAD2',
|
self.SOUND_STREAM_HEAD2: 'SOUND_STREAM_HEAD2',
|
||||||
0x2e: 'DEFINE_MORPH_SHAPE',
|
self.DEFINE_MORPH_SHAPE: 'DEFINE_MORPH_SHAPE',
|
||||||
0x30: 'DEFINE_FONT2',
|
self.DEFINE_FONT2: 'DEFINE_FONT2',
|
||||||
0x38: 'EXPORT_ASSETS',
|
self.EXPORT_ASSETS: 'EXPORT_ASSETS',
|
||||||
0x39: 'IMPORT_ASSETS',
|
self.IMPORT_ASSETS: 'IMPORT_ASSETS',
|
||||||
0x3b: 'DO_INIT_ACTION',
|
self.DO_INIT_ACTION: 'DO_INIT_ACTION',
|
||||||
0x3c: 'DEFINE_VIDEO_STREAM',
|
self.DEFINE_VIDEO_STREAM: 'DEFINE_VIDEO_STREAM',
|
||||||
0x3d: 'VIDEO_FRAME',
|
self.VIDEO_FRAME: 'VIDEO_FRAME',
|
||||||
0x3e: 'DEFINE_FONT_INFO2',
|
self.DEFINE_FONT_INFO2: 'DEFINE_FONT_INFO2',
|
||||||
0x40: 'ENABLE_DEBUGGER2',
|
self.ENABLE_DEBUGGER2: 'ENABLE_DEBUGGER2',
|
||||||
0x41: 'SCRIPT_LIMITS',
|
self.SCRIPT_LIMITS: 'SCRIPT_LIMITS',
|
||||||
0x42: 'SET_TAB_INDEX',
|
self.SET_TAB_INDEX: 'SET_TAB_INDEX',
|
||||||
0x46: 'PLACE_OBJECT3',
|
self.PLACE_OBJECT3: 'PLACE_OBJECT3',
|
||||||
0x47: 'IMPORT_ASSETS2',
|
self.IMPORT_ASSETS2: 'IMPORT_ASSETS2',
|
||||||
0x4b: 'DEFINE_FONT3',
|
self.DEFINE_FONT3: 'DEFINE_FONT3',
|
||||||
0x4e: 'DEFINE_SCALING_GRID',
|
self.DEFINE_SCALING_GRID: 'DEFINE_SCALING_GRID',
|
||||||
0x4d: 'METADATA',
|
self.METADATA: 'METADATA',
|
||||||
0x53: 'DEFINE_SHAPE4',
|
self.DEFINE_SHAPE4: 'DEFINE_SHAPE4',
|
||||||
0x54: 'DEFINE_MORPH_SHAPE2',
|
self.DEFINE_MORPH_SHAPE2: 'DEFINE_MORPH_SHAPE2',
|
||||||
0x56: 'SCENE_LABEL',
|
self.SCENE_LABEL: 'SCENE_LABEL',
|
||||||
0x64: 'AFP_IMAGE',
|
self.AFP_IMAGE: 'AFP_IMAGE',
|
||||||
0x65: 'AFP_DEFINE_SOUND',
|
self.AFP_DEFINE_SOUND: 'AFP_DEFINE_SOUND',
|
||||||
0x66: 'AFP_SOUND_STREAM_BLOCK',
|
self.AFP_SOUND_STREAM_BLOCK: 'AFP_SOUND_STREAM_BLOCK',
|
||||||
self.AFP_DEFINE_FONT: 'AFP_DEFINE_FONT',
|
self.AFP_DEFINE_FONT: 'AFP_DEFINE_FONT',
|
||||||
self.AFP_DEFINE_SHAPE: 'AFP_DEFINE_SHAPE',
|
self.AFP_DEFINE_SHAPE: 'AFP_DEFINE_SHAPE',
|
||||||
self.AEP_PLACE_OBJECT: 'AEP_PLACE_OBJECT',
|
self.AEP_PLACE_OBJECT: 'AEP_PLACE_OBJECT',
|
||||||
@ -347,13 +358,15 @@ class SWF:
|
|||||||
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
|
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
|
||||||
pass
|
pass
|
||||||
|
|
||||||
tags_unknown1, tags_unknown2, tags_count = struct.unpack("<IIi", ap2data[tags_base_offset:(tags_base_offset + 12)])
|
tags_unknown1, tags_unknown2, tags_count, tags_unknown3, tags_unknown4, tags_offset, tags_unknown5 = struct.unpack(
|
||||||
tags_offset = struct.unpack("<i", ap2data[(tags_base_offset + 20):(tags_base_offset + 24)])[0] + tags_base_offset
|
"<IIiIIiI",
|
||||||
add_coverage(tags_base_offset, 12)
|
ap2data[tags_base_offset:(tags_base_offset + 28)]
|
||||||
add_coverage(tags_base_offset + 20, 4)
|
)
|
||||||
|
tags_offset += tags_base_offset
|
||||||
|
add_coverage(tags_base_offset, 28)
|
||||||
|
|
||||||
# TODO: Seems that tags_unknown2 has something to do with end of movie stuff?
|
# TODO: Seems that tags_unknown2 has something to do with end of movie stuff?
|
||||||
vprint(f"UNKNOWN: {hex(tags_unknown1)}, {hex(tags_unknown2)}")
|
vprint(f"UNKNOWN: {hex(tags_unknown1)}, {hex(tags_unknown2)}, {hex(tags_unknown3)}, {hex(tags_unknown4)}, {hex(tags_unknown5)}")
|
||||||
|
|
||||||
vprint(f"Number of Tags: {tags_count}")
|
vprint(f"Number of Tags: {tags_count}")
|
||||||
for i in range(tags_count):
|
for i in range(tags_count):
|
||||||
@ -404,14 +417,34 @@ class SWF:
|
|||||||
|
|
||||||
def __descramble_stringtable(self, scrambled_data: bytes, stringtable_offset: int, stringtable_size: int) -> bytes:
|
def __descramble_stringtable(self, scrambled_data: bytes, stringtable_offset: int, stringtable_size: int) -> bytes:
|
||||||
data = bytearray(scrambled_data)
|
data = bytearray(scrambled_data)
|
||||||
|
curstring: List[int] = []
|
||||||
|
curloc = stringtable_offset
|
||||||
|
|
||||||
addition = 128
|
addition = 128
|
||||||
for i in range(stringtable_size):
|
for i in range(stringtable_size):
|
||||||
data[stringtable_offset + i] = (data[stringtable_offset + i] - addition) & 0xFF
|
byte = (data[stringtable_offset + i] - addition) & 0xFF
|
||||||
|
data[stringtable_offset + i] = byte
|
||||||
addition += 1
|
addition += 1
|
||||||
|
|
||||||
|
if byte == 0:
|
||||||
|
if curstring:
|
||||||
|
# We found a string!
|
||||||
|
self.strings[curloc - stringtable_offset] = (bytes(curstring).decode('utf8'), False)
|
||||||
|
curloc = stringtable_offset + i + 1
|
||||||
|
curstring = []
|
||||||
|
curloc = stringtable_offset + i + 1
|
||||||
|
else:
|
||||||
|
curstring.append(byte)
|
||||||
|
|
||||||
|
if curstring:
|
||||||
|
raise Exception("Logic error!")
|
||||||
|
|
||||||
return bytes(data)
|
return bytes(data)
|
||||||
|
|
||||||
|
def __get_string(self, offset: int) -> str:
|
||||||
|
self.strings[offset] = (self.strings[offset][0], True)
|
||||||
|
return self.strings[offset][0]
|
||||||
|
|
||||||
def parse(self, verbose: bool = False) -> None:
|
def parse(self, verbose: bool = False) -> None:
|
||||||
# Suppress debug text unless asked
|
# Suppress debug text unless asked
|
||||||
if verbose:
|
if verbose:
|
||||||
@ -422,6 +455,7 @@ class SWF:
|
|||||||
|
|
||||||
# Reinitialize coverage.
|
# Reinitialize coverage.
|
||||||
self.coverage = [False] * len(self.data)
|
self.coverage = [False] * len(self.data)
|
||||||
|
self.strings = {}
|
||||||
else:
|
else:
|
||||||
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
|
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
|
||||||
pass
|
pass
|
||||||
@ -429,27 +463,21 @@ class SWF:
|
|||||||
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
|
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# First, use the byteswap header to descramble the data.
|
||||||
data = self.__descramble(self.data, self.descramble_info)
|
data = self.__descramble(self.data, self.descramble_info)
|
||||||
|
|
||||||
def get_until_null(offset: int) -> bytes:
|
|
||||||
out = b""
|
|
||||||
while data[offset] != 0:
|
|
||||||
out += data[offset:(offset + 1)]
|
|
||||||
offset += 1
|
|
||||||
return out
|
|
||||||
|
|
||||||
# Start with the basic file header.
|
# Start with the basic file header.
|
||||||
magic, length, version, nameoffset, flags, left, right, top, bottom = struct.unpack("<4sIHHIHHHH", data[0:24])
|
magic, length, version, nameoffset, flags, left, right, top, bottom = struct.unpack("<4sIHHIHHHH", data[0:24])
|
||||||
width = right - left
|
width = right - left
|
||||||
height = bottom - top
|
height = bottom - top
|
||||||
add_coverage(0, 24)
|
add_coverage(0, 24)
|
||||||
|
|
||||||
|
ap2_data_version = magic[0] & 0xFF
|
||||||
magic = bytes([magic[3] & 0x7F, magic[2] & 0x7F, magic[1] & 0x7F, 0x0])
|
magic = bytes([magic[3] & 0x7F, magic[2] & 0x7F, magic[1] & 0x7F, 0x0])
|
||||||
if magic != b'AP2\x00':
|
if magic != b'AP2\x00':
|
||||||
raise Exception(f"Unrecognzied magic {magic}!")
|
raise Exception(f"Unrecognzied magic {magic}!")
|
||||||
if length != len(data):
|
if length != len(data):
|
||||||
raise Exception(f"Unexpected length in AFP header, {length} != {len(data)}!")
|
raise Exception(f"Unexpected length in AFP header, {length} != {len(data)}!")
|
||||||
ap2_data_version = magic[0] & 0xFF
|
|
||||||
|
|
||||||
if flags & 0x1:
|
if flags & 0x1:
|
||||||
# I have no idea what this is, but its treated as 4 bytes and something
|
# I have no idea what this is, but its treated as 4 bytes and something
|
||||||
@ -480,11 +508,13 @@ class SWF:
|
|||||||
|
|
||||||
# Descramble string table.
|
# Descramble string table.
|
||||||
data = self.__descramble_stringtable(data, stringtable_offset, stringtable_size)
|
data = self.__descramble_stringtable(data, stringtable_offset, stringtable_size)
|
||||||
|
add_coverage(stringtable_offset, stringtable_size)
|
||||||
|
|
||||||
# Get exported SWF name.
|
# Get exported SWF name.
|
||||||
self.exported_name = get_until_null(nameoffset + stringtable_offset).decode('ascii')
|
self.exported_name = self.__get_string(nameoffset)
|
||||||
add_coverage(nameoffset + stringtable_offset, len(self.exported_name) + 1, unique=False)
|
add_coverage(nameoffset + stringtable_offset, len(self.exported_name) + 1, unique=False)
|
||||||
vprint(f"\nAFP name: {self.name}")
|
vprint(f"\nAFP name: {self.name}")
|
||||||
|
vprint(f"Container Version: {hex(ap2_data_version)}")
|
||||||
vprint(f"Version: {hex(version)}")
|
vprint(f"Version: {hex(version)}")
|
||||||
vprint(f"Exported Name: {self.exported_name}")
|
vprint(f"Exported Name: {self.exported_name}")
|
||||||
vprint(f"SWF Flags: {hex(flags)}")
|
vprint(f"SWF Flags: {hex(flags)}")
|
||||||
@ -515,7 +545,7 @@ class SWF:
|
|||||||
add_coverage(asset_offset, 4)
|
add_coverage(asset_offset, 4)
|
||||||
asset_offset += 4
|
asset_offset += 4
|
||||||
|
|
||||||
asset_name = get_until_null(asset_string_offset + stringtable_offset).decode('ascii')
|
asset_name = self.__get_string(asset_string_offset)
|
||||||
add_coverage(asset_string_offset + stringtable_offset, len(asset_name) + 1, unique=False)
|
add_coverage(asset_string_offset + stringtable_offset, len(asset_name) + 1, unique=False)
|
||||||
vprint(f" {assetno}: {asset_name}")
|
vprint(f" {assetno}: {asset_name}")
|
||||||
|
|
||||||
@ -537,7 +567,7 @@ class SWF:
|
|||||||
swf_name_offset, count = struct.unpack("<HH", data[imported_tags_offset:(imported_tags_offset + 4)])
|
swf_name_offset, count = struct.unpack("<HH", data[imported_tags_offset:(imported_tags_offset + 4)])
|
||||||
add_coverage(imported_tags_offset, 4)
|
add_coverage(imported_tags_offset, 4)
|
||||||
|
|
||||||
swf_name = get_until_null(swf_name_offset + stringtable_offset).decode('ascii')
|
swf_name = self.__get_string(swf_name_offset)
|
||||||
add_coverage(swf_name_offset + stringtable_offset, len(swf_name) + 1, unique=False)
|
add_coverage(swf_name_offset + stringtable_offset, len(swf_name) + 1, unique=False)
|
||||||
vprint(f" Source SWF: {swf_name}")
|
vprint(f" Source SWF: {swf_name}")
|
||||||
|
|
||||||
@ -546,7 +576,7 @@ class SWF:
|
|||||||
asset_id_no, asset_name_offset = struct.unpack("<HH", data[imported_tags_data_offset:(imported_tags_data_offset + 4)])
|
asset_id_no, asset_name_offset = struct.unpack("<HH", data[imported_tags_data_offset:(imported_tags_data_offset + 4)])
|
||||||
add_coverage(imported_tags_data_offset, 4)
|
add_coverage(imported_tags_data_offset, 4)
|
||||||
|
|
||||||
asset_name = get_until_null(asset_name_offset + stringtable_offset).decode('ascii')
|
asset_name = self.__get_string(asset_name_offset)
|
||||||
add_coverage(asset_name_offset + stringtable_offset, len(asset_name) + 1, unique=False)
|
add_coverage(asset_name_offset + stringtable_offset, len(asset_name) + 1, unique=False)
|
||||||
vprint(f" Tag ID: {asset_id_no}, Requested Asset: {asset_name}")
|
vprint(f" Tag ID: {asset_id_no}, Requested Asset: {asset_name}")
|
||||||
|
|
||||||
@ -1604,14 +1634,13 @@ class AFPFile:
|
|||||||
|
|
||||||
if header_offset != header_length:
|
if header_offset != header_length:
|
||||||
raise Exception("Failed to parse bitfield of header correctly!")
|
raise Exception("Failed to parse bitfield of header correctly!")
|
||||||
|
if verbose:
|
||||||
|
self.print_coverage()
|
||||||
|
|
||||||
# Now, parse out the SWF data in each of the SWF structures we found.
|
# Now, parse out the SWF data in each of the SWF structures we found.
|
||||||
for swf in self.swfdata:
|
for swf in self.swfdata:
|
||||||
swf.parse(verbose)
|
swf.parse(verbose)
|
||||||
|
|
||||||
if verbose:
|
|
||||||
self.print_coverage()
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def align(val: int) -> int:
|
def align(val: int) -> int:
|
||||||
return (val + 3) & 0xFFFFFFFFC
|
return (val + 3) & 0xFFFFFFFFC
|
||||||
|
Loading…
Reference in New Issue
Block a user