Refactor coverage stuff out to its own utility class, clean up code that used it.

Jennifer Taylor 2021-04-11 20:44:55 +00:00
parent 30a51f48e6
commit 897e779b20
3 changed files with 174 additions and 187 deletions
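In rough terms, the change moves each parser's coverage list and its add_coverage/print_coverage methods into a shared TrackedCoverage base class in the .util module, and drops the verbose-only add_coverage shim that each parse routine used to bind locally. A reconstruction of a typical call site from the hunks below (illustrative fragments quoted from the diff, not standalone code):

    # Before: verbose parses rebound a local add_coverage; otherwise a no-op was used.
    if verbose:
        add_coverage = self.add_coverage
    else:
        def add_coverage(*args: Any, **kwargs: Any) -> None:  # type: ignore
            pass
    add_coverage(offset, 28)

    # After: call the method unconditionally; TrackedCoverage.add_coverage
    # returns early unless a covered(...) context has turned tracking on.
    self.add_coverage(offset, 28)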

View File

@ -12,7 +12,7 @@ from bemani.protocol.node import Node
from .swf import SWF
from .geo import Shape
from .util import scramble_text, descramble_text, pad, align, _hex
from .util import TrackedCoverage, scramble_text, descramble_text, pad, align, _hex
class PMAN:
@ -140,11 +140,10 @@ class Unknown2:
}
class TXP2File:
class TXP2File(TrackedCoverage):
def __init__(self, contents: bytes, verbose: bool = False) -> None:
# Initialize coverage. This is used to help find missed/hidden file
# sections that we aren't parsing correctly.
self.coverage: List[bool] = [False] * len(contents)
# Make sure our coverage engine is initialized.
super().__init__()
# Original file data that we parse into structures.
self.data = contents
@ -213,14 +212,9 @@ class TXP2File:
self.unk_pman2: PMAN = PMAN()
# Parse out the file structure.
with self.covered(len(contents), verbose):
self.__parse(verbose)
def add_coverage(self, offset: int, length: int, unique: bool = True) -> None:
for i in range(offset, offset + length):
if self.coverage[i] and unique:
raise Exception(f"Already covered {hex(offset)}!")
self.coverage[i] = True
def as_dict(self) -> Dict[str, Any]:
return {
'endian': self.endian,
@ -244,23 +238,6 @@ class TXP2File:
'unknown2map': self.unk_pman2.as_dict(),
}
def print_coverage(self) -> None:
# First offset that is not covered in a run.
start = None
for offset, covered in enumerate(self.coverage):
if covered:
if start is not None:
print(f"Uncovered: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
start = None
else:
if start is None:
start = offset
if start is not None:
# Print final range
offset = len(self.coverage)
print(f"Uncovered: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
@staticmethod
def cap32(val: int) -> int:
return val & 0xFFFFFFFF
@ -293,22 +270,17 @@ class TXP2File:
if verbose:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
print(*args, **kwargs, file=sys.stderr)
add_coverage = self.add_coverage
else:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
# Unclear what the first three unknowns are, but the fourth
# looks like it could possibly be two int16s indicating unknown?
magic, expect_zero, flags1, flags2, numentries, flags3, data_offset = struct.unpack(
f"{self.endian}4sIIIIII",
self.data[offset:(offset + 28)],
)
add_coverage(offset, 28)
self.add_coverage(offset, 28)
# I have never seen the first unknown be anything other than zero,
# so let's lock that down.
@ -330,13 +302,13 @@ class TXP2File:
f"{self.endian}III",
self.data[file_offset:(file_offset + 12)],
)
add_coverage(file_offset, 12)
self.add_coverage(file_offset, 12)
if nameoffset == 0:
raise Exception("Expected name offset in PMAN data!")
bytedata = self.get_until_null(nameoffset)
add_coverage(nameoffset, len(bytedata) + 1, unique=False)
self.add_coverage(nameoffset, len(bytedata) + 1, unique=False)
name = descramble_text(bytedata, self.text_obfuscated)
names[entry_no] = name
ordering[entry_no] = i
@ -369,15 +341,10 @@ class TXP2File:
if verbose:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
print(*args, **kwargs, file=sys.stderr)
add_coverage = self.add_coverage
else:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
# First, check the signature
if self.data[0:4] == b"2PXT":
self.endian = "<"
@ -385,29 +352,29 @@ class TXP2File:
self.endian = ">"
else:
raise Exception("Invalid graphic file format!")
add_coverage(0, 4)
self.add_coverage(0, 4)
# Not sure what words 2 and 3 are, they seem to be some sort of
# version or date?
self.file_flags = self.data[4:12]
add_coverage(4, 8)
self.add_coverage(4, 8)
# Now, grab the file length, verify that we have the right amount
# of data.
length = struct.unpack(f"{self.endian}I", self.data[12:16])[0]
add_coverage(12, 4)
self.add_coverage(12, 4)
if length != len(self.data):
raise Exception(f"Invalid graphic file length, expecting {length} bytes!")
# This is always the header length, or the offset of the data payload.
header_length = struct.unpack(f"{self.endian}I", self.data[16:20])[0]
add_coverage(16, 4)
self.add_coverage(16, 4)
# Now, the meat of the file format. Bytes 20-24 are a bitfield for
# what parts of the header exist in the file. We need to understand
# each bit so we know how to skip past each section.
feature_mask = struct.unpack(f"{self.endian}I", self.data[20:24])[0]
add_coverage(20, 4)
self.add_coverage(20, 4)
header_offset = 24
# Lots of magic happens if this bit is set.
@ -419,7 +386,7 @@ class TXP2File:
if feature_mask & 0x01:
# List of textures that exist in the file, with pointers to their data.
length, offset = struct.unpack(f"{self.endian}II", self.data[header_offset:(header_offset + 8)])
add_coverage(header_offset, 8)
self.add_coverage(header_offset, 8)
header_offset += 8
vprint(f"Bit 0x000001 - textures; count: {length}, offset: {hex(offset)}")
@ -431,12 +398,12 @@ class TXP2File:
f"{self.endian}III",
self.data[interesting_offset:(interesting_offset + 12)],
)
add_coverage(interesting_offset, 12)
self.add_coverage(interesting_offset, 12)
if name_offset != 0:
# Let's decode this until the first null.
bytedata = self.get_until_null(name_offset)
add_coverage(name_offset, len(bytedata) + 1, unique=False)
self.add_coverage(name_offset, len(bytedata) + 1, unique=False)
name = descramble_text(bytedata, self.text_obfuscated)
if name_offset != 0 and texture_offset != 0:
@ -448,7 +415,7 @@ class TXP2File:
">II",
self.data[texture_offset:(texture_offset + 8)],
)
add_coverage(texture_offset, 8)
self.add_coverage(texture_offset, 8)
if deflated_size != (texture_length - 8):
raise Exception("We got an incorrect length for lz texture!")
vprint(f" {name}, length: {texture_length}, offset: {hex(texture_offset)}, deflated_size: {deflated_size}, inflated_size: {inflated_size}")
@ -457,7 +424,7 @@ class TXP2File:
# Get the data offset.
lz_data_offset = texture_offset + 8
lz_data = self.data[lz_data_offset:(lz_data_offset + deflated_size)]
add_coverage(lz_data_offset, deflated_size)
self.add_coverage(lz_data_offset, deflated_size)
# This takes forever, so skip it if we're pretending.
lz77 = Lz77()
@ -477,7 +444,7 @@ class TXP2File:
# Just grab the raw data.
lz_data = None
raw_data = self.data[(texture_offset + 8):(texture_offset + 8 + deflated_size)]
add_coverage(texture_offset, deflated_size + 8)
self.add_coverage(texture_offset, deflated_size + 8)
(
magic,
@ -691,7 +658,7 @@ class TXP2File:
if feature_mask & 0x02:
# Mapping of texture name to texture index. This is used by regions to look up textures.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x000002 - texturemapping; offset: {hex(offset)}")
@ -713,7 +680,7 @@ class TXP2File:
# This is 10 bytes per entry. Seems to need both 0x2 (texture index)
# and 0x10 (region index).
length, offset = struct.unpack(f"{self.endian}II", self.data[header_offset:(header_offset + 8)])
add_coverage(header_offset, 8)
self.add_coverage(header_offset, 8)
header_offset += 8
vprint(f"Bit 0x000008 - regions; count: {length}, offset: {hex(offset)}")
@ -725,7 +692,7 @@ class TXP2File:
f"{self.endian}HHHHH",
self.data[descriptor_offset:(descriptor_offset + 10)],
)
add_coverage(descriptor_offset, 10)
self.add_coverage(descriptor_offset, 10)
if texture_no < 0 or texture_no >= len(self.texturemap.entries):
raise Exception(f"Out of bounds texture {texture_no}")
@ -743,7 +710,7 @@ class TXP2File:
# Names of the graphics regions, so we can look into the texture_to_region
# mapping above. Used by shapes to find the right region offset given a name.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x000010 - regionmapping; offset: {hex(offset)}")
@ -762,7 +729,7 @@ class TXP2File:
# Two unknown bytes, first is a length or a count. Second is
# an optional offset to grab another set of bytes from.
length, offset = struct.unpack(f"{self.endian}II", self.data[header_offset:(header_offset + 8)])
add_coverage(header_offset, 8)
self.add_coverage(header_offset, 8)
header_offset += 8
vprint(f"Bit 0x000040 - unknown; count: {length}, offset: {hex(offset)}")
@ -771,7 +738,7 @@ class TXP2File:
for i in range(length):
unk_offset = offset + (i * 16)
name_offset = struct.unpack(f"{self.endian}I", self.data[unk_offset:(unk_offset + 4)])[0]
add_coverage(unk_offset, 4)
self.add_coverage(unk_offset, 4)
# The game does some very bizarre bit-shifting. It's clear that the first value
# points at a name structure, but it's not in the correct endianness. This replicates
@ -780,7 +747,7 @@ class TXP2File:
if name_offset != 0:
# Let's decode this until the first null.
bytedata = self.get_until_null(name_offset)
add_coverage(name_offset, len(bytedata) + 1, unique=False)
self.add_coverage(name_offset, len(bytedata) + 1, unique=False)
name = descramble_text(bytedata, self.text_obfuscated)
vprint(f" {name}")
@ -790,7 +757,7 @@ class TXP2File:
data=self.data[(unk_offset + 4):(unk_offset + 16)],
)
)
add_coverage(unk_offset + 4, 12)
self.add_coverage(unk_offset + 4, 12)
else:
vprint("Bit 0x000040 - unknown; NOT PRESENT")
@ -798,7 +765,7 @@ class TXP2File:
# One unknown byte, treated as an offset. This is clearly the mapping for the parsed
# structures from 0x40, but I don't know what those are.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x000080 - unknownmapping; offset: {hex(offset)}")
@ -813,7 +780,7 @@ class TXP2File:
# Two unknown bytes, first is a length or a count. Second is
# an optional offset to grab another set of bytes from.
length, offset = struct.unpack(f"{self.endian}II", self.data[header_offset:(header_offset + 8)])
add_coverage(header_offset, 8)
self.add_coverage(header_offset, 8)
header_offset += 8
vprint(f"Bit 0x000100 - unknown; count: {length}, offset: {hex(offset)}")
@ -824,7 +791,7 @@ class TXP2File:
self.unknown2.append(
Unknown2(self.data[unk_offset:(unk_offset + 4)])
)
add_coverage(unk_offset, 4)
self.add_coverage(unk_offset, 4)
else:
vprint("Bit 0x000100 - unknown; NOT PRESENT")
@ -832,7 +799,7 @@ class TXP2File:
# One unknown byte, treated as an offset. Almost positive it's a string mapping
# for the above 0x100 structure. That's how this file format appears to work.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x000200 - unknownmapping; offset: {hex(offset)}")
@ -848,7 +815,7 @@ class TXP2File:
# it seems to be empty data in files that I've looked at, it doesn't go to any
# structure or mapping.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x000400 - unknown; offset: {hex(offset)}")
@ -859,7 +826,7 @@ class TXP2File:
# SWF raw data that is loaded and passed to AFP core. It is equivalent to the
# afp files in an IFS container.
length, offset = struct.unpack(f"{self.endian}II", self.data[header_offset:(header_offset + 8)])
add_coverage(header_offset, 8)
self.add_coverage(header_offset, 8)
header_offset += 8
vprint(f"Bit 0x000800 - swfdata; count: {length}, offset: {hex(offset)}")
@ -871,11 +838,11 @@ class TXP2File:
f"{self.endian}III",
self.data[interesting_offset:(interesting_offset + 12)],
)
add_coverage(interesting_offset, 12)
self.add_coverage(interesting_offset, 12)
if name_offset != 0:
# Let's decode this until the first null.
bytedata = self.get_until_null(name_offset)
add_coverage(name_offset, len(bytedata) + 1, unique=False)
self.add_coverage(name_offset, len(bytedata) + 1, unique=False)
name = descramble_text(bytedata, self.text_obfuscated)
vprint(f" {name}, length: {swf_length}, offset: {hex(swf_offset)}")
@ -886,14 +853,14 @@ class TXP2File:
self.data[swf_offset:(swf_offset + swf_length)]
)
)
add_coverage(swf_offset, swf_length)
self.add_coverage(swf_offset, swf_length)
else:
vprint("Bit 0x000800 - swfdata; NOT PRESENT")
if feature_mask & 0x1000:
# A mapping structure that allows looking up SWF data by name.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x001000 - swfmapping; offset: {hex(offset)}")
@ -908,7 +875,7 @@ class TXP2File:
# loaded texture shape and the region that contains data. They are equivalent to the
# geo files found in an IFS container.
length, offset = struct.unpack(f"{self.endian}II", self.data[header_offset:(header_offset + 8)])
add_coverage(header_offset, 8)
self.add_coverage(header_offset, 8)
header_offset += 8
vprint(f"Bit 0x002000 - shapes; count: {length}, offset: {hex(offset)}")
@ -920,12 +887,12 @@ class TXP2File:
f"{self.endian}III",
self.data[shape_base_offset:(shape_base_offset + 12)],
)
add_coverage(shape_base_offset, 12)
self.add_coverage(shape_base_offset, 12)
if name_offset != 0:
# Let's decode this until the first null.
bytedata = self.get_until_null(name_offset)
add_coverage(name_offset, len(bytedata) + 1, unique=False)
self.add_coverage(name_offset, len(bytedata) + 1, unique=False)
name = descramble_text(bytedata, self.text_obfuscated)
else:
name = "<unnamed>"
@ -937,7 +904,7 @@ class TXP2File:
)
shape.parse(text_obfuscated=self.text_obfuscated)
self.shapes.append(shape)
add_coverage(shape_offset, shape_length)
self.add_coverage(shape_offset, shape_length)
vprint(f" {name}, length: {shape_length}, offset: {hex(shape_offset)}")
for line in str(shape).split(os.linesep):
@ -949,7 +916,7 @@ class TXP2File:
if feature_mask & 0x4000:
# Mapping so that shapes can be looked up by name to get their offset.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x004000 - shapesmapping; offset: {hex(offset)}")
@ -963,7 +930,7 @@ class TXP2File:
# One unknown byte, treated as an offset. I have no idea what this is because
# the games I've looked at don't include this bit.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x008000 - unknown; offset: {hex(offset)}")
@ -978,13 +945,13 @@ class TXP2File:
# Included font package, BINXRPC encoded. This is basically a texture sheet with an XML
# pointing at the region in the texture sheet for every renderable character.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
# I am not sure what the unknown byte is for. It always appears as
# all zeros in all files I've looked at.
expect_zero, length, binxrpc_offset = struct.unpack(f"{self.endian}III", self.data[offset:(offset + 12)])
add_coverage(offset, 12)
self.add_coverage(offset, 12)
vprint(f"Bit 0x010000 - fontinfo; offset: {hex(offset)}, binxrpc offset: {hex(binxrpc_offset)}")
@ -995,7 +962,7 @@ class TXP2File:
if binxrpc_offset != 0:
self.fontdata = self.benc.decode(self.data[binxrpc_offset:(binxrpc_offset + length)])
add_coverage(binxrpc_offset, length)
self.add_coverage(binxrpc_offset, length)
else:
self.fontdata = None
else:
@ -1005,7 +972,7 @@ class TXP2File:
# This is the byteswapping headers that allow us to byteswap the SWF data before passing it
# to AFP core. It is equivalent to the bsi files in an IFS container.
offset = struct.unpack(f"{self.endian}I", self.data[header_offset:(header_offset + 4)])[0]
add_coverage(header_offset, 4)
self.add_coverage(header_offset, 4)
header_offset += 4
vprint(f"Bit 0x020000 - swfheaders; offset: {hex(offset)}")
@ -1022,7 +989,7 @@ class TXP2File:
self.data[structure_offset:(structure_offset + 12)]
)
vprint(f" length: {afp_header_length}, offset: {hex(afp_header)}")
add_coverage(structure_offset, 12)
self.add_coverage(structure_offset, 12)
if expect_zero != 0:
# If we find non-zero versions of this, then that means updating the file is
@ -1030,7 +997,7 @@ class TXP2File:
raise Exception("Expected a zero in SWF header!")
self.swfdata[i].descramble_info = self.data[afp_header:(afp_header + afp_header_length)]
add_coverage(afp_header, afp_header_length)
self.add_coverage(afp_header, afp_header_length)
else:
vprint("Bit 0x020000 - swfheaders; NOT PRESENT")

View File

@ -6,51 +6,32 @@ from typing import Any, Dict, List, Tuple
from .types import Matrix, Color, Point, Rectangle
from .types import AP2Action, AP2Tag, AP2Property
from .util import _hex
from .util import TrackedCoverage, _hex
class SWF:
class SWF(TrackedCoverage):
def __init__(
self,
name: str,
data: bytes,
descramble_info: bytes = b"",
) -> None:
# First, init the coverage engine.
super().__init__()
# Now, initialize parsed data.
self.name = name
self.exported_name = ""
self.data = data
self.descramble_info = descramble_info
# Initialize coverage. This is used to help find missed/hidden file
# sections that we aren't parsing correctly.
self.coverage: List[bool] = [False] * len(data)
# Initialize string table. This is used for faster lookup of strings
# as well as tracking which strings in the table have been parsed correctly.
self.strings: Dict[int, Tuple[str, bool]] = {}
def add_coverage(self, offset: int, length: int, unique: bool = True) -> None:
for i in range(offset, offset + length):
if self.coverage[i] and unique:
raise Exception(f"Already covered {hex(offset)}!")
self.coverage[i] = True
def print_coverage(self) -> None:
# First offset that is not covered in a run.
start = None
for offset, covered in enumerate(self.coverage):
if covered:
if start is not None:
print(f"Uncovered bytes: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
start = None
else:
if start is None:
start = offset
if start is not None:
# Print final range
offset = len(self.coverage)
print(f"Uncovered bytes: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
# First print uncovered bytes
super().print_coverage()
# Now, print uncovered strings
for offset, (string, covered) in self.strings.items():
@ -71,15 +52,10 @@ class SWF:
if verbose:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
print(*args, **kwargs, file=sys.stderr)
add_coverage = self.add_coverage
else:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
# First, we need to check if this is a SWF-style bytecode or an AP2 bytecode.
ap2_sentinel = struct.unpack("<B", datachunk[0:1])[0]
@ -375,27 +351,22 @@ class SWF:
if verbose:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
print(*args, **kwargs, file=sys.stderr)
add_coverage = self.add_coverage
else:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
if tagid == AP2Tag.AP2_SHAPE:
if size != 4:
raise Exception(f"Invalid shape size {size}")
_, shape_id = struct.unpack("<HH", ap2data[dataoffset:(dataoffset + 4)])
add_coverage(dataoffset, size)
self.add_coverage(dataoffset, size)
shape_reference = f"{self.exported_name}_shape{shape_id}"
vprint(f"{prefix} Tag ID: {shape_id}, AFP Reference: {shape_reference}, IFS GEO Filename: {md5(shape_reference.encode('utf-8')).hexdigest()}")
elif tagid == AP2Tag.AP2_DEFINE_SPRITE:
sprite_flags, sprite_id = struct.unpack("<HH", ap2data[dataoffset:(dataoffset + 4)])
add_coverage(dataoffset, 4)
self.add_coverage(dataoffset, 4)
if sprite_flags & 1 == 0:
# This is an old-style tag, it has data directly following the header.
@ -403,13 +374,13 @@ class SWF:
else:
# This is a new-style tag, it has a relative data pointer.
subtags_offset = struct.unpack("<I", ap2data[(dataoffset + 4):(dataoffset + 8)])[0] + dataoffset
add_coverage(dataoffset + 4, 4)
self.add_coverage(dataoffset + 4, 4)
vprint(f"{prefix} Tag ID: {sprite_id}")
self.__parse_tags(ap2_version, afp_version, ap2data, subtags_offset, prefix=" " + prefix, verbose=verbose)
elif tagid == AP2Tag.AP2_DEFINE_FONT:
unk, font_id, fontname_offset, xml_prefix_offset, data_offset, data_count = struct.unpack("<HHHHHH", ap2data[dataoffset:(dataoffset + 12)])
add_coverage(dataoffset, 12)
self.add_coverage(dataoffset, 12)
fontname = self.__get_string(fontname_offset)
xml_prefix = self.__get_string(xml_prefix_offset)
@ -419,18 +390,18 @@ class SWF:
for i in range(data_count):
entry_offset = dataoffset + 12 + (data_offset * 2) + (i * 2)
entry_value = struct.unpack("<H", ap2data[entry_offset:(entry_offset + 2)])[0]
add_coverage(entry_offset, 2)
self.add_coverage(entry_offset, 2)
vprint(f"{prefix} Height: {entry_value}")
elif tagid == AP2Tag.AP2_DO_ACTION:
datachunk = ap2data[dataoffset:(dataoffset + size)]
self.__parse_bytecode(datachunk, prefix=prefix, verbose=verbose)
add_coverage(dataoffset, size)
self.add_coverage(dataoffset, size)
elif tagid == AP2Tag.AP2_PLACE_OBJECT:
# Allow us to keep track of what we've consumed.
datachunk = ap2data[dataoffset:(dataoffset + size)]
flags, depth, object_id = struct.unpack("<IHH", datachunk[0:8])
add_coverage(dataoffset, 8)
self.add_coverage(dataoffset, 8)
vprint(f"{prefix} Flags: {hex(flags)}, Object ID: {object_id}, Depth: {depth}")
@ -440,21 +411,21 @@ class SWF:
if flags & 0x2:
unhandled_flags &= ~0x2
src_tag_id = struct.unpack("<H", datachunk[running_pointer:(running_pointer + 2)])[0]
add_coverage(dataoffset + running_pointer, 2)
self.add_coverage(dataoffset + running_pointer, 2)
running_pointer += 2
vprint(f"{prefix} Source Tag ID: {src_tag_id}")
if flags & 0x10:
unhandled_flags &= ~0x10
unk2 = struct.unpack("<H", datachunk[running_pointer:(running_pointer + 2)])[0]
add_coverage(dataoffset + running_pointer, 2)
self.add_coverage(dataoffset + running_pointer, 2)
running_pointer += 2
vprint(f"{prefix} Unk2: {hex(unk2)}")
if flags & 0x20:
unhandled_flags &= ~0x20
nameoffset = struct.unpack("<H", datachunk[running_pointer:(running_pointer + 2)])[0]
add_coverage(dataoffset + running_pointer, 2)
self.add_coverage(dataoffset + running_pointer, 2)
name = self.__get_string(nameoffset)
running_pointer += 2
vprint(f"{prefix} Name: {name}")
@ -462,14 +433,14 @@ class SWF:
if flags & 0x40:
unhandled_flags &= ~0x40
unk3 = struct.unpack("<H", datachunk[running_pointer:(running_pointer + 2)])[0]
add_coverage(dataoffset + running_pointer, 2)
self.add_coverage(dataoffset + running_pointer, 2)
running_pointer += 2
vprint(f"{prefix} Unk3: {hex(unk3)}")
if flags & 0x20000:
unhandled_flags &= ~0x20000
blend = struct.unpack("<B", datachunk[running_pointer:(running_pointer + 1)])[0]
add_coverage(dataoffset + running_pointer, 1)
self.add_coverage(dataoffset + running_pointer, 1)
running_pointer += 1
vprint(f"{prefix} Blend: {hex(blend)}")
@ -477,7 +448,7 @@ class SWF:
misalignment = running_pointer & 3
if misalignment > 0:
catchup = 4 - misalignment
add_coverage(dataoffset + running_pointer, catchup)
self.add_coverage(dataoffset + running_pointer, catchup)
running_pointer += catchup
# Handle transformation matrix.
@ -486,7 +457,7 @@ class SWF:
if flags & 0x100:
unhandled_flags &= ~0x100
a_int, d_int = struct.unpack("<II", datachunk[running_pointer:(running_pointer + 8)])
add_coverage(dataoffset + running_pointer, 8)
self.add_coverage(dataoffset + running_pointer, 8)
running_pointer += 8
transform.a = float(a_int) * 0.0009765625
@ -496,7 +467,7 @@ class SWF:
if flags & 0x200:
unhandled_flags &= ~0x200
b_int, c_int = struct.unpack("<II", datachunk[running_pointer:(running_pointer + 8)])
add_coverage(dataoffset + running_pointer, 8)
self.add_coverage(dataoffset + running_pointer, 8)
running_pointer += 8
transform.b = float(b_int) * 0.0009765625
@ -506,7 +477,7 @@ class SWF:
if flags & 0x400:
unhandled_flags &= ~0x400
tx_int, ty_int = struct.unpack("<II", datachunk[running_pointer:(running_pointer + 8)])
add_coverage(dataoffset + running_pointer, 8)
self.add_coverage(dataoffset + running_pointer, 8)
running_pointer += 8
transform.tx = float(tx_int) / 20.0
@ -520,7 +491,7 @@ class SWF:
if flags & 0x800:
unhandled_flags &= ~0x800
r, g, b, a = struct.unpack("<HHHH", datachunk[running_pointer:(running_pointer + 8)])
add_coverage(dataoffset + running_pointer, 8)
self.add_coverage(dataoffset + running_pointer, 8)
running_pointer += 8
color.r = float(r) * 0.003921569
@ -532,7 +503,7 @@ class SWF:
if flags & 0x1000:
unhandled_flags &= ~0x1000
r, g, b, a = struct.unpack("<HHHH", datachunk[running_pointer:(running_pointer + 8)])
add_coverage(dataoffset + running_pointer, 8)
self.add_coverage(dataoffset + running_pointer, 8)
running_pointer += 8
acolor.r = float(r) * 0.003921569
@ -544,7 +515,7 @@ class SWF:
if flags & 0x2000:
unhandled_flags &= ~0x2000
rgba = struct.unpack("<I", datachunk[running_pointer:(running_pointer + 4)])[0]
add_coverage(dataoffset + running_pointer, 4)
self.add_coverage(dataoffset + running_pointer, 4)
running_pointer += 4
color.r = float((rgba >> 24) & 0xFF) * 0.003921569
@ -556,7 +527,7 @@ class SWF:
if flags & 0x4000:
unhandled_flags &= ~0x4000
rgba = struct.unpack("<I", datachunk[running_pointer:(running_pointer + 4)])[0]
add_coverage(dataoffset + running_pointer, 4)
self.add_coverage(dataoffset + running_pointer, 4)
running_pointer += 4
acolor.r = float((rgba >> 24) & 0xFF) * 0.003921569
@ -569,11 +540,11 @@ class SWF:
# Object event triggers.
unhandled_flags &= ~0x80
event_flags, event_size = struct.unpack("<II", datachunk[running_pointer:(running_pointer + 8)])
add_coverage(dataoffset + running_pointer, 8)
self.add_coverage(dataoffset + running_pointer, 8)
if event_flags != 0:
_, count = struct.unpack("<HH", datachunk[(running_pointer + 8):(running_pointer + 12)])
add_coverage(dataoffset + running_pointer + 8, 4)
self.add_coverage(dataoffset + running_pointer + 8, 4)
# The game does not seem to care about length here, but we do, so let's calculate
# offsets and use that for lengths.
@ -592,7 +563,7 @@ class SWF:
for evt in range(count):
evt_offset = running_pointer + 12 + (evt * 8)
evt_flags, _, keycode, bytecode_offset = struct.unpack("<IBBH", datachunk[evt_offset:(evt_offset + 8)])
add_coverage(dataoffset + evt_offset, 8)
self.add_coverage(dataoffset + evt_offset, 8)
events: List[str] = []
if evt_flags & 0x1:
@ -629,7 +600,7 @@ class SWF:
vprint(f"{prefix} Flags: {hex(evt_flags)} ({', '.join(events)}), KeyCode: {hex(keycode)}, Bytecode Offset: {hex(dataoffset + bytecode_offset)}, Length: {bytecode_length}")
self.__parse_bytecode(datachunk[bytecode_offset:(bytecode_offset + bytecode_length)], prefix=prefix + " ", verbose=verbose)
add_coverage(dataoffset + bytecode_offset, bytecode_length)
self.add_coverage(dataoffset + bytecode_offset, bytecode_length)
running_pointer += event_size
@ -638,7 +609,7 @@ class SWF:
# if I encounter files with it.
unhandled_flags &= ~0x10000
count, filter_size = struct.unpack("<HH", datachunk[running_pointer:(running_pointer + 4)])
add_coverage(dataoffset + running_pointer, 4)
self.add_coverage(dataoffset + running_pointer, 4)
running_pointer += filter_size
# TODO: This is not understood at all. I need to find data that uses it to continue.
@ -652,7 +623,7 @@ class SWF:
# Some sort of point, perhaps an x, y offset for the object?
unhandled_flags &= ~0x1000000
x, y = struct.unpack("<ff", datachunk[running_pointer:(running_pointer + 8)])
add_coverage(dataoffset + running_pointer, 8)
self.add_coverage(dataoffset + running_pointer, 8)
running_pointer += 8
# TODO: This doesn't seem right when run past Pop'n Music data.
@ -669,7 +640,7 @@ class SWF:
# Some pair of shorts, not sure, it's in DDR PS3 data.
unhandled_flags &= ~0x40000
x, y = struct.unpack("<HH", datachunk[running_pointer:(running_pointer + 4)])
add_coverage(dataoffset + running_pointer, 4)
self.add_coverage(dataoffset + running_pointer, 4)
running_pointer += 4
# TODO: I have no idea what these are.
@ -680,7 +651,7 @@ class SWF:
# Some pair of shorts, not sure, it's in DDR PS3 data.
unhandled_flags &= ~0x80000
x, y = struct.unpack("<HH", datachunk[running_pointer:(running_pointer + 4)])
add_coverage(dataoffset + running_pointer, 4)
self.add_coverage(dataoffset + running_pointer, 4)
running_pointer += 4
# TODO: I have no idea what these are.
@ -715,19 +686,19 @@ class SWF:
object_id, depth = struct.unpack("<HH", ap2data[dataoffset:(dataoffset + 4)])
vprint(f"{prefix} Object ID: {object_id}, Depth: {depth}")
add_coverage(dataoffset, 4)
self.add_coverage(dataoffset, 4)
elif tagid == AP2Tag.AP2_DEFINE_EDIT_TEXT:
if size != 44:
raise Exception("Invalid size {size} to get data from AP2_DEFINE_EDIT_TEXT!")
flags, edit_text_id, defined_font_tag_id, font_height, unk_str2_offset = struct.unpack("<IHHHH", ap2data[dataoffset:(dataoffset + 12)])
add_coverage(dataoffset, 12)
self.add_coverage(dataoffset, 12)
unk1, unk2, unk3, unk4 = struct.unpack("<HHHH", ap2data[(dataoffset + 12):(dataoffset + 20)])
add_coverage(dataoffset + 12, 8)
self.add_coverage(dataoffset + 12, 8)
rgba, f1, f2, f3, f4, variable_name_offset, default_text_offset = struct.unpack("<IiiiiHH", ap2data[(dataoffset + 20):(dataoffset + 44)])
add_coverage(dataoffset + 20, 24)
self.add_coverage(dataoffset + 20, 24)
vprint(f"{prefix} Tag ID: {edit_text_id}, Font Tag: {defined_font_tag_id}, Height Selection: {font_height}, Flags: {hex(flags)}")
@ -765,20 +736,15 @@ class SWF:
if verbose:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
print(*args, **kwargs, file=sys.stderr)
add_coverage = self.add_coverage
else:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
unknown_tags_flags, unknown_tags_count, frame_count, tags_count, unknown_tags_offset, frame_offset, tags_offset = struct.unpack(
"<HHIIIII",
ap2data[tags_base_offset:(tags_base_offset + 24)]
)
add_coverage(tags_base_offset, 24)
self.add_coverage(tags_base_offset, 24)
# Fix up pointers.
tags_offset += tags_base_offset
@ -789,7 +755,7 @@ class SWF:
vprint(f"{prefix}Number of Tags: {tags_count}")
for i in range(tags_count):
tag = struct.unpack("<I", ap2data[tags_offset:(tags_offset + 4)])[0]
add_coverage(tags_offset, 4)
self.add_coverage(tags_offset, 4)
tagid = (tag >> 22) & 0x3FF
size = tag & 0x3FFFFF
@ -805,7 +771,7 @@ class SWF:
vprint(f"{prefix}Number of Frames: {frame_count}")
for i in range(frame_count):
frame_info = struct.unpack("<I", ap2data[frame_offset:(frame_offset + 4)])[0]
add_coverage(frame_offset, 4)
self.add_coverage(frame_offset, 4)
start_tag_id = frame_info & 0xFFFFF
num_tags_to_play = (frame_info >> 20) & 0xFFF
@ -819,7 +785,7 @@ class SWF:
for i in range(unknown_tags_count):
unk1, stringoffset = struct.unpack("<HH", ap2data[unknown_tags_offset:(unknown_tags_offset + 4)])
strval = self.__get_string(stringoffset)
add_coverage(unknown_tags_offset, 4)
self.add_coverage(unknown_tags_offset, 4)
vprint(f"{prefix} Unknown Tag: {hex(unk1)} Name: {strval}")
unknown_tags_offset += 4
@ -895,23 +861,21 @@ class SWF:
return self.strings[offset][0]
def parse(self, verbose: bool = False) -> None:
with self.covered(len(self.data), verbose):
self.__parse(verbose)
def __parse(self, verbose: bool) -> None:
# Suppress debug text unless asked
if verbose:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
print(*args, **kwargs, file=sys.stderr)
add_coverage = self.add_coverage
# Reinitialize coverage.
self.coverage = [False] * len(self.data)
self.strings = {}
else:
def vprint(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
def add_coverage(*args: Any, **kwargs: Any) -> None: # type: ignore
pass
# First, use the byteswap header to descramble the data.
data = self.__descramble(self.data, self.descramble_info)
@ -919,7 +883,7 @@ class SWF:
magic, length, version, nameoffset, flags, left, right, top, bottom = struct.unpack("<4sIHHIHHHH", data[0:24])
width = right - left
height = bottom - top
add_coverage(0, 24)
self.add_coverage(0, 24)
ap2_data_version = magic[0] & 0xFF
magic = bytes([magic[3] & 0x7F, magic[2] & 0x7F, magic[1] & 0x7F, 0x0])
@ -943,34 +907,34 @@ class SWF:
)
else:
swf_color = None
add_coverage(28, 4)
self.add_coverage(28, 4)
if flags & 0x2:
# FPS can be either an integer or a float.
fps = struct.unpack("<i", data[24:28])[0] * 0.0009765625
else:
fps = struct.unpack("<f", data[24:28])[0]
add_coverage(24, 4)
self.add_coverage(24, 4)
if flags & 0x4:
# This seems related to imported tags.
imported_tag_initializers_offset = struct.unpack("<I", data[56:60])[0]
add_coverage(56, 4)
self.add_coverage(56, 4)
else:
# Unknown offset is not present.
imported_tag_initializers_offset = None
# String table
stringtable_offset, stringtable_size = struct.unpack("<II", data[48:56])
add_coverage(48, 8)
self.add_coverage(48, 8)
# Descramble string table.
data = self.__descramble_stringtable(data, stringtable_offset, stringtable_size)
add_coverage(stringtable_offset, stringtable_size)
self.add_coverage(stringtable_offset, stringtable_size)
# Get exported SWF name.
self.exported_name = self.__get_string(nameoffset)
add_coverage(nameoffset + stringtable_offset, len(self.exported_name) + 1, unique=False)
self.add_coverage(nameoffset + stringtable_offset, len(self.exported_name) + 1, unique=False)
vprint(f"{os.linesep}AFP name: {self.name}")
vprint(f"Container Version: {hex(ap2_data_version)}")
vprint(f"Version: {hex(version)}")
@ -994,49 +958,49 @@ class SWF:
# Exported assets
num_exported_assets = struct.unpack("<H", data[32:34])[0]
asset_offset = struct.unpack("<I", data[40:44])[0]
add_coverage(32, 2)
add_coverage(40, 4)
self.add_coverage(32, 2)
self.add_coverage(40, 4)
# Parse exported asset tag names and their tag IDs.
vprint(f"Number of Exported Tags: {num_exported_assets}")
for assetno in range(num_exported_assets):
asset_data_offset, asset_string_offset = struct.unpack("<HH", data[asset_offset:(asset_offset + 4)])
add_coverage(asset_offset, 4)
self.add_coverage(asset_offset, 4)
asset_offset += 4
asset_name = self.__get_string(asset_string_offset)
add_coverage(asset_string_offset + stringtable_offset, len(asset_name) + 1, unique=False)
self.add_coverage(asset_string_offset + stringtable_offset, len(asset_name) + 1, unique=False)
vprint(f" {assetno}: Tag Name: {asset_name} Tag ID: {asset_data_offset}")
# Tag sections
tags_offset = struct.unpack("<I", data[36:40])[0]
add_coverage(36, 4)
self.add_coverage(36, 4)
self.__parse_tags(ap2_data_version, version, data, tags_offset, verbose=verbose)
# Imported tags sections
imported_tags_count = struct.unpack("<h", data[34:36])[0]
imported_tags_offset = struct.unpack("<I", data[44:48])[0]
imported_tags_data_offset = imported_tags_offset + 4 * imported_tags_count
add_coverage(34, 2)
add_coverage(44, 4)
self.add_coverage(34, 2)
self.add_coverage(44, 4)
vprint(f"Number of Imported Tags: {imported_tags_count}")
for i in range(imported_tags_count):
# First grab the SWF this is importing from, and the number of assets being imported.
swf_name_offset, count = struct.unpack("<HH", data[imported_tags_offset:(imported_tags_offset + 4)])
add_coverage(imported_tags_offset, 4)
self.add_coverage(imported_tags_offset, 4)
swf_name = self.__get_string(swf_name_offset)
add_coverage(swf_name_offset + stringtable_offset, len(swf_name) + 1, unique=False)
self.add_coverage(swf_name_offset + stringtable_offset, len(swf_name) + 1, unique=False)
vprint(f" Source SWF: {swf_name}")
# Now, grab the actual asset names being imported.
for j in range(count):
asset_id_no, asset_name_offset = struct.unpack("<HH", data[imported_tags_data_offset:(imported_tags_data_offset + 4)])
add_coverage(imported_tags_data_offset, 4)
self.add_coverage(imported_tags_data_offset, 4)
asset_name = self.__get_string(asset_name_offset)
add_coverage(asset_name_offset + stringtable_offset, len(asset_name) + 1, unique=False)
self.add_coverage(asset_name_offset + stringtable_offset, len(asset_name) + 1, unique=False)
vprint(f" Tag ID: {asset_id_no}, Requested Asset: {asset_name}")
imported_tags_data_offset += 4
@ -1048,14 +1012,14 @@ class SWF:
if imported_tag_initializers_offset is not None:
unk1, length = struct.unpack("<HH", data[imported_tag_initializers_offset:(imported_tag_initializers_offset + 4)])
add_coverage(imported_tag_initializers_offset, 4)
self.add_coverage(imported_tag_initializers_offset, 4)
vprint(f"Imported Tag Initializer Offset: {hex(imported_tag_initializers_offset)}, Length: {length}")
for i in range(length):
item_offset = imported_tag_initializers_offset + 4 + (i * 12)
tag_id, frame, action_bytecode_offset, action_bytecode_length = struct.unpack("<HHII", data[item_offset:(item_offset + 12)])
add_coverage(item_offset, 12)
self.add_coverage(item_offset, 12)
if action_bytecode_length != 0:
vprint(f" Tag ID: {tag_id}, Frame: {frame}, Bytecode Offset: {hex(action_bytecode_offset + imported_tag_initializers_offset)}")

View File

@ -1,3 +1,8 @@
import sys
from typing import Any, List
def _hex(data: int) -> str:
hexval = hex(data)[2:]
if len(hexval) == 1:
@ -34,3 +39,54 @@ def scramble_text(text: str, obfuscated: bool) -> bytes:
return bytes(((x + 0x80) & 0xFF) for x in text.encode('ascii')) + b'\0'
else:
return text.encode('ascii') + b'\0'
class TrackedCoverageManager:
def __init__(self, covered_class: "TrackedCoverage", verbose: bool) -> None:
self.covered_class = covered_class
self.verbose = verbose
def __enter__(self) -> "TrackedCoverageManager":
if self.verbose:
self.covered_class._tracking = True
return self
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
self.covered_class._tracking = False
class TrackedCoverage:
def __init__(self) -> None:
self.coverage: List[bool] = []
self._tracking: bool = False
def covered(self, size: int, verbose: bool) -> TrackedCoverageManager:
if verbose:
self.coverage = [False] * size
return TrackedCoverageManager(self, verbose)
def add_coverage(self, offset: int, length: int, unique: bool = True) -> None:
if not self._tracking:
# Save some CPU cycles if we aren't verbose.
return
for i in range(offset, offset + length):
if self.coverage[i] and unique:
raise Exception(f"Already covered {hex(offset)}!")
self.coverage[i] = True
def print_coverage(self) -> None:
# First offset that is not covered in a run.
start = None
for offset, covered in enumerate(self.coverage):
if covered:
if start is not None:
print(f"Uncovered: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)
start = None
else:
if start is None:
start = offset
if start is not None:
# Print final range
offset = len(self.coverage)
print(f"Uncovered: {hex(start)} - {hex(offset)} ({offset-start} bytes)", file=sys.stderr)