573in1/tools/common/cartdata.py

579 lines
15 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
import logging
from dataclasses import dataclass
from enum import IntEnum, IntFlag
from struct import Struct, unpack
from typing import Any, Sequence, Type
from .cart import CartDump, ChipType, ROMHeaderDump
from .games import GAME_CODE_REGEX, GAME_REGION_REGEX
from .util import checksum8, checksum16, shortenedMD5
## Definitions
class FormatType(IntEnum):
    """Kind of header layout found in a cartridge or ROM header dump.

    The value reported by each parser's getFormatType() method.
    """

    # Reported by the base parser classes, which parse nothing.
    BLANK = 0
    # Region string only (SimpleCartParser).
    SIMPLE = 1
    # Region, optional code prefix and 8-bit checksum (BasicCartParser).
    BASIC = 2
    # Full game code, year, region and 16-bit checksum (extended parsers).
    EXTENDED = 3
class TraceIDType(IntEnum):
    """Flavor of trace ID (TID) stored in a cartridge's identifier area."""

    # No trace ID present.
    TID_NONE = 0
    # Trace ID whose first byte is 0x81.
    TID_81 = 1
    # Trace ID whose first byte is 0x82; bytes 1-2 hold the cart ID checksum
    # as a big endian 16-bit value.
    TID_82_BIG_ENDIAN = 2
    # Same as above, but the checksum is stored little endian.
    TID_82_LITTLE_ENDIAN = 3
class DataFlag(IntFlag):
    """Bit flags describing which fields a data format contains and how its
    header checksum is computed."""

    # The header carries a (non-empty) game code prefix.
    DATA_HAS_CODE_PREFIX = 1 << 0
    # Identifier presence bits, one per 8-byte ID.
    DATA_HAS_TRACE_ID = 1 << 1
    DATA_HAS_CART_ID = 1 << 2
    DATA_HAS_INSTALL_ID = 1 << 3
    DATA_HAS_SYSTEM_ID = 1 << 4
    # The header is read from the chip's public section rather than from the
    # beginning of the dump.
    DATA_HAS_PUBLIC_SECTION = 1 << 5
    # Passed on to checksum8()/checksum16() as the "inverted" argument.
    DATA_CHECKSUM_INVERTED = 1 << 6
    # The second header byte is replaced with "X" before checksumming.
    DATA_GX706_WORKAROUND = 1 << 7
class ParserError(Exception):
    """Raised by a parser when a dump does not match the expected format.

    Derives from Exception rather than BaseException, so that generic
    `except Exception` handlers treat it like any other application error
    instead of letting it escape alongside KeyboardInterrupt and SystemExit.
    """
## Common data structures
@dataclass
class IdentifierSet:
    """Four consecutive 8-byte identifiers read from a cartridge dump.

    An identifier is stored as None when all of its bytes are zero, or when
    the input is too short to contain it.
    """

    traceID: bytes | None = None  # aka TID
    cartID: bytes | None = None  # aka SID
    installID: bytes | None = None  # aka MID
    systemID: bytes | None = None  # aka XID

    def __init__(self, data: bytes):
        """Split the first 32 bytes of data into the four identifiers."""
        chunks = (data[start:start + 8] for start in range(0, 32, 8))

        self.traceID, self.cartID, self.installID, self.systemID = [
            chunk if any(chunk) else None for chunk in chunks
        ]

    def getFlags(self) -> DataFlag:
        """Return the DATA_HAS_*_ID flags for the identifiers present."""
        present: DataFlag = DataFlag(0)

        for identifier, flag in (
            (self.traceID, DataFlag.DATA_HAS_TRACE_ID),
            (self.cartID, DataFlag.DATA_HAS_CART_ID),
            (self.installID, DataFlag.DATA_HAS_INSTALL_ID),
            (self.systemID, DataFlag.DATA_HAS_SYSTEM_ID),
        ):
            if identifier:
                present |= flag

        return present

    def getCartIDChecksum(self, param: int) -> int:
        """Compute the 16-bit checksum of bytes 1-6 of the cart ID.

        Every set bit (numbered 0-47, least significant bit of byte 1 first)
        toggles bit (index % param) of the result. Returns 0 if there is no
        cart ID.
        """
        if self.cartID is None:
            return 0

        result: int = 0

        for byteIndex in range(6):
            byte: int = self.cartID[byteIndex + 1]

            for bit in range(8):
                if (byte >> bit) & 1:
                    result ^= 1 << ((byteIndex * 8 + bit) % param)

        return result & 0xffff

    def getTraceIDType(self, param: int) -> TraceIDType:
        """Classify the trace ID, validating 0x82-type IDs against the cart
        ID checksum computed with the given parameter.

        Raises ValueError if the prefix byte is unknown or if the stored
        checksum matches neither byte order.
        """
        if self.traceID is None:
            return TraceIDType.TID_NONE

        prefix: int = self.traceID[0]

        if prefix == 0x81:
            return TraceIDType.TID_81
        if prefix != 0x82:
            raise ValueError(f"unknown trace ID prefix: 0x{prefix:02x}")

        # Bytes 1-2 of a 0x82 trace ID hold the cart ID checksum; the byte
        # order is not known in advance, so both are tried.
        checksum: int = self.getCartIDChecksum(param)
        big: int = unpack("> H", self.traceID[1:3])[0]
        little: int = unpack("< H", self.traceID[1:3])[0]

        if checksum == big:
            return TraceIDType.TID_82_BIG_ENDIAN
        if checksum == little:
            return TraceIDType.TID_82_LITTLE_ENDIAN

        raise ValueError(
            f"trace ID mismatch, exp=0x{checksum:04x}, "
            f"big=0x{big:04x}, little=0x{little:04x}"
        )
@dataclass
class PublicIdentifierSet:
    """Two consecutive 8-byte identifiers read from a dump's public area.

    An identifier is stored as None when all of its bytes are zero, or when
    the input is too short to contain it.
    """

    installID: bytes | None = None  # aka MID
    systemID: bytes | None = None  # aka XID

    def __init__(self, data: bytes):
        """Split the first 16 bytes of data into the two identifiers."""
        first: bytes = data[0:8]
        second: bytes = data[8:16]

        self.installID = first if any(first) else None
        self.systemID = second if any(second) else None

    def getFlags(self) -> DataFlag:
        """Return the DATA_HAS_*_ID flags for the identifiers present."""
        present: DataFlag = DataFlag(0)

        for identifier, flag in (
            (self.installID, DataFlag.DATA_HAS_INSTALL_ID),
            (self.systemID, DataFlag.DATA_HAS_SYSTEM_ID),
        ):
            if identifier:
                present |= flag

        return present
## Cartridge data parsers
# Basic header layout (8 bytes, little endian, unaligned): 2-byte region,
# 2-byte game code prefix, 8-bit checksum (computed over the first 4 bytes),
# 3 padding bytes.
_BASIC_HEADER_STRUCT: Struct = Struct("< 2s 2s B 3x")
# Extended header layout (16 bytes, little endian, unaligned): 8-byte game
# code, 16-bit year, 4-byte region, 16-bit checksum (computed over the first
# 14 bytes).
_EXTENDED_HEADER_STRUCT: Struct = Struct("< 8s H 4s H")
# The system and install IDs are excluded from validation as they may not be
# always present. Only the bits in this mask are compared between the flags of
# the format being probed and the flags derived from the identifiers actually
# found in the dump.
_IDENTIFIER_FLAG_MASK: DataFlag = \
DataFlag.DATA_HAS_TRACE_ID | DataFlag.DATA_HAS_CART_ID
def _getPublicData(
    dump: CartDump, flags: DataFlag, maxLength: int = 512
) -> bytes:
    """Return up to maxLength bytes of the region a header is parsed from.

    If the format has a public section, the data is taken from the offset and
    length reported by dump.getChipSize(); otherwise it is taken from the
    beginning of the dump.
    """
    if not (flags & DataFlag.DATA_HAS_PUBLIC_SECTION):
        return dump.data[0:maxLength]

    _, offset, length = dump.getChipSize()
    end: int = offset + min(length, maxLength)

    return dump.data[offset:end]
@dataclass
class CartParser:
    """Base class holding the fields extracted from a cartridge dump.

    Subclasses parse a specific header layout and override getFormatType().
    """

    # Flags of the data format the dump was successfully parsed as.
    flags: DataFlag
    # Identifiers found in the dump.
    identifiers: IdentifierSet
    publicIdentifiers: PublicIdentifierSet

    # Header fields; None when the format does not provide them.
    region: str | None = None
    codePrefix: str | None = None
    code: str | None = None
    year: int | None = None

    def getFormatType(self) -> FormatType:
        """Return the format type handled by this parser class."""
        return FormatType.BLANK
class SimpleCartParser(CartParser):
    """Parser for dumps that only contain a region string."""

    def __init__(self, dump: CartDump, flags: DataFlag):
        """Parse the dump, raising ParserError if the region is invalid."""
        # The region occupies up to 8 bytes and is padded with null bytes.
        region: bytes = _getPublicData(dump, flags, 8).rstrip(b"\0")

        if not GAME_REGION_REGEX.fullmatch(region):
            raise ParserError(f"invalid game region: {region}")

        # This format carries no identifiers, so both sets are left empty.
        super().__init__(
            flags=flags,
            identifiers=IdentifierSet(b""),
            publicIdentifiers=PublicIdentifierSet(b""),
            region=region.decode("ascii"),
        )

    def getFormatType(self) -> FormatType:
        """Return the format type handled by this parser class."""
        return FormatType.SIMPLE
class BasicCartParser(CartParser):
    """Parser for dumps with a basic header followed by the identifiers."""

    def __init__(self, dump: CartDump, flags: DataFlag):
        """Parse and validate the dump against the given format flags.

        Raises ParserError if the checksum, region, code prefix presence or
        identifier presence do not match what the flags call for.
        """
        headerSize: int = _BASIC_HEADER_STRUCT.size
        data: bytes = _getPublicData(dump, flags, headerSize)
        # The identifiers immediately follow the header in the dump.
        pri: IdentifierSet = IdentifierSet(dump.data[headerSize:])

        region, codePrefix, checksum = _BASIC_HEADER_STRUCT.unpack(data)
        codePrefix: bytes = codePrefix.rstrip(b"\0")

        # The checksum only covers the region and code prefix.
        inverted: bool = bool(flags & DataFlag.DATA_CHECKSUM_INVERTED)
        value: int = checksum8(data[0:4], inverted)

        if value != checksum:
            raise ParserError(
                f"invalid header checksum, exp=0x{value:02x}, "
                f"got=0x{checksum:02x}"
            )
        if not GAME_REGION_REGEX.fullmatch(region):
            raise ParserError(f"invalid game region: {region}")

        # A prefix must be present if and only if the format calls for one.
        prefixExpected: bool = bool(flags & DataFlag.DATA_HAS_CODE_PREFIX)

        if prefixExpected != bool(codePrefix):
            raise ParserError(
                f"game code prefix should{' not' if codePrefix else ''} be "
                f"present"
            )

        mismatched: DataFlag = (pri.getFlags() ^ flags) & _IDENTIFIER_FLAG_MASK

        if mismatched:
            raise ParserError("identifier flags do not match")

        super().__init__(
            flags=flags,
            identifiers=pri,
            publicIdentifiers=PublicIdentifierSet(b""),
            region=region.decode("ascii"),
            codePrefix=codePrefix.decode("ascii") or None,
        )

    def getFormatType(self) -> FormatType:
        """Return the format type handled by this parser class."""
        return FormatType.BASIC
class ExtendedCartParser(CartParser):
    """Parser for dumps with an extended header and up to two sets of
    identifiers."""

    def __init__(self, dump: CartDump, flags: DataFlag):
        """Parse and validate the dump against the given format flags.

        Raises ParserError if the checksum, game code, region or identifier
        presence do not match what the flags call for.
        """
        headerSize: int = _EXTENDED_HEADER_STRUCT.size
        data: bytes = _getPublicData(dump, flags, headerSize + 16)

        # The header is followed by 16 bytes of public identifiers, then by
        # the private ones.
        idsPri: bytes = dump.data[headerSize + 16:]
        idsPub: bytes = dump.data[headerSize:]
        header: bytes = data[0:headerSize]

        pri: IdentifierSet = IdentifierSet(idsPri)
        pub: PublicIdentifierSet = PublicIdentifierSet(idsPub)

        # The workaround only patches the bytes being checksummed; the header
        # slice taken above (and thus the parsed game code) is unaffected.
        if flags & DataFlag.DATA_GX706_WORKAROUND:
            data = data[0:1] + b"X" + data[2:]

        code, year, region, checksum = _EXTENDED_HEADER_STRUCT.unpack(header)
        code: bytes = code.rstrip(b"\0")
        region: bytes = region.rstrip(b"\0")

        # The checksum covers everything in the header but itself.
        inverted: bool = bool(flags & DataFlag.DATA_CHECKSUM_INVERTED)
        value: int = checksum16(data[0:14], inverted)

        if value != checksum:
            raise ParserError(
                f"invalid header checksum, exp=0x{value:04x}, "
                f"got=0x{checksum:04x}"
            )
        if not GAME_CODE_REGEX.fullmatch(code):
            raise ParserError(f"invalid game code: {code}")
        if not GAME_REGION_REGEX.fullmatch(region):
            raise ParserError(f"invalid game region: {region}")

        mismatched: DataFlag = (pri.getFlags() ^ flags) & _IDENTIFIER_FLAG_MASK

        if mismatched:
            raise ParserError("identifier flags do not match")

        # The code prefix is the first two characters of the game code.
        _code: str = code.decode("ascii")

        super().__init__(
            flags=flags,
            identifiers=pri,
            publicIdentifiers=pub,
            region=region.decode("ascii"),
            codePrefix=_code[0:2],
            code=_code,
            year=year,
        )

    def getFormatType(self) -> FormatType:
        """Return the format type handled by this parser class."""
        return FormatType.EXTENDED
## Flash and RTC header parsers/writers
# Used alongside the system ID and the header itself to calculate the MD5 used
# as a header signature. Seems to be the same in all games. The expected
# signature is computed as shortenedMD5(system ID + header + salt).
_SIGNATURE_SALT: bytes = bytes.fromhex("c1 a2 03 d6 ab 70 85 5e")
@dataclass
class ROMHeaderParser:
    """Base class holding the fields extracted from a ROM header dump.

    Subclasses parse a specific header layout and override getFormatType().
    """

    # Flags of the data format the dump was successfully parsed as.
    flags: DataFlag

    # Header fields; None when the format does not provide them.
    signature: bytes | None = None
    region: str | None = None
    codePrefix: str | None = None
    code: str | None = None
    year: int | None = None

    def getFormatType(self) -> FormatType:
        """Return the format type handled by this parser class."""
        return FormatType.BLANK
class ExtendedROMHeaderParser(ROMHeaderParser):
    """Parser for ROM headers made up of an extended header followed by an
    8-byte signature."""

    def __init__(self, dump: ROMHeaderDump, flags: DataFlag):
        """Parse and validate the dump against the given format flags.

        Raises ParserError if the checksum, game code, region or signature do
        not match what the flags call for.
        """
        headerSize: int = _EXTENDED_HEADER_STRUCT.size
        data: bytes = dump.data[0:headerSize + 8]
        header: bytes = data[0:headerSize]
        signature: bytes = data[headerSize:]

        # The workaround only patches the bytes being checksummed; the header
        # slice taken above (and thus the parsed game code) is unaffected.
        if flags & DataFlag.DATA_GX706_WORKAROUND:
            data = data[0:1] + b"X" + data[2:]

        code, year, region, checksum = _EXTENDED_HEADER_STRUCT.unpack(header)
        code: bytes = code.rstrip(b"\0")
        region: bytes = region.rstrip(b"\0")

        # The checksum covers everything in the header but itself.
        inverted: bool = bool(flags & DataFlag.DATA_CHECKSUM_INVERTED)
        value: int = checksum16(data[0:14], inverted)

        if value != checksum:
            raise ParserError(
                f"invalid header checksum, exp=0x{value:04x}, "
                f"got=0x{checksum:04x}"
            )
        if not GAME_CODE_REGEX.fullmatch(code):
            raise ParserError(f"invalid game code: {code}")
        if not GAME_REGION_REGEX.fullmatch(region):
            raise ParserError(f"invalid game region: {region}")

        if flags & DataFlag.DATA_HAS_SYSTEM_ID:
            # The signature must match the one derived from the system ID.
            expected: bytearray = \
                shortenedMD5(dump.systemID + header + _SIGNATURE_SALT)

            if signature != expected:
                raise ParserError(
                    f"invalid signature, exp={expected.hex()}, "
                    f"got={signature.hex()}"
                )
        elif sum(signature) not in ( 0, 0xff * 8 ):
            # Without a system ID the signature area must be blank, i.e. all
            # zeroes or all 0xff bytes.
            raise ParserError("unexpected signature present")

        # The code prefix is the first two characters of the game code.
        _code: str = code.decode("ascii")

        super().__init__(
            flags=flags,
            signature=signature,
            region=region.decode("ascii"),
            codePrefix=_code[0:2],
            code=_code,
            year=year,
        )

    def getFormatType(self) -> FormatType:
        """Return the format type handled by this parser class."""
        return FormatType.EXTENDED
## Cartridge and flash header database
# Serialized cartridge database entry (128 bytes, little endian): chip type,
# format type, trace ID type, flags, trace ID parameter and install ID prefix
# (one byte each), 16-bit year, 8-byte data key, 8-byte game code, 8-byte
# region and 96-byte name.
_CART_DB_ENTRY_STRUCT: Struct = Struct("< 6B H 8s 8s 8s 96s")
# Serialized ROM header database entry (116 bytes, little endian): format type
# and flags (one byte each), 16-bit year, 8-byte game code, 8-byte region and
# 96-byte name.
_ROM_HEADER_DB_ENTRY_STRUCT: Struct = Struct("< 2B H 8s 8s 96s")
# Candidate parameters for the cart ID checksum, tried in this order when
# determining a cartridge's trace ID type.
_TRACE_ID_PARAMS: Sequence[int] = 16, 14
@dataclass
class CartDBEntry:
    """Cartridge database entry built from a dump and its parsed contents."""

    code: str
    region: str
    name: str
    dataKey: bytes
    chipType: ChipType
    formatType: FormatType
    traceIDType: TraceIDType
    flags: DataFlag
    traceIDParam: int = 0
    installIDPrefix: int = 0
    year: int = 0

    def __init__(
        self, code: str, region: str, name: str, dump: CartDump,
        parser: CartParser
    ):
        """Populate the entry from the given dump and parser.

        Raises RuntimeError if none of the known trace ID parameters yields a
        valid trace ID type.
        """
        # Find the correct parameters for the trace ID heuristically, by
        # trying each candidate until the trace ID validates.
        traceIDType: TraceIDType | None = None

        for candidate in _TRACE_ID_PARAMS:
            self.traceIDParam = candidate

            try:
                traceIDType = parser.identifiers.getTraceIDType(candidate)
            except ValueError:
                continue

            break

        if traceIDType is None:
            raise RuntimeError("failed to determine trace ID parameters")

        self.code = code
        self.region = region
        self.name = name
        self.dataKey = dump.dataKey
        self.chipType = dump.chipType
        self.formatType = parser.getFormatType()
        self.traceIDType = traceIDType
        self.flags = parser.flags
        self.year = parser.year or 0

        # The public install ID takes precedence over the private one.
        installID: bytes | None = parser.publicIdentifiers.installID

        if installID is None:
            installID = parser.identifiers.installID

        self.installIDPrefix = 0 if installID is None else installID[0]

    # Implement the comparison overload so sorting will work. The 3-digit
    # number in the game code is used as the primary key.
    def __lt__(self, entry: Any) -> bool:
        ours = ( self.code[2:], self.code[0:2], self.region, self.name )
        theirs = ( entry.code[2:], entry.code[0:2], entry.region, entry.name )

        return ours < theirs

    def requiresCartID(self) -> bool:
        """Return whether the cart ID is needed, either because the format
        stores it or because the trace ID is a 0x82-type one."""
        if self.flags & DataFlag.DATA_HAS_CART_ID:
            return True

        hasTraceID: bool = bool(self.flags & DataFlag.DATA_HAS_TRACE_ID)

        return hasTraceID and \
            (self.traceIDType >= TraceIDType.TID_82_BIG_ENDIAN)

    def serialize(self) -> bytes:
        """Pack the entry into its binary database representation."""
        code: bytes = self.code.encode("ascii")
        region: bytes = self.region.encode("ascii")
        name: bytes = self.name.encode("ascii")

        return _CART_DB_ENTRY_STRUCT.pack(
            self.chipType,
            self.formatType,
            self.traceIDType,
            self.flags,
            self.traceIDParam,
            self.installIDPrefix,
            self.year,
            self.dataKey,
            code,
            region,
            name
        )
@dataclass
class ROMHeaderDBEntry:
    """ROM header database entry built from a parsed header."""

    code: str
    region: str
    name: str
    formatType: FormatType
    flags: DataFlag
    year: int = 0

    def __init__(
        self, code: str, region: str, name: str, parser: ROMHeaderParser
    ):
        """Populate the entry from the given metadata and parser."""
        self.code = code
        self.region = region
        self.name = name
        self.formatType = parser.getFormatType()
        self.flags = parser.flags
        self.year = parser.year or 0

    # Comparison overload used for sorting, with the part of the game code
    # after the 2-character prefix as the primary key.
    def __lt__(self, entry: Any) -> bool:
        ours = ( self.code[2:], self.code[0:2], self.region, self.name )
        theirs = ( entry.code[2:], entry.code[0:2], entry.region, entry.name )

        return ours < theirs

    def serialize(self) -> bytes:
        """Pack the entry into its binary database representation."""
        code: bytes = self.code.encode("ascii")
        region: bytes = self.region.encode("ascii")
        name: bytes = self.name.encode("ascii")

        return _ROM_HEADER_DB_ENTRY_STRUCT.pack(
            self.formatType,
            self.flags,
            self.year,
            code,
            region,
            name
        )
## Data format identification
# Table of known cartridge data formats. Each entry is a tuple of
# (human-readable name used in debug logs, parser class, format flags passed to
# the parser). newCartParser() walks this table in reverse, so entries listed
# later are tried first.
_KNOWN_CART_FORMATS: Sequence[tuple[str, Type, DataFlag]] = (
(
# Used by GCB48 (and possibly other games?)
"region only",
SimpleCartParser,
DataFlag.DATA_HAS_PUBLIC_SECTION
), (
"basic (no IDs)",
BasicCartParser,
DataFlag.DATA_CHECKSUM_INVERTED
), (
"basic + TID",
BasicCartParser,
DataFlag.DATA_HAS_TRACE_ID | DataFlag.DATA_CHECKSUM_INVERTED
), (
"basic + SID",
BasicCartParser,
DataFlag.DATA_HAS_CART_ID | DataFlag.DATA_CHECKSUM_INVERTED
), (
"basic + TID, SID",
BasicCartParser,
DataFlag.DATA_HAS_TRACE_ID | DataFlag.DATA_HAS_CART_ID
| DataFlag.DATA_CHECKSUM_INVERTED
), (
"basic + prefix, TID, SID",
BasicCartParser,
DataFlag.DATA_HAS_CODE_PREFIX | DataFlag.DATA_HAS_TRACE_ID
| DataFlag.DATA_HAS_CART_ID | DataFlag.DATA_CHECKSUM_INVERTED
), (
# Used by most pre-ZS01 Bemani games
"basic + prefix, all IDs",
BasicCartParser,
DataFlag.DATA_HAS_CODE_PREFIX | DataFlag.DATA_HAS_TRACE_ID
| DataFlag.DATA_HAS_CART_ID | DataFlag.DATA_HAS_INSTALL_ID
| DataFlag.DATA_HAS_SYSTEM_ID | DataFlag.DATA_CHECKSUM_INVERTED
), (
"extended (no IDs)",
ExtendedCartParser,
DataFlag.DATA_HAS_CODE_PREFIX | DataFlag.DATA_CHECKSUM_INVERTED
), (
"extended (no IDs, alt)",
ExtendedCartParser,
DataFlag.DATA_HAS_CODE_PREFIX
), (
# Used by GX706
"extended (no IDs, GX706)",
ExtendedCartParser,
DataFlag.DATA_HAS_CODE_PREFIX | DataFlag.DATA_GX706_WORKAROUND
), (
# Used by GE936/GK936 and all ZS01 Bemani games
"extended + all IDs",
ExtendedCartParser,
DataFlag.DATA_HAS_CODE_PREFIX | DataFlag.DATA_HAS_TRACE_ID
| DataFlag.DATA_HAS_CART_ID | DataFlag.DATA_HAS_INSTALL_ID
| DataFlag.DATA_HAS_SYSTEM_ID | DataFlag.DATA_HAS_PUBLIC_SECTION
| DataFlag.DATA_CHECKSUM_INVERTED
)
)
# Table of known ROM header formats. Each entry is a tuple of (human-readable
# name used in debug logs, parser class, format flags passed to the parser).
# newROMHeaderParser() walks this table in reverse, so entries listed later are
# tried first.
_KNOWN_ROM_HEADER_FORMATS: Sequence[tuple[str, Type, DataFlag]] = (
(
"extended (no MD5)",
ExtendedROMHeaderParser,
DataFlag.DATA_HAS_CODE_PREFIX | DataFlag.DATA_CHECKSUM_INVERTED
), (
"extended (no MD5, alt)",
ExtendedROMHeaderParser,
DataFlag.DATA_HAS_CODE_PREFIX
), (
# Used by GX706
"extended (no MD5, GX706)",
ExtendedROMHeaderParser,
DataFlag.DATA_HAS_CODE_PREFIX | DataFlag.DATA_GX706_WORKAROUND
), (
"extended + MD5",
ExtendedROMHeaderParser,
DataFlag.DATA_HAS_CODE_PREFIX | DataFlag.DATA_HAS_SYSTEM_ID
| DataFlag.DATA_CHECKSUM_INVERTED
)
)
def newCartParser(dump: CartDump) -> CartParser:
    """Return a parser for the first known format the dump validates as.

    Formats are tried starting from the last entry of _KNOWN_CART_FORMATS.
    Raises RuntimeError if the dump matches none of them.
    """
    for name, constructor, flags in reversed(_KNOWN_CART_FORMATS):
        try:
            return constructor(dump, flags)
        except ParserError as exc:
            # Not this format; log the reason and move on to the next one.
            logging.debug(f"parsing as {name} failed, {exc}")

    raise RuntimeError("no known data format found")
def newROMHeaderParser(dump: ROMHeaderDump) -> ROMHeaderParser:
    """Return a parser for the first known format the header validates as.

    Formats are tried starting from the last entry of
    _KNOWN_ROM_HEADER_FORMATS. Raises RuntimeError if the dump matches none of
    them.
    """
    for name, constructor, flags in reversed(_KNOWN_ROM_HEADER_FORMATS):
        try:
            return constructor(dump, flags)
        except ParserError as exc:
            # Not this format; log the reason and move on to the next one.
            logging.debug(f"parsing as {name} failed, {exc}")

    raise RuntimeError("no known data format found")