beerpsi-x/cm/cmpack.py

392 lines
12 KiB
Python
Raw Normal View History

2024-07-10 11:44:47 +02:00
# pyright: reportAny=false
"""
A script to work with the packed filesystem used by CardMaker
(CardMaker_Data/StreamingAssets/MU3/Data/A000.pac).
Usage:

- Extract the contents of a pac file:
    python cmpack.py x <.pac file> <output directory>
- Create a pac file from a base directory:
    python cmpack.py a <.pac file> <input directory>
- List the contents of a pac file:
    python cmpack.py l <.pac file>

The pac file format (all integers are little-endian):

- Header (32 bytes)
    - Magic: 50 41 43 4B ("PACK")
    - Reserved: The game fills in 4 random bytes.
    - Number of entries: 64-bit signed integer
    - Offset of string table: 64-bit signed integer. The string table is used
      for storing filenames, and is encoded with UTF-16 LE.
    - Offset of file data: 64-bit signed integer.
- Entries (28 bytes * number of entries)
    - Type: 32-bit signed integer
        - 0: File
        - 1: Directory
    - Flags: 32-bit signed integer, always set to 0.
    - Offset of filename: 32-bit signed integer, is the *character* offset of
      the filename in the string table. **NOT A BYTE OFFSET!**
    - Length of filename: 32-bit signed integer
    - Length of file data: 32-bit signed integer. For a directory, this is the
      number of children instead.
    - Offset of file data: 64-bit signed integer, starting from the offset of
      file data declared in the header. For a directory, this is the index of
      its first child in the list of entries instead.
- String table
- File data
- Adler32 checksum of all of the above (4 bytes)
"""
import argparse
import os
import random
import struct
import zlib
from collections.abc import Generator
from dataclasses import dataclass
from io import BufferedIOBase
from pathlib import Path
from typing import Any, NamedTuple, cast
# The four ASCII bytes "PACK", interpreted as a little-endian 32-bit integer.
PACK_MAGIC = int.from_bytes(b"PACK", "little")
# Header layout (little-endian): magic and reserved as unsigned 32-bit
# integers, then entry count, string table offset and data offset as signed
# 64-bit integers.
PACK_HEADER_FORMAT = "<IIqqq"
# Entry layout (little-endian): type, flags, name offset, name length and
# data length as signed 32-bit integers, then data offset as a signed 64-bit
# integer.
PACK_ENTRY_FORMAT = "<iiiiiq"
class PackHeader(NamedTuple):
    """
    Header record of a CardMaker pack file.

    The fields are declared in the same order as the values described by
    ``PACK_HEADER_FORMAT``, so an instance can be built directly from the
    unpacked tuple.
    """

    # Magic number; constant `0x4B434150`.
    magic: int
    # Reserved field.
    reserved: int
    # Number of entries (files and folders) in the pack.
    num_entries: int
    # Offset to the string table (UTF-16 LE) containing all filenames.
    offset_string: int
    # Offset to the actual file data.
    offset_data: int
class PackEntry(NamedTuple):
    """
    One entry (file or folder) of a CardMaker pack file.

    The fields are declared in the same order as the values described by
    ``PACK_ENTRY_FORMAT``, so an instance can be built directly from the
    unpacked tuple.
    """

    # Kind of entry: `0` for files, `1` for folders.
    type: int
    # Unused; should be set to `0`.
    flags: int
    # **Character** offset (not byte offset) of the filename in the string
    # table.
    name_offset: int
    # Length of the filename.
    name_length: int
    # For a file: the length of the file data.
    # For a folder: the number of children.
    data_length: int
    # For a file: the offset of the file data, counted from
    # `PackHeader.offset_data`.
    # For a folder: the index of its first child in the list of entries.
    data_offset: int
class PackFile:
    """
    A file stored in a pack.

    Attributes:
        name: The filename, sliced out of the string table.
        offset: The entry's ``data_offset``.
        length: The entry's ``data_length``.
    """

    def __init__(self, entry: PackEntry, string_table: str) -> None:
        """
        Build a file description from its pack entry.

        :param entry: The entry describing the file; its type must be ``0``.
        :param string_table: The decoded string table of the pack.
        :raises ValueError: If the entry's type is not ``0``.
        """
        if entry.type != 0:
            raise ValueError(f"Invalid entry ID {entry.type} (expected 0).")

        # Name offsets and lengths index the decoded string, not raw bytes.
        name_start = entry.name_offset
        name_end = name_start + entry.name_length
        self.name = string_table[name_start:name_end]

        self.offset, self.length = entry.data_offset, entry.data_length
class PackFolder:
    """
    A folder stored in a pack, together with everything below it.

    Attributes:
        name: The folder name, sliced out of the string table.
        files: The files directly inside this folder, keyed by name.
        folders: The folders directly inside this folder, keyed by name.
    """

    def __init__(
        self, entries: list[PackEntry], entry_id: int, string_table: str
    ) -> None:
        """
        Build the folder at ``entries[entry_id]`` and, recursively, its children.

        :param entries: Every entry of the pack, in on-disk order.
        :param entry_id: Index into ``entries`` of the folder to build.
        :param string_table: The decoded string table of the pack.
        :raises ValueError: If the entry is not a folder (type ``1``), or if a
            child has a type other than ``0`` or ``1``.
        """
        entry = entries[entry_id]
        if entry.type != 1:
            raise ValueError(f"Invalid entry ID {entry.type} (expected 1).")

        name_end = entry.name_offset + entry.name_length
        self.name = string_table[entry.name_offset:name_end]
        self.files: dict[str, PackFile] = {}
        self.folders: dict[str, PackFolder] = {}

        # A folder's children are stored as one contiguous run of entries:
        # `data_offset` is the index of the first child and `data_length`
        # is the number of children.
        first_child = entry.data_offset
        for child_id in range(first_child, first_child + entry.data_length):
            child_entry = entries[child_id]
            if child_entry.type == 0:
                child_file = PackFile(child_entry, string_table)
                self.files[child_file.name] = child_file
            elif child_entry.type == 1:
                child_folder = PackFolder(entries, child_id, string_table)
                self.folders[child_folder.name] = child_folder
            else:
                raise ValueError(
                    f"Entry {child_id} has unknown type {child_entry.type}."
                )
@dataclass
class Pack:
    """
    The decoded metadata of a pack file, as returned by `decode_pack_meta`.
    """

    # The header read from the start of the pack.
    header: PackHeader
    # Every entry of the pack, in on-disk order.
    entries: list[PackEntry]
    # The folder tree built from entry 0.
    root: PackFolder
def decode_pack_meta(f: BufferedIOBase):
    """
    Read the header, string table and entry list of a pack file.

    File contents are not read; only the metadata needed to locate them.

    :param f: A seekable binary stream positioned at the start of the pack.
    :return: A `Pack` holding the header, the entries and the folder tree
        rooted at entry 0.
    :raises Exception: If the magic number in the header is wrong.
    """
    raw_header = f.read(32)
    header = PackHeader(*struct.unpack(PACK_HEADER_FORMAT, raw_header))
    if header.magic != PACK_MAGIC:
        raise Exception(f"Invalid pack file: {header.magic:x} != {PACK_MAGIC:x}")

    # The string table occupies everything between its own offset and the
    # start of the file data.
    table_size = header.offset_data - header.offset_string
    _ = f.seek(header.offset_string, os.SEEK_SET)
    string_table = f.read(table_size).decode("utf-16-le")

    # The entries follow the 32-byte header directly, 28 bytes apiece.
    _ = f.seek(32, os.SEEK_SET)
    entries: list[PackEntry] = [
        PackEntry(*struct.unpack(PACK_ENTRY_FORMAT, f.read(28)))
        for _ in range(header.num_entries)
    ]

    return Pack(header, entries, PackFolder(entries, 0, string_table))
def traverse_pack(
    folder: PackFolder, path: Path
) -> Generator[tuple[Path, PackFile], Any, None]:
    """
    Yield every file below ``folder``, paired with its path.

    The files directly inside ``folder`` come first, followed by the files of
    each subfolder in turn (recursively).

    :param folder: The folder to walk.
    :param path: The path standing for ``folder``; yielded paths are built by
        joining entry names onto it.
    """
    yield from ((path / file.name, file) for file in folder.files.values())
    for subfolder in folder.folders.values():
        yield from traverse_pack(subfolder, path / subfolder.name)
def walk_breadth_first(root: Path) -> Generator[tuple[Path, list[Path]], Any, None]:
    """
    Walk a directory tree, yielding each directory with all of its children.

    Each directory is yielded once, as ``(directory, children)``, where
    ``children`` lists every path directly inside it. Despite the name, the
    directories themselves are visited in pre-order (a directory's subtree is
    finished before its next sibling is visited); what is "breadth first" is
    that all children of a directory are reported together.

    :param root: The directory to start from.
    """
    entries = [*root.iterdir()]
    yield root, entries
    # Lazy filter: `is_dir()` is only evaluated when the walk reaches each path.
    for subdirectory in (entry for entry in entries if entry.is_dir()):
        yield from walk_breadth_first(subdirectory)
def pack_extract(args: argparse.Namespace):
    """
    Extract every file stored in a pack into a directory.

    The Adler-32 checksum stored in the last 4 bytes of the pack is verified
    before anything is written. If ``tqdm`` is installed, a progress bar is
    shown while extracting.

    :param args: Parsed command line arguments. ``args.archive_name`` is the
        pack to read and ``args.output_directory`` is the directory to extract
        into (created if missing); both are `Path` objects.
    :raises ValueError: If the output path exists but is not a directory, or
        if an entry of the pack would be written outside the output directory.
    :raises RuntimeError: If the stored checksum does not match the contents.
    """
    archive_name = cast(Path, args.archive_name)
    output_directory = cast(Path, args.output_directory)

    if output_directory.exists() and not output_directory.is_dir():
        raise ValueError(f"Output {output_directory} is not a folder.")
    output_directory.mkdir(parents=True, exist_ok=True)

    with archive_name.open("rb") as f:
        actual_checksum = zlib.adler32(f.read()[:-4])
        _ = f.seek(-4, os.SEEK_END)
        # zlib.adler32 returns an unsigned 32-bit value, so the stored
        # checksum must be read as unsigned too; reading it as signed would
        # reject every pack whose checksum is 0x80000000 or above.
        expected_checksum = struct.unpack("<I", f.read(4))[0]

        if actual_checksum != expected_checksum:
            raise RuntimeError(f"Invalid pack file, checksums don't match: {actual_checksum:x} != {expected_checksum:x}")

        _ = f.seek(0)
        pack = decode_pack_meta(f)

        # tqdm is optional: without it, extraction simply runs silently.
        try:
            from tqdm import tqdm
        except ImportError:
            pb = None
        else:
            pb = tqdm(
                desc="Extracting pack",
                total=sum(1 for entry in pack.entries if entry.type == 0),
            )

        extraction_root = output_directory.resolve()
        try:
            for path, file in traverse_pack(pack.root, output_directory):
                # Entry names come from the pack itself; refuse names such as
                # ".." or absolute paths that would escape the output folder.
                if not path.resolve().is_relative_to(extraction_root):
                    raise ValueError(
                        f"Refusing to extract {path}: it is outside of {output_directory}."
                    )

                path.parent.mkdir(parents=True, exist_ok=True)
                # File offsets are relative to the start of the data section.
                _ = f.seek(pack.header.offset_data + file.offset, os.SEEK_SET)

                with path.open("wb") as fo:
                    _ = fo.write(f.read(file.length))

                if pb is not None:
                    _ = pb.update(1)
        finally:
            if pb is not None:
                pb.close()
def pack_archive(args: argparse.Namespace):
    """
    Create a pack file from the contents of a directory.

    The directory tree is walked once to build the entry list and the string
    table, then the header, entries, string table, file contents and a
    trailing Adler-32 checksum of all of them are written out in that order.

    :param args: Parsed command line arguments. ``args.archive_name`` is the
        pack to write and ``args.root_directory`` is the directory whose
        contents are packed; both are `Path` objects.
    """
    archive_name = cast(Path, args.archive_name)
    root = cast(Path, args.root_directory)

    # The string table is collected as parts and joined once at the end.
    # Identical names share a single slot in the table.
    # NOTE(review): offsets and lengths are counted in Python characters;
    # this differs from UTF-16 code units for names outside the BMP.
    # Confirm which one the game expects.
    name_parts: list[str] = []
    string_table_length = 0
    string_table_lookup: dict[str, int] = {}

    # Entry 0 is the nameless root folder; its children start at entry 1.
    entries: list[dict[str, int]] = [
        {
            "type": 1,
            "flags": 0,
            "name_length": 0,
            "name_offset": 0,
            "data_offset": 1,
        }
    ]
    # Maps each directory to the index of its entry, so that its child range
    # can be filled in once the walk reaches it.
    entry_indexes = {root: 0}
    files: list[Path] = []
    file_offset = 0

    # Figure out the pack entries by walking the file tree. All children of a
    # directory are appended together, so they form one contiguous run.
    for parent, children in walk_breadth_first(root):
        parent_entry = entries[entry_indexes[parent]]
        parent_entry["data_offset"] = len(entries)
        parent_entry["data_length"] = len(children)

        for child in children:
            name = child.name
            name_offset = string_table_lookup.get(name)
            if name_offset is None:
                name_offset = string_table_lookup[name] = string_table_length
                name_parts.append(name)
                string_table_length += len(name)

            entry = {
                "flags": 0,
                "name_length": len(name),
                "name_offset": name_offset,
            }

            if child.is_dir():
                entry["type"] = 1
                entry_indexes[child] = len(entries)
                # data_offset and data_length will be filled in when we walk
                # to them as the parent.
            else:
                size = child.stat().st_size
                entry["type"] = 0
                entry["data_offset"] = file_offset
                entry["data_length"] = size
                files.append(child)
                file_offset += size

            entries.append(entry)

    string_table_bytes = "".join(name_parts).encode("utf-16-le")
    num_entries = len(entries)
    # The string table follows the header and the fixed-size entries.
    offset_string = (
        struct.calcsize(PACK_HEADER_FORMAT)
        + struct.calcsize(PACK_ENTRY_FORMAT) * num_entries
    )
    header = PackHeader(
        PACK_MAGIC,
        # Reserved field: any 4 random bytes.
        random.randrange(1 << 32),
        num_entries,
        offset_string,
        offset_string + len(string_table_bytes),
    )

    # Actually write the pack, keeping a running checksum of every byte.
    with archive_name.open("wb") as f:
        # 1 is the starting value of an Adler-32 checksum.
        checksum = 1

        data = struct.pack(PACK_HEADER_FORMAT, *header)
        checksum = zlib.adler32(data, checksum)
        _ = f.write(data)

        for entry in entries:
            data = struct.pack(PACK_ENTRY_FORMAT, *PackEntry(**entry))
            checksum = zlib.adler32(data, checksum)
            _ = f.write(data)

        checksum = zlib.adler32(string_table_bytes, checksum)
        _ = f.write(string_table_bytes)

        for file in files:
            data = file.read_bytes()
            checksum = zlib.adler32(data, checksum)
            _ = f.write(data)

        # zlib.adler32 returns an unsigned 32-bit value; packing it as a
        # signed integer raises struct.error for 0x80000000 and above.
        _ = f.write(struct.pack("<I", checksum))
def pack_list(args: argparse.Namespace):
    """
    Print the size and path of every file stored in a pack.

    :param args: Parsed command line arguments. ``args.archive_name`` is the
        pack to list, as a `Path`.
    """
    archive_path = cast(Path, args.archive_name)

    # Only the metadata is needed, so the pack can be closed before printing.
    with archive_path.open("rb") as stream:
        pack = decode_pack_meta(stream)

    print(" Size Name")
    print("------ ------")

    listing = traverse_pack(pack.root, Path(""))
    for path, file in listing:
        print(f"{file.length:6} {path}")
if __name__ == "__main__":
    # One row per subcommand: its name, its help text, the name of its second
    # positional argument (if any) and the function that implements it.
    # Every subcommand takes the pack file as its first positional argument.
    command_table = (
        ("x", "Extract a CardMaker pack", "output_directory", pack_extract),
        ("a", "Create a CardMaker pack", "root_directory", pack_archive),
        ("l", "List contents of a CardMaker pack", None, pack_list),
    )

    parser = argparse.ArgumentParser(
        description="Script for working with CardMaker pack files"
    )
    subcommands = parser.add_subparsers(required=True)

    for command, help_text, extra_positional, handler in command_table:
        subparser = subcommands.add_parser(command, help=help_text)
        _ = subparser.add_argument("archive_name", type=Path)
        if extra_positional is not None:
            _ = subparser.add_argument(extra_positional, type=Path)
        _ = subparser.set_defaults(func=handler)

    cli_args = parser.parse_args()
    cli_args.func(cli_args)