1
0
mirror of synced 2024-11-12 01:00:46 +01:00

Initial code for The*BishiBashi graphics file unpacker.

This commit is contained in:
Jennifer Taylor 2020-11-06 02:08:21 +00:00
parent d4ba50a5cd
commit 786676fd26
5 changed files with 768 additions and 1 deletions

198
bemani/format/dxt.py Normal file
View File

@ -0,0 +1,198 @@
"""
S3TC DXT1/DXT5 Texture Decompression
Adapted from https://github.com/leamsii/Python-DXT-Decompress to add types
and take in bytes instead of file pointers. Inspired by Benjamin Dobell.
Original C++ code https://github.com/Benjamin-Dobell/s3tc-dxt-decompression
"""
import io
import struct
from typing import List, Optional, Tuple
def unpack(_bytes: bytes) -> int:
STRUCT_SIGNS = {
1: 'B',
2: 'H',
4: 'I',
8: 'Q'
}
return struct.unpack('<' + STRUCT_SIGNS[len(_bytes)], _bytes)[0]
# This function converts RGB565 format to raw pixels
def unpackRGB(packed: int) -> Tuple[int, int, int, int]:
R = (packed >> 11) & 0x1F
G = (packed >> 5) & 0x3F
B = (packed) & 0x1F
R = (R << 3) | (R >> 2)
G = (G << 2) | (G >> 4)
B = (B << 3) | (B >> 2)
return (R, G, B, 255)
class DXTBuffer:
def __init__(self, width: int, height: int):
self.width = width
self.height = height
self.block_countx = self.width // 4
self.block_county = self.height // 4
self.decompressed_buffer: List[Optional[bytes]] = [None] * ((width * height) * 2) # Dont ask me why
def DXT5Decompress(self, filedata: bytes) -> bytes:
# Loop through each block and decompress it
file = io.BytesIO(filedata)
for row in range(self.block_county):
for col in range(self.block_countx):
# Get the alpha values
a0 = unpack(file.read(1))
a1 = unpack(file.read(1))
atable = file.read(6)
acode0 = atable[2] | (atable[3] << 8) | (atable[4] << 16) | (atable[5] << 24)
acode1 = atable[0] | (atable[1] << 8)
# Color 1 color 2, color look up table
c0 = unpack(file.read(2))
c1 = unpack(file.read(2))
ctable = unpack(file.read(4))
# The 4x4 Lookup table loop
for j in range(4):
for i in range(4):
alpha = self.getAlpha(j, i, a0, a1, acode0, acode1)
self.getColors(
row * 4,
col * 4,
i,
j,
ctable,
unpackRGB(c0),
unpackRGB(c1),
alpha,
) # Set the color for the current pixel
return b''.join([x for x in self.decompressed_buffer if x is not None])
def DXT1Decompress(self, filedata: bytes) -> bytes:
# Loop through each block and decompress it
file = io.BytesIO(filedata)
for row in range(self.block_county):
for col in range(self.block_countx):
# Color 1 color 2, color look up table
c0 = unpack(file.read(2))
c1 = unpack(file.read(2))
ctable = unpack(file.read(4))
# The 4x4 Lookup table loop
for j in range(4):
for i in range(4):
self.getColors(
row * 4,
col * 4,
i,
j,
ctable,
unpackRGB(c0),
unpackRGB(c1),
255,
) # Set the color for the current pixel
return b''.join([_ for _ in self.decompressed_buffer if _ != 'X'])
def getColors(
self,
x: int,
y: int,
i: int,
j: int,
ctable: int,
c0: Tuple[int, int, int, int],
c1: Tuple[int, int, int, int],
alpha: int,
) -> None:
code = (ctable >> (2 * (4 * i + j))) & 0x03 # Get the color of the current pixel
pixel_color = None
r0 = c0[0]
g0 = c0[1]
b0 = c0[2]
r1 = c1[0]
g1 = c1[1]
b1 = c1[2]
# Main two colors
if code == 0:
pixel_color = (r0, g0, b0, alpha)
if code == 1:
pixel_color = (r1, g1, b1, alpha)
# Use the lookup table to determine the other two colors
if c0 > c1:
if code == 2:
pixel_color = ((2 * r0 + r1) // 3, (2 * g0 + g1) // 3, (2 * b0 + b1) // 3, alpha)
if code == 3:
pixel_color = ((r0 + 2 * r1) // 3, (g0 + 2 * g1) // 3, (b0 + 2 * b1) // 3, alpha)
else:
if code == 2:
pixel_color = ((r0 + r1) // 2, (g0 + g1) // 2, (b0 + b1) // 2, alpha)
if code == 3:
pixel_color = (0, 0, 0, alpha)
# While not surpassing the image dimensions, assign pixels the colors
if (x + i) < self.width:
self.decompressed_buffer[(y + j) * self.width + (x + i)] = (
struct.pack('<B', pixel_color[0]) +
struct.pack('<B', pixel_color[1]) +
struct.pack('<B', pixel_color[2]) +
struct.pack('<B', pixel_color[3])
)
def getAlpha(self, i: int, j: int, a0: int, a1: int, acode0: int, acode1: int) -> int:
# Using the same method as the colors calculate the alpha values
alpha = 255
alpha_index = 3 * (4 * j + i)
alpha_code = None
if alpha_index <= 12:
alpha_code = (acode1 >> alpha_index) & 0x07
elif alpha_index == 15:
alpha_code = (acode1 >> 15) | ((acode0 << 1) & 0x06)
else:
alpha_code = (acode0 >> (alpha_index - 16)) & 0x07
if alpha_code == 0:
alpha = a0
elif alpha_code == 1:
alpha = a1
else:
if a0 > a1:
alpha = ((8 - alpha_code) * a0 + (alpha_code - 1) * a1) // 7
else:
if alpha_code == 6:
alpha = 0
elif alpha_code == 7:
alpha = 255
elif alpha_code == 5:
alpha = (1 * a0 + 4 * a1) // 5
elif alpha_code == 4:
alpha = (2 * a0 + 3 * a1) // 5
elif alpha_code == 3:
alpha = (3 * a0 + 2 * a1) // 5
elif alpha_code == 2:
alpha = (4 * a0 + 1 * a1) // 5
else:
alpha = 0 # For safety
return alpha

View File

@ -32,10 +32,36 @@ class Node:
NODE_TYPE_IP4 = 12
NODE_TYPE_TIME = 13
NODE_TYPE_FLOAT = 14
# 15 is probably a double type?
# These seem to repeat, so they are not all verified or supported.
NODE_TYPE_2S8 = 16
NODE_TYPE_2U8 = 17
NODE_TYPE_2S16 = 18
NODE_TYPE_2U16 = 19
NODE_TYPE_2S32 = 20
NODE_TYPE_2U32 = 21
NODE_TYPE_2S64 = 22
NODE_TYPE_2U64 = 23
NODE_TYPE_3S8 = 26
NODE_TYPE_3U8 = 27
NODE_TYPE_3S16 = 28
NODE_TYPE_3U16 = 29
NODE_TYPE_3S32 = 30
NODE_TYPE_3U32 = 31
NODE_TYPE_3S64 = 32
NODE_TYPE_3U64 = 33
NODE_TYPE_4S8 = 36
NODE_TYPE_4U8 = 37
NODE_TYPE_4S16 = 38
NODE_TYPE_4U16 = 39
NODE_TYPE_4S32 = 40
NODE_TYPE_4U32 = 41
NODE_TYPE_4S64 = 42
NODE_TYPE_4U64 = 43
NODE_TYPE_BOOL = 52
NODE_TYPES = {
@ -137,6 +163,13 @@ class Node:
'int': False,
'composite': False,
},
NODE_TYPE_2S16: {
'name': '2s16',
'enc': 'hh',
'len': 4,
'int': True,
'composite': True,
},
NODE_TYPE_2U16: {
'name': '2u16',
'enc': 'HH',
@ -437,7 +470,7 @@ class Node:
self.__translated_type = Node.NODE_TYPES[type & (~Node.ARRAY_BIT)]
self.__type = type
except KeyError:
raise NodeException(f'Unknown node type {type}')
raise NodeException(f'Unknown node type {type} on node name {self.__name}')
@property
def type(self) -> int:

525
bemani/utils/bishiutils.py Normal file
View File

@ -0,0 +1,525 @@
#! /usr/bin/env python3
import argparse
import os
import os.path
import struct
import sys
from PIL import Image, ImageOps # type: ignore
from typing import Any, List
from bemani.format.dxt import DXTBuffer
from bemani.protocol.binary import BinaryEncoding
from bemani.protocol.lz77 import Lz77
def get_until_null(data: bytes, offset: int) -> bytes:
out = b""
while data[offset] != 0:
out += data[offset:(offset + 1)]
offset += 1
return out
def descramble_text(text: bytes, obfuscated: bool) -> str:
if len(text):
if obfuscated and (text[0] - 0x20) > 0x7F:
# Gotta do a weird demangling where we swap the
# top bit.
return bytes(((x + 0x80) & 0xFF) for x in text).decode('ascii')
else:
return text.decode('ascii')
else:
return ""
def descramble_pman(package_data: bytes, offset: int, obfuscated: bool) -> List[str]:
magic, _, _, _, numentries, _, offset = struct.unpack(
"<4sIIIIII",
package_data[offset:(offset + 28)],
)
if magic != b"PMAN":
raise Exception("Invalid magic value in PMAN structure!")
names = []
if numentries > 0:
# Jump to the offset, parse it out
for i in range(numentries):
file_offset = offset + (i * 12)
_, entry_no, nameoffset = struct.unpack(
"<III",
package_data[file_offset:(file_offset + 12)],
)
if nameoffset == 0:
raise Exception("Expected name offset in PMAN data!")
bytedata = get_until_null(package_data, nameoffset)
name = descramble_text(bytedata, obfuscated)
names.append(name)
return names
def swap32(i: int) -> int:
return struct.unpack("<I", struct.pack(">I", i))[0]
def extract(filename: str, output_dir: str, *, write: bool, verbose: bool = False) -> None:
with open(filename, "rb") as fp:
data = fp.read()
# Suppress debug text unless asked
if verbose:
vprint = print
else:
def vprint(*args: Any) -> None: # type: ignore
pass
# First, check the signature
if data[0:4] != b"2PXT":
raise Exception("Invalid graphic file format!")
# Not sure what words 2 and 3 are, they seem to be some sort of
# version or date?
# Now, grab the file length, verify that we have the right amount
# of data.
length = struct.unpack("<I", data[12:16])[0]
if length != len(data):
raise Exception(f"Invalid graphic file length, expecting {length} bytes!")
# I think that offset 16-20 are the file data offset, but I'm not sure?
header_length = struct.unpack("<I", data[16:20])[0]
# Now, the meat of the file format. Bytes 20-24 are a bitfield for
# what parts of the header exist in the file. We need to understand
# each bit so we know how to skip past each section.
feature_mask = struct.unpack("<I", data[20:24])[0]
header_offset = 24
# Lots of magic happens if this bit is set.
text_obfuscated = bool(feature_mask & 0x20)
legacy_lz = bool(feature_mask & 0x04)
modern_lz = bool(feature_mask & 0x40000)
# Get raw directory where we want to put files
path = os.path.abspath(output_dir)
if feature_mask & 0x01:
# List of textures that exist in the file, with pointers to their data.
length, offset = struct.unpack("<II", data[header_offset:(header_offset + 8)])
header_offset += 8
names = []
for x in range(length):
interesting_offset = offset + (x * 12)
if interesting_offset != 0:
name_offset, _, texture_offset = struct.unpack(
"<III",
data[interesting_offset:(interesting_offset + 12)],
)
if name_offset != 0:
# Let's decode this until the first null.
bytedata = get_until_null(data, name_offset)
name = descramble_text(bytedata, text_obfuscated)
names.append(name)
if texture_offset != 0:
filename = os.path.join(path, name)
if not write:
print(f"Would extract {filename}...")
else:
if legacy_lz:
raise Exception("We don't support legacy lz mode!")
elif modern_lz:
# Get size, round up to nearest power of 4
tex_size = struct.unpack(">I", data[texture_offset:(texture_offset + 4)])[0]
tex_size = (tex_size + 3) & (~3)
# Get the data offset
lz_data_offset = texture_offset + 8
lz_data = data[lz_data_offset:(lz_data_offset + tex_size)]
lz77 = Lz77()
print(f"Extracting {filename}...")
raw_data = lz77.decompress(lz_data)
else:
# File data doesn't seem to have any length.
# TODO: Calculate length from width/height.
raw_data = data[(texture_offset + 8):]
raise Exception("Unfinished section, unknown raw length!")
# Now, see if we can extract this data.
magic, _, _, _, width, height, fmt, _, flags2, flags1 = struct.unpack(
"<4sIIIHHBBBB",
raw_data[0:24],
)
if magic != b"TDXT":
raise Exception("Unexpected texture format!")
img = None
if fmt == 0x20:
img = Image.frombytes(
'RGBA', (width, height), raw_data[64:], 'raw', 'BGRA',
)
elif fmt == 0x0E:
img = Image.frombytes(
'RGB', (width, height), raw_data[64:], 'raw', 'RGB',
)
elif fmt == 0x1A:
# DXT5 format.
dxt = DXTBuffer(width, height)
img = Image.frombuffer(
'RGBA',
(width, height),
dxt.DXT5Decompress(raw_data[64:]),
'raw',
'RGBA',
0,
1,
)
img = ImageOps.flip(img).rotate(-90, expand=True)
else:
print(f"Unsupported format {hex(fmt)} for texture {name}")
# Actually place the file down.
os.makedirs(path, exist_ok=True)
with open(f"{filename}.raw", "wb") as bfp:
bfp.write(raw_data)
if img:
with open(f"{filename}.png", "wb") as bfp:
img.save(bfp, format='PNG')
vprint(f"Bit 0x000001 - count: {length}, offset: {hex(offset)}")
for name in names:
vprint(f" {name}")
else:
vprint("Bit 0x000001 - NOT PRESENT")
if feature_mask & 0x02:
# Seems to be a structure that duplicates texture names? Maybe this is
# used elsewhere to map sections to textures? The structure includes
# the entry number that seems to correspond with the above table.
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
vprint(f"Bit 0x000002 - offset: {hex(offset)}")
if offset != 0:
names = descramble_pman(data, offset, text_obfuscated)
for name in names:
vprint(f" {name}")
else:
vprint("Bit 0x000002 - NOT PRESENT")
if feature_mask & 0x04:
vprint("Bit 0x000004 - legacy lz mode on")
else:
vprint("Bit 0x000004 - legacy lz mode off")
if feature_mask & 0x08:
# I *THINK* that this is the mapping between sections and their
# respective textures, but I haven't dug in yet.
length, offset = struct.unpack("<II", data[header_offset:(header_offset + 8)])
header_offset += 8
print(f"Bit 0x000008 - count: {length}, offset: {hex(offset)}")
else:
print("Bit 0x000008 - NOT PRESENT")
if feature_mask & 0x10:
# Seems to be a strucure that duplicates the above section?
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
print(f"Bit 0x000010 - offset: {hex(offset)}")
if offset != 0:
names = descramble_pman(data, offset, text_obfuscated)
for name in names:
print(f" {name}")
else:
print("Bit 0x000010 - NOT PRESENT")
if feature_mask & 0x20:
vprint(f"Bit 0x000020 - text obfuscation on")
else:
vprint(f"Bit 0x000020 - text obfuscation off")
if feature_mask & 0x40:
# Two unknown bytes, first is a length or a count. Secound is
# an optional offset to grab another set of bytes from.
length, offset = struct.unpack("<II", data[header_offset:(header_offset + 8)])
header_offset += 8
# TODO: 0x40 has some weird offset calculations, gotta look into
# this further.
names = []
for x in range(length):
interesting_offset = offset + (x * 12)
if interesting_offset != 0:
interesting_offset = struct.unpack(
"<I",
data[interesting_offset:(interesting_offset + 4)],
)[0]
if interesting_offset != 0:
# Let's decode this until the first null.
bytedata = get_until_null(data, interesting_offset)
name = descramble_text(bytedata, text_obfuscated)
names.append(name)
print(f"Bit 0x000040 - count: {length}, offset: {hex(offset)}")
for name in names:
print(f" {name}")
else:
print("Bit 0x000040 - NOT PRESENT")
if feature_mask & 0x80:
# One unknown byte, treated as an offset.
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
print(f"Bit 0x000080 - offset: {hex(offset)}")
if offset != 0:
names = descramble_pman(data, offset, text_obfuscated)
for name in names:
print(f" {name}")
else:
print("Bit 0x000080 - NOT PRESENT")
if feature_mask & 0x100:
# Two unknown bytes, first is a length or a count. Secound is
# an optional offset to grab another set of bytes from.
length, offset = struct.unpack("<II", data[header_offset:(header_offset + 8)])
header_offset += 8
print(f"Bit 0x000100 - count: {length}, offset: {hex(offset)}")
# TODO: We do something if length is > 0, we use the magic flag
# from above in this case to optionally transform each thing we
# extract.
else:
print("Bit 0x000100 - NOT PRESENT")
if feature_mask & 0x200:
# One unknown byte, treated as an offset.
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
print(f"Bit 0x000200 - offset: {hex(offset)}")
if offset != 0:
names = descramble_pman(data, offset, text_obfuscated)
for name in names:
print(f" {name}")
else:
print("Bit 0x000200 - NOT PRESENT")
if feature_mask & 0x400:
# One unknown byte, treated as an offset.
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
print(f"Bit 0x000400 - offset: {hex(offset)}")
else:
print("Bit 0x000400 - NOT PRESENT")
if feature_mask & 0x800:
# This is the names of the animations as far as I can tell.
length, offset = struct.unpack("<II", data[header_offset:(header_offset + 8)])
header_offset += 8
pp_19 = length
pp_20 = offset
vprint(f"Bit 0x000800 - count: {length}, offset: {hex(offset)}")
names = []
for x in range(length):
interesting_offset = offset + (x * 12)
if interesting_offset != 0:
interesting_offset = struct.unpack(
"<I",
data[interesting_offset:(interesting_offset + 4)],
)[0]
if interesting_offset != 0:
# Let's decode this until the first null.
bytedata = get_until_null(data, interesting_offset)
name = descramble_text(bytedata, text_obfuscated)
names.append(name)
for name in names:
vprint(f" {name}")
else:
vprint("Bit 0x000800 - NOT PRESENT")
pp_19 = 0
pp_20 = 0
if feature_mask & 0x1000:
# Seems to be a secondary structure mirroring the above.
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
vprint(f"Bit 0x001000 - offset: {hex(offset)}")
if offset != 0:
names = descramble_pman(data, offset, text_obfuscated)
for name in names:
vprint(f" {name}")
else:
vprint("Bit 0x001000 - NOT PRESENT")
if feature_mask & 0x2000:
# I am making a very preliminary guess that these are shapes used along
# with animations specified below. The names in these sections tend to
# have the word "shape" in them.
length, offset = struct.unpack("<II", data[header_offset:(header_offset + 8)])
header_offset += 8
vprint(f"Bit 0x002000 - count: {length}, offset: {hex(offset)}")
# TODO: We do a LOT of extra stuff with this one, if count > 0...
names = []
for x in range(length):
interesting_offset = offset + (x * 12)
if interesting_offset != 0:
interesting_offset = struct.unpack(
"<I",
data[interesting_offset:(interesting_offset + 4)],
)[0]
if interesting_offset != 0:
# Let's decode this until the first null.
bytedata = get_until_null(data, interesting_offset)
name = descramble_text(bytedata, text_obfuscated)
names.append(name)
for name in names:
vprint(f" {name}")
else:
vprint("Bit 0x002000 - NOT PRESENT")
if feature_mask & 0x4000:
# Seems to be a secondary section mirroring the names from above.
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
vprint(f"Bit 0x004000 - offset: {hex(offset)}")
if offset != 0:
names = descramble_pman(data, offset, text_obfuscated)
for name in names:
vprint(f" {name}")
else:
vprint("Bit 0x004000 - NOT PRESENT")
if feature_mask & 0x8000:
# One unknown byte, treated as an offset.
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
vprint(f"Bit 0x008000 - offset: {hex(offset)}")
else:
vprint("Bit 0x008000 - NOT PRESENT")
if feature_mask & 0x10000:
# Included font package, BINXRPC encoded.
offset, length = struct.unpack("<II", data[header_offset:(header_offset + 8)])
binxrpc_offset = struct.unpack("<I", data[(offset + 8):(offset + 12)])[0]
header_offset += 4
if binxrpc_offset != 0:
benc = BinaryEncoding()
fontdata = benc.decode(data[binxrpc_offset:(binxrpc_offset + length)])
else:
fontdata = None
vprint(f"Bit 0x010000 - offset: {hex(offset)}, binxrpc offset: {hex(binxrpc_offset)}")
if fontdata is not None:
filename = os.path.join(path, "fontinfo.xml")
if write:
os.makedirs(path, exist_ok=True)
print(f"Writing {filename} font information...")
with open(filename, "w") as sfp:
sfp.write(str(fontdata))
else:
print(f"Would write {filename} font information...")
else:
vprint("Bit 0x010000 - NOT PRESENT")
if feature_mask & 0x20000:
# I am beginning to suspect that this is animation/level data. I have
# no idea what "afp" is.
offset = struct.unpack("<I", data[header_offset:(header_offset + 4)])[0]
header_offset += 4
vprint(f"Bit 0x020000 - offset: {hex(offset)}")
if offset > 0 and pp_19 > 0 and pp_20 > 0:
for x in range(pp_19):
structure_offset = offset + (x * 12)
tex_info_ptr = pp_20 + (x * 12)
_, tex_type, afp_header = struct.unpack(
"<III",
data[structure_offset:(structure_offset + 12)]
)
tex_name_offset, _, tex_afp_data_offset = struct.unpack(
"<III",
data[tex_info_ptr:(tex_info_ptr + 12)],
)
bytedata = get_until_null(data, tex_name_offset)
name = descramble_text(bytedata, text_obfuscated)
afp_data = hex(tex_afp_data_offset)
vprint(f" type: {hex(tex_type)}, afp_offset: {hex(afp_header)}, name: {name}, data: {afp_data}")
else:
vprint("Bit 0x020000 - NOT PRESENT")
if feature_mask & 0x40000:
vprint("Bit 0x040000 - modern lz mode on")
else:
vprint("Bit 0x040000 - modern lz mode off")
if header_offset != header_length:
raise Exception("Failed to parse bitfield of header correctly!")
def main() -> int:
parser = argparse.ArgumentParser(description="BishiBashi graphic file unpacker.")
parser.add_argument(
"file",
metavar="FILE",
help="The file to extract",
)
parser.add_argument(
"dir",
metavar="DIR",
help="Directory to extract to",
)
parser.add_argument(
"-p",
"--pretend",
action="store_true",
help="Pretend to extract instead of extracting.",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
help="Display verbuse debugging output.",
)
args = parser.parse_args()
extract(args.file, args.dir, write=not args.pretend, verbose=args.verbose)
return 0
if __name__ == "__main__":
sys.exit(main())

10
bishiutils Executable file
View File

@ -0,0 +1,10 @@
#! /usr/bin/env python3
import os
path = os.path.abspath(os.path.dirname(__file__))
name = os.path.basename(__file__)
import sys
sys.path.append(path)
import runpy
runpy.run_module(f"bemani.utils.{name}", run_name="__main__")

View File

@ -3,6 +3,7 @@
declare -a arr=(
"api"
"arcutils"
"bishiutils"
"bemanishark"
"binutils"
"cardconvert"