import struct
from typing import Optional, List, Dict, Any

from typing_extensions import Final

from bemani.protocol.stream import InputStream, OutputStream
from bemani.protocol.node import Node


class BinaryEncodingException(Exception):
    """
    Generic exception to be thrown when we encounter an issue decoding a binary stream
    """


class PackedOrdering:
    """
    A class that helps us encapsulate Konami's batshit backtracking hole-fill algorithm.

    Everything is aligned on a boundary appropriate for its data size. Strings and arrays are
    forced to be aligned to a 4 byte boundary on account of having an integer length field.
    All of these are padded to 4 bytes in terms of the room they take up in the stream.
    For the things that are 2 byte or 1 byte aligned, we end up sticking them after each other
    in 4 byte increments. That is to say, if we have an unsigned byte to pack, we reserve 4 bytes
    and stick it in the first byte slot, and if up to three additional bytes come in we will pack
    them after it in sequential order. It would make sense to store bytes and shorts in the unused
    padding after strings and arrays, but that's not what actually happens. Also note that we will
    never pack bytes after a short or vice versa, even if there is room. This also explains the
    bizarre behavior of not using spare bytes after strings or arrays. I'll emphasize again:
    everything is stored aligned, and within a given 4 byte chunk only similarly-sized objects can
    be packed. If a 4 byte chunk is already partially occupied, we can only add another item to it
    if 1) the item being added is the same size as the objects already there and 2) the item can
    be added with the correct alignment.

    A simple example:
    [1: byte] [2: byte] [3: integer]
    Packing would look like this (assuming all locations are a byte):
    1 2 0 0 3 3 3 3

    An example:
    [1: byte] [2: string, length 3] [3: short] [4: byte]
    Packing would look like this (assuming all locations are a byte):
    1 4 0 0 2 2 2 2 2 2 2 0 3 3 0 0
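
    The first example above, worked by hand as a doctest:

        >>> ordering = PackedOrdering(0, allow_expansion=True)
        >>> ordering.get_next_byte()
        0
        >>> ordering.mark_used(1, 0)
        >>> ordering.get_next_byte()
        1
        >>> ordering.mark_used(1, 1)
        >>> ordering.get_next_int()
        4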
    """

    def __init__(self, size: int, allow_expansion: bool=False) -> None:
        """
        Initialize with a known size. If this is to be used to create a packing instead of deduce
        a packing, then allow_expansion should be set to True and new holes will be created when
        needed. If this is to be used for decoding a current packing, allow_expansion should be set
        to False to ensure we don't choose locations outside the buffer.

        Parameters:
            size - Number of bytes to work with as an integer
            allow_expansion - Boolean describing whether to add to the end of the order when needed
        """
        self.order: List[Optional[int]] = []
        self.expand = allow_expansion

        for _ in range(size):
            self.order.append(None)
        self.__orderlen = size
        self.__lastbyte = 0
        self.__lastshort = 0
        self.__lastint = 0

    def __append_empty(self) -> None:
        self.order.append(None)
        self.__orderlen = self.__orderlen + 1

    def mark_used(self, size: int, offset: int, round_to: int=1) -> None:
        """
        Mark size bytes at offset as being used. If needed, round up to the nearest byte/halfword/integer.

        Parameters:
            size - Number of bytes to mark
            offset - Offset into binary chunk to start marking
            round_to - Optional integer specifying how many bytes to round to. Valid values are 1, 2 and 4
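
        Marking three bytes rounded up to a full 4 byte slot, as a doctest:

            >>> ordering = PackedOrdering(8)
            >>> ordering.mark_used(3, 4, round_to=4)
            >>> ordering.order
            [None, None, None, None, 4, 4, 4, 4]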
        """
        # Round to nearest value if needed
        while (size & (round_to - 1)) != 0:
            size = size + 1

        # Expand buffer if needed
        if self.expand:
            while self.__orderlen < (size + offset):
                self.__append_empty()

        # Mark buffer as used
        for i in range(size):
            self.order[i + offset] = size

    def get_next_byte(self) -> Optional[int]:
        """
        Returns an integer location where the next byte will be found/stored, respecting Konami logic.
        Will return None if it's not possible to find this byte a spot and we aren't expanding.
        """
        # If we expand for additions, make sure we've padded to a 4 byte boundary
        if self.expand:
            while (self.__orderlen & 3) != 0:
                self.__append_empty()

        for i in range(self.__lastbyte, self.__orderlen, 4):
            if self.order[i] is not None:
                # See if this has room for a byte
                for j in range(0, 4):
                    if self.order[i + j] == 1:
                        # This is okay, we can pack after this
                        continue
                    elif self.order[i + j] is None:
                        # This is open, pack here
                        self.__lastbyte = i
                        return i + j
                    else:
                        # This is something else, can't pack here
                        break
            else:
                # This chunk is completely empty, pack here
                self.__lastbyte = i
                return i

        if self.expand:
            self.__lastbyte = self.__orderlen
            return self.__orderlen
        else:
            return None

    def get_next_short(self) -> Optional[int]:
        """
        Returns an integer location where the next short will be found/stored, respecting Konami logic.
        Will return None if it's not possible to find this short a spot and we aren't expanding.
        """
        # If we expand for additions, make sure we've padded to a 4 byte boundary
        if self.expand:
            while (self.__orderlen & 3) != 0:
                self.__append_empty()

        for i in range(self.__lastshort, self.__orderlen, 4):
            if self.order[i] is not None:
                # See if this has room for a short
                for j in range(0, 4, 2):
                    if self.order[i + j] == 2 and self.order[i + j + 1] == 2:
                        # This is okay, we can pack after this
                        continue
                    elif self.order[i + j] is None and self.order[i + j + 1] is None:
                        # This is open, pack here
                        self.__lastshort = i
                        return i + j
                    else:
                        # This is something else, can't pack here
                        break
            else:
                # This chunk is completely empty, pack here
                self.__lastshort = i
                return i

        if self.expand:
            self.__lastshort = self.__orderlen
            return self.__orderlen
        else:
            return None

    def get_next_int(self) -> Optional[int]:
        """
        Returns an integer location where the next integer will be found/stored, respecting Konami logic.
        Will return None if it's not possible to find this integer a spot and we aren't expanding.
        """
        # If we expand for additions, make sure we've padded to a 4 byte boundary
        if self.expand:
            while (self.__orderlen & 3) != 0:
                self.__append_empty()

        for i in range(self.__lastint, self.__orderlen, 4):
            if self.order[i] is not None:
                continue
            if self.order[i + 1] is not None:
                continue
            if self.order[i + 2] is not None:
                continue
            if self.order[i + 3] is not None:
                continue

            self.__lastint = i
            return i

        if self.expand:
            self.__lastint = self.__orderlen
            return self.__orderlen
        else:
            return None

    @staticmethod
    def node_to_body_ordering(node: Node, include_children: bool=True, include_void: bool=False) -> List[Dict[str, Any]]:
        """
        Walk this node, its attributes and its children in the correct order to create a node
        ordering for the purpose of mapping Node objects to their actual data
        in a binary packet data chunk. We will use this to unpack data to determine the
        values of nodes, or to create the data that goes with these nodes.

        Parameters:
            include_children - Whether this ordering should include children. Defaults to True.
            include_void - Whether this ordering should include positions for void nodes. Defaults
                           to False.

        Returns:
            List of dictionary objects:
                - type - 'attribute' or 'value' to specify that this position in the
                         node walk is a string attribute or a node value
                - node - This Node object, for the purpose of assignment
                - name - The name of the attribute if type is 'attribute' or the name
                         of the node if type is 'value'
                - alignment - The alignment that this particular data object requires
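
        As a rough sketch of the shape (attribute name hypothetical), a node carrying
        a 4 byte integer value and a single attribute 'foo' would yield:
            [{'type': 'value', 'node': node, 'name': node.name, 'alignment': 4},
             {'type': 'attribute', 'node': node, 'name': 'foo', 'alignment': 4}]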
        """
        ordering = []

        # Include the node itself if it has a value or we include voids
        if node.data_length != 0 or include_void:
            alignment = node.data_length
            if alignment is None:
                # Take care of string types
                alignment = 4
            if alignment > 4:
                # Take care of 64 bit integers that are 32 bit aligned
                alignment = 4

            ordering.append({
                'type': 'value',
                'node': node,
                'name': node.name,
                'alignment': alignment,
            })

        order = sorted(node.attributes.keys())
        for attr in order:
            ordering.append({
                'type': 'attribute',
                'node': node,
                'name': attr,
                'alignment': 4,
            })

        if include_children:
            for child in node.children:
                ordering.extend(PackedOrdering.node_to_body_ordering(child))

        return ordering


class BinaryDecoder:
    """
    A class capable of taking a binary blob and decoding it to a Node tree.
    """

    def __init__(self, data: bytes, encoding: str, compressed: bool) -> None:
        """
        Initialize the object.

        Parameters:
            - data - A binary blob of data to be decoded
            - encoding - A string representing the text encoding for string elements. Should be either
                         'shift-jis', 'euc-jp' or 'utf-8'
            - compressed - A boolean specifying whether node names are stored 6-bit packed (compressed)
                           or as raw strings (decompressed)
        """
        self.stream = InputStream(data)
        self.encoding = encoding
        self.compressed = compressed
        self.executed = False

    def __read_node_name(self) -> str:
        """
        Given the current position in the stream, read the 6-bit-byte packed string name of the
        node.

        Returns:
            A string representing the name in ascii
        """
        length = self.stream.read_int()
        if length is None:
            raise BinaryEncodingException("Ran out of data when attempting to read node name length!")

        if not self.compressed:
            if length < 0x40:
                raise BinaryEncodingException("Node name length under decompressed minimum")
            elif length < 0x80:
                length -= 0x3f
            else:
                length_ex = self.stream.read_int()
                if length_ex is None:
                    raise BinaryEncodingException("Ran out of data when attempting to read node name length!")
                length = (length << 8) | length_ex
                length -= 0x7fbf
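
            # As a worked example of the scheme above: a 10 byte name is stored as the
            # single length byte 10 + 0x3f = 0x49, while a 100 byte name is stored as
            # 100 + 0x7fbf = 0x8023, split across two bytes (0x80, 0x23).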

            if length > BinaryEncoding.NAME_MAX_DECOMPRESSED:
                raise BinaryEncodingException("Node name length over decompressed limit")

            name = self.stream.read_blob(length)
            if name is None:
                raise BinaryEncodingException("Ran out of data when attempting to read node name!")

            return name.decode(self.encoding)

        if length > BinaryEncoding.NAME_MAX_COMPRESSED:
            raise BinaryEncodingException("Node name length over compressed limit")

        binary_length = int(((length * 6) + 7) / 8)
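
        # Each character of the name takes 6 bits, so for example a 4 character name
        # packs into exactly 3 bytes, while a 5 character name takes 30 bits and thus
        # rounds up to 4 bytes here.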

        def int_to_bin(integer: int) -> str:
            val = bin(integer)[2:]
            while len(val) < 8:
                val = '0' + val

            return val

        data = ''
        for _ in range(binary_length):
            next_byte = self.stream.read_int()
            if next_byte is None:
                raise BinaryEncodingException("Ran out of data when attempting to read node name!")
            data = data + int_to_bin(next_byte)
        data_str = [data[i:(i + 6)] for i in range(0, len(data), 6)]
        data_int = [int(val, 2) for val in data_str]
        ret = ''.join([Node.NODE_NAME_CHARS[val] for val in data_int])
        ret = ret[:length]
        return ret

    def __read_node(self, node_type: int) -> Node:
        """
        Given an integer node type, read the node's name, possible attributes
        and children. Will return a Node representing this node. Note
        that calling this on the first node should return a tree of all nodes.

        Returns:
            Node object
        """
        name = self.__read_node_name()
        node = Node(name=name, type=node_type)

        while True:
            child_type = self.stream.read_int()
            if child_type is None:
                raise BinaryEncodingException("Ran out of data when attempting to read node type!")

            if child_type == Node.END_OF_NODE:
                return node
            elif child_type == Node.ATTR_TYPE:
                key = self.__read_node_name()
                node.set_attribute(key)
            else:
                child = self.__read_node(child_type)
                node.add_child(child)

    def get_tree(self) -> Node:
        """
        Parse the header and body such that we can return a Node tree
        representing the data passed to us.

        Returns:
            Node object
        """
        if self.executed:
            raise BinaryEncodingException("Logic error, should only call this once per instance")
        self.executed = True
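
        # The overall blob layout parsed below is: a 4 byte header length, the header
        # itself (node types, names and attribute names), an end-of-document marker
        # plus padding, then a 4 byte body length followed by the packed body data.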

        # Read the header first
        header_length = self.stream.read_int(4)
        if header_length is None:
            raise BinaryEncodingException("Ran out of data when attempting to read header length!")

        node_type = self.stream.read_int()
        if node_type is None:
            raise BinaryEncodingException("Ran out of data when attempting to read root node type!")
        root = self.__read_node(node_type)

        eod = self.stream.read_int()
        if eod != Node.END_OF_DOCUMENT:
            raise BinaryEncodingException(f'Unknown node type {eod} at end of document')

        # Skip by any padding
        while self.stream.pos < header_length + 4:
            self.stream.read_byte()

        # Read the body next
        body_length = self.stream.read_int(4)

        if body_length is not None and body_length > 0:
            # We have a body
            body = self.stream.read_blob(body_length)
            if body is None:
                raise BinaryEncodingException('Body has insufficient data')

            ordering = PackedOrdering(body_length)

            values = PackedOrdering.node_to_body_ordering(root)

            for value in values:
                node = value['node']

                if value['type'] == 'attribute':
                    size = None
                    enc = 's'
                    dtype = 'str'
                    array = False
                    composite = False
                else:
                    size = node.data_length
                    enc = node.data_encoding
                    dtype = node.data_type
                    array = node.is_array
                    composite = node.is_composite

                if composite and array:
                    raise Exception('Logic error, no support for composite arrays!')

                if not array:
                    # Scalar value
                    alignment = value['alignment']

                    if alignment == 1:
                        loc = ordering.get_next_byte()
                    elif alignment == 2:
                        loc = ordering.get_next_short()
                    elif alignment == 4:
                        loc = ordering.get_next_int()
                    if loc is None:
                        raise BinaryEncodingException("Ran out of data when attempting to read node data location!")

                    if size is None:
                        # The size should be read from the first 4 bytes
                        size = struct.unpack('>I', body[loc:(loc + 4)])[0]
                        ordering.mark_used(size + 4, loc, round_to=4)
                        loc = loc + 4

                        decode_data = body[loc:(loc + size)]
                        decode_value = f'>{size}{enc}'
                    else:
                        # The size is built-in
                        ordering.mark_used(size, loc)

                        decode_data = body[loc:(loc + size)]
                        decode_value = f'>{enc}'

                    if composite:
                        val_list = list(struct.unpack(decode_value, decode_data))
                        if value['type'] == 'attribute':
                            raise Exception('Logic error, shouldn\'t have composite attribute type!')
                        node.set_value(val_list)
                        continue

                    val = struct.unpack(decode_value, decode_data)[0]

                    if dtype == 'str':
                        # Need to convert this from encoding to standard string.
                        # Also, need to lob off the trailing null.
                        try:
                            val = val[:-1].decode(self.encoding, 'replace')
                        except UnicodeDecodeError:
                            # Nothing we can do here
                            pass

                    if value['type'] == 'attribute':
                        node.set_attribute(value['name'], val)
                    else:
                        node.set_value(val)
                else:
                    # Array value
                    loc = ordering.get_next_int()
                    if loc is None:
                        raise BinaryEncodingException("Ran out of data when attempting to read array length location!")

                    # The raw size in bytes
                    length = struct.unpack('>I', body[loc:(loc + 4)])[0]
                    elems = int(length / size)

                    ordering.mark_used(length + 4, loc, round_to=4)
                    loc = loc + 4
                    decode_data = body[loc:(loc + length)]
                    decode_value = f'>{enc * elems}'

                    val = struct.unpack(decode_value, decode_data)
                    node.set_value([v for v in val])

        return root


class BinaryEncoder:
    """
    A class capable of taking a Node tree and encoding it into a binary format.
    """

    def __init__(self, tree: Node, encoding: str, compressed: bool=True) -> None:
        """
        Initialize the object.

        Parameters:
            tree - A Node tree of data to be encoded
            encoding - A string representing the text encoding for string elements. Should be either
                       'shift-jis', 'euc-jp' or 'utf-8'
            compressed - A boolean specifying whether node names should be stored 6-bit packed
                         (compressed) or as raw strings (decompressed)
        """
        self.stream = OutputStream()
        self.encoding = encoding
        self.tree = tree
        self.__body: List[int] = []
        self.__body_len = 0
        self.executed = False
        self.compressed = compressed

        # Generate the character LUT
        self.char_lut: Dict[str, int] = {}
        for i in range(len(Node.NODE_NAME_CHARS)):
            self.char_lut[Node.NODE_NAME_CHARS[i]] = i

    def __write_node_name(self, name: str) -> None:
        """
        Given the current position in the stream, write the 6-bit-byte packed string name of the
        node.

        Parameters:
            name - A string name which should be encoded as a node name
        """
        if not self.compressed:
            encoded = name.encode(self.encoding)
            length = len(encoded)

            if length > BinaryEncoding.NAME_MAX_DECOMPRESSED:
                raise BinaryEncodingException("Node name length over decompressed limit")

            if length < 64:
                self.stream.write_int(length + 0x3f)
            else:
                length += 0x7fbf
                self.stream.write_int((length >> 8) & 0xff)
                self.stream.write_int(length & 0xff)
            self.stream.write_blob(encoded)
            return

        def char_to_bin(ch: str) -> str:
            index = self.char_lut[ch]
            val = bin(index)[2:]

            while len(val) < 6:
                val = '0' + val

            return val[-6:]

        # Convert to six bit bytes
        length = len(name)
        data = ''.join([char_to_bin(c) for c in name])
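
        # For example, a 4 character name becomes 24 bits, which already falls on a
        # byte boundary and packs into exactly 3 output bytes; a 5 character name
        # becomes 30 bits, which is padded below to 32 bits and takes 4 bytes.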

        # Pad out the rest with zeros
        while (len(data) & 0x7) != 0:
            data = data + '0'

        # Convert to 8-bit bytes
        data_chunks = [data[i:(i + 8)] for i in range(0, len(data), 8)]
        data_int = [int(val, 2) for val in data_chunks]

        # Output
        self.stream.write_int(length)
        for val in data_int:
            self.stream.write_int(val)

    def __write_node(self, node: Node) -> None:
        """
        Given a Node, write out its type and name, the names of its attributes,
        and then recursively write out its children. Note that calling this on
        the root node will write out the header for the entire tree.

        Parameters:
            node - A Node which should be encoded.
        """
        to_write = PackedOrdering.node_to_body_ordering(node, include_children=False, include_void=True)
        for thing in to_write:
            # First, write the type of this node out
            if thing['type'] == 'value':
                self.stream.write_int(thing['node'].type)
            else:
                self.stream.write_int(Node.ATTR_TYPE)
            # Now, write the name out
            self.__write_node_name(thing['name'])

        # Now, write out the children
        for child in node.children:
            self.__write_node(child)

        # Now, write out the end of node marker
        self.stream.write_int(Node.END_OF_NODE)

    def __add_data(self, data: bytes, length: int, offset: int) -> None:
        """
        Given some binary data, a length and an offset, add the data at the offset in the
        output body. This function will ensure that any new bytes that aren't copied are
        zeroed out. This includes bytes before the offset as well as any pad bytes after
        offset + length needed to pad the body to a 4 byte boundary.

        Parameters:
            data - A blob of binary data which should be copied into the output
            length - Number of bytes of data to copy
            offset - Offset into the body to start copying
        """
        while self.__body_len < (length + offset):
            self.__body.append(0)
            self.__body_len = self.__body_len + 1

        # Make sure it's padded to 4 bytes
        while (self.__body_len & 0x3) != 0:
            self.__body.append(0)
            self.__body_len = self.__body_len + 1

        for i in range(length):
            self.__body[offset + i] = data[i]

    def get_data(self) -> bytes:
        """
        Encode the header and body into binary format.

        Returns:
            Binary blob of data that can be decoded by a game.
        """
        if self.executed:
            raise Exception("Logic error, should only call this once per instance")
        self.executed = True

        # Generate the header first
        self.__write_node(self.tree)
        self.stream.write_int(Node.END_OF_DOCUMENT)
        self.stream.write_pad(4)

        header_length = len(self.stream.data)
        header = self.stream.data[:]

        # Generate the body
        values = PackedOrdering.node_to_body_ordering(self.tree)
        if len(values) > 0:
            ordering = PackedOrdering(0, allow_expansion=True)

            for value in values:
                node = value['node']

                if value['type'] == 'attribute':
                    size = None
                    enc = 's'
                    dtype = 'str'
                    array = False
                    composite = False
                    val = node.attribute(value['name'])
                else:
                    size = node.data_length
                    enc = node.data_encoding
                    dtype = node.data_type
                    array = node.is_array
                    composite = node.is_composite
                    val = node.value

                if val is None:
                    raise BinaryEncodingException(
                        f'Node \'{value["name"]}\' has invalid value None',
                    )

                if not array:
                    # Scalar value
                    alignment = value['alignment']

                    if alignment == 1:
                        loc = ordering.get_next_byte()
                    elif alignment == 2:
                        loc = ordering.get_next_short()
                    elif alignment == 4:
                        loc = ordering.get_next_int()
                    if loc is None:
                        raise BinaryEncodingException("Ran out of data when attempting to allocate node location!")

                    if dtype == 'str':
                        # Need to convert this to encoding from standard string.
                        # Also, need to add the trailing null.
                        if not isinstance(val, str):
                            raise BinaryEncodingException(
                                f'Node \'{value["name"]}\' has non-string value!',
                            )

                        try:
                            valbytes = val.encode(self.encoding) + b'\0'
                        except UnicodeEncodeError:
                            raise BinaryEncodingException(
                                f'Node \'{value["name"]}\' has un-encodable string value \'{val}\''
                            )
                        size = len(valbytes)
                        self.__add_data(struct.pack('>I', size) + valbytes, size + 4, loc)
                        ordering.mark_used(size + 4, loc, round_to=4)

                        # We took care of this one
                        continue
                    elif dtype == 'bin':
                        # Store raw binary
                        size = len(val)
                        self.__add_data(struct.pack('>I', size) + val, size + 4, loc)
                        ordering.mark_used(size + 4, loc, round_to=4)

                        # We took care of this one
                        continue
                    elif composite:
                        # Array, but not, somewhat silly
                        if size is None:
                            raise Exception("Logic error, node size not set yet this is not an attribute!")

                        encode_value = f'>{enc}'
                        self.__add_data(struct.pack(encode_value, *val), size, loc)
                        ordering.mark_used(size, loc)

                        # We took care of this one
                        continue
                    elif dtype == 'bool':
                        val = 1 if val else 0

                    # The size is built-in, emit it
                    if size is None:
                        raise Exception("Logic error, node size not set yet this is not an attribute!")

                    encode_value = f'>{enc}'
                    self.__add_data(struct.pack(encode_value, val), size, loc)
                    ordering.mark_used(size, loc)
                else:
                    # Array value
                    loc = ordering.get_next_int()
                    if loc is None:
                        raise BinaryEncodingException("Ran out of data when attempting to allocate array location!")
                    if size is None:
                        raise Exception("Logic error, node size not set yet this is not an attribute!")

                    # The raw size in bytes
                    elems = len(val)
                    length = elems * size

                    # Write out the header (number of bytes taken up)
                    data = struct.pack('>I', length)
                    encode_value = f'>{enc}'

                    # Write out data one element at a time
                    for v in val:
                        if dtype == 'bool':
                            data = data + struct.pack(encode_value, 1 if v else 0)
                        else:
                            data = data + struct.pack(encode_value, v)

                    self.__add_data(data, length + 4, loc)
                    ordering.mark_used(length + 4, loc, round_to=4)

        return b''.join([
            struct.pack('>I', header_length),
            header,
            struct.pack('>I', self.__body_len),
            bytes(self.__body),
        ])


class BinaryEncoding:
    """
    Wrapper class representing a Binary Encoding.
    """

    MAGIC: Final[int] = 0xA0

    COMPRESSED_WITH_DATA: Final[int] = 0x42
    COMPRESSED_WITHOUT_DATA: Final[int] = 0x43
    DECOMPRESSED_WITH_DATA: Final[int] = 0x45
    DECOMPRESSED_WITHOUT_DATA: Final[int] = 0x46

    NAME_MAX_COMPRESSED: Final[int] = 0x24
    NAME_MAX_DECOMPRESSED: Final[int] = 0x1000

    # The string values should match the constants in EAmuseProtocol.
    # I have no better way to link these than to write this comment,
    # as otherwise we would have a circular dependency.
    ENCODINGS: Final[Dict[int, str]] = {
        0x00: "ascii",
        0x20: "shift-jis-legacy",
        0x60: "euc-jp",
        0x80: "shift-jis",
        0xA0: "utf-8",
    }
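
    # A minimal usage sketch, assuming a well-formed binary packet in raw_packet:
    #
    #   encoding = BinaryEncoding()
    #   tree = encoding.decode(raw_packet)   # sets encoding.encoding on success
    #   blob = encoding.encode(tree)         # re-encodes with the detected encoding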

    def __init__(self) -> None:
        """
        Initialize the encoding object.
        """
        self.encoding: Optional[str] = None
        self.compressed: bool = True

    def __sanitize_encoding(self, enc: str) -> str:
        """
        Convert an externally acceptable encoding value to one suitable for internal use.

        Parameters:
            enc - The encoding as a string as passed from an outside caller

        Returns:
            An encoding string suitable for internal use.
        """
        if enc == "shift-jis-legacy":
            return "shift-jis"
        return enc

    def decode(self, data: bytes, skip_on_exceptions: bool=False) -> Optional[Node]:
        """
        Given a data blob, decode the data with the current encoding. Will
        also set the class property value 'encoding' to the encoding used
        on the last decode.

        Parameters:
            data - Binary blob representing the data to decode

        Returns:
            Node object representing the root of the decoded tree, or None
            if we couldn't decode the object for some reason.
        """
        try:
            data_magic, contents, encoding_raw, encoding_swapped = struct.unpack(">BBBB", data[0:4])
        except struct.error:
            # Couldn't even parse magic
            return None

        if data_magic != BinaryEncoding.MAGIC:
            return None
        if ((~encoding_raw) & 0xFF) != encoding_swapped:
            return None
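
        # For example, a compressed UTF-8 packet starts with the four bytes
        # 0xA0 0x42 0xA0 0x5F: the magic, the compressed-with-data marker, the
        # UTF-8 encoding byte, and its bitwise complement as a sanity check.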

        self.compressed = contents in [
            BinaryEncoding.COMPRESSED_WITH_DATA, BinaryEncoding.COMPRESSED_WITHOUT_DATA
        ]
        if not self.compressed and contents not in [
            BinaryEncoding.DECOMPRESSED_WITH_DATA, BinaryEncoding.DECOMPRESSED_WITHOUT_DATA
        ]:
            return None

        encoding = BinaryEncoding.ENCODINGS.get(encoding_raw)

        if encoding is not None:
            self.encoding = encoding
            try:
                decoder = BinaryDecoder(
                    data[4:], self.__sanitize_encoding(encoding), self.compressed
                )
                return decoder.get_tree()
            except BinaryEncodingException:
                if skip_on_exceptions:
                    return None
                else:
                    raise
        else:
            return None

    def encode(self, tree: Node, encoding: Optional[str]=None, compressed: bool=True) -> bytes:
        """
        Given a tree of Node objects, encode the data with the current encoding.

        Parameters:
            tree - Node tree representing the data to encode
            encoding - The text encoding to use. If None, will try to use the encoding from
                       the last successful decode
            compressed - Whether to store node names 6-bit packed (compressed) or as raw
                         strings (decompressed)

        Returns:
            Binary blob representing encoded data
        """
        if encoding is None:
            encoding = self.encoding
        if encoding is None:
            raise BinaryEncodingException('Unknown encoding')

        encoding_magic = None
        for magic, encstr in BinaryEncoding.ENCODINGS.items():
            if encstr == encoding:
                encoding_magic = magic
                break

        if encoding_magic is None:
            raise BinaryEncodingException(f"Invalid text encoding {encoding}")

        encoder = BinaryEncoder(tree, self.__sanitize_encoding(encoding), compressed)
        data = encoder.get_data()
        return struct.pack(
            ">BBBB",
            BinaryEncoding.MAGIC,
            BinaryEncoding.COMPRESSED_WITH_DATA if compressed else BinaryEncoding.DECOMPRESSED_WITH_DATA,
            encoding_magic,
            (~encoding_magic & 0xFF)
        ) + data