# 573in1/tools/common/util.py
# -*- coding: utf-8 -*-
# 573in1 - Copyright (C) 2022-2024 spicyjpeg
#
# 573in1 is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# 573in1 is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# 573in1. If not, see <https://www.gnu.org/licenses/>.
import json, logging, re
from collections.abc import \
ByteString, Generator, Iterable, Iterator, Mapping, Sequence
from dataclasses import dataclass, field
from functools import reduce
from hashlib import md5
from itertools import chain
from io import SEEK_END, SEEK_SET
from typing import Any, BinaryIO, TextIO
## Value manipulation
def roundUpToMultiple(value: int, length: int) -> int:
	"""Round value up to the nearest multiple of length."""

	remainder: int = value % length

	if not remainder:
		return value

	return value + (length - remainder)
def byteSwap(value: int, byteLength: int) -> int:
	"""Reverse the byte order of a byteLength-byte unsigned integer."""

	encoded: bytes = value.to_bytes(byteLength, "big")
	return int.from_bytes(encoded, "little")
def encodeSigned(value: int, bitLength: int) -> int:
	"""Truncate a (possibly negative) integer to its bitLength-bit two's
	complement representation."""

	return value & ((1 << bitLength) - 1)
def decodeSigned(value: int, bitLength: int) -> int:
	"""Sign-extend a bitLength-bit two's complement value to a Python int."""

	sign: int = 1 << (bitLength - 1)
	magnitude: int = value & (sign - 1)

	return magnitude - (value & sign)
## String manipulation
# This encoding is similar to standard base45, but with some problematic
# characters (' ', '$', '%', '*') excluded.
_BASE41_CHARSET: str = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ+-./:"

# Matches 3- or 6-digit hexadecimal color codes, with an optional leading '#'.
_COLOR_REGEX: re.Pattern = re.compile(r"^#?([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})$")
def toPrintableChar(value: int) -> str:
	"""Return the character for an ASCII code, or "." if not printable."""

	return chr(value) if (0x20 <= value <= 0x7e) else "."
def hexdumpToFile(data: Sequence[int], output: TextIO, width: int = 16):
	"""Write a hexdump of data to output, width bytes per row, with the
	offset, hex bytes and an ASCII rendition on each line."""

	for offset in range(0, len(data), width):
		row = data[offset:offset + width]

		hexColumn: str = " ".join(f"{byte:02x}" for byte in row)
		asciiColumn: str = "".join(
			chr(byte) if (0x20 <= byte <= 0x7e) else "."
			for byte in row
		)

		output.write(
			f" {offset:04x}: "
			f"{hexColumn.ljust(width * 3 - 1)} "
			f"|{asciiColumn.ljust(width)}|\n"
		)
def serialNumberToString(_id: ByteString) -> str:
	"""Decode bytes 1-6 of an ID blob (little endian) into an 8-digit
	"xxxx-xxxx" string."""

	# NOTE: a range check (returning "xxxx-xxxx" for values >= 100000000)
	# used to exist here but is currently disabled.
	decoded: int = int.from_bytes(_id[1:7], "little")

	high: int = (decoded // 10000) % 10000
	low: int = decoded % 10000
	return f"{high:04d}-{low:04d}"
def decodeBase41(data: str) -> bytearray:
	"""Decode a base41 string (see _BASE41_CHARSET) into bytes.

	Every group of 3 characters encodes a 16-bit big endian value; any
	trailing characters that do not form a full group are ignored. Raises
	ValueError if a character is not part of the charset or a group decodes
	to a value above 0xffff.
	"""

	output: bytearray = bytearray()

	for start in range(0, len(data) - 2, 3):
		low: int = _BASE41_CHARSET.index(data[start])
		mid: int = _BASE41_CHARSET.index(data[start + 1])
		high: int = _BASE41_CHARSET.index(data[start + 2])

		word: int = low + (mid * 41) + (high * 1681)
		output.append(word >> 8)
		output.append(word & 0xff)

	return output
def colorFromString(value: str) -> tuple[int, int, int]:
	"""Parse an "rgb" or "rrggbb" hexadecimal color string (the leading "#"
	is optional) into an (r, g, b) tuple.

	Raises ValueError if the string is not a valid color.
	"""

	matched: re.Match | None = _COLOR_REGEX.match(value)

	if matched is None:
		raise ValueError(f"invalid color value '{value}'")

	digits: str = matched.group(1)

	if len(digits) == 3:
		# Expand each single hex digit to a full byte (e.g. "f" -> 0xff).
		return tuple(int(digit, 16) * 0x11 for digit in digits)

	return tuple(int(digits[i:i + 2], 16) for i in range(0, 6, 2))
## Hashes and checksums
def hashData(data: Iterable[int]) -> int:
	"""Compute the 32-bit sdbm hash of a sequence of bytes."""

	current: int = 0

	for byte in data:
		# Equivalent to (current * 65599 + byte), truncated to 32 bits.
		current = (
			byte
			+ ((current << 6) & 0xffffffff)
			+ ((current << 16) & 0xffffffff)
			- current
		) & 0xffffffff

	return current
def checksum8(data: Iterable[int], invert: bool = False) -> int:
	"""Sum all bytes modulo 256, optionally complementing the result."""

	value: int = sum(data) & 0xff
	return (value ^ 0xff) if invert else value
def checksum8to16(data: Iterable[int], invert: bool = False) -> int:
	"""Sum all bytes modulo 0x10000, optionally complementing the result."""

	value: int = sum(data) & 0xffff
	return (value ^ 0xffff) if invert else value
def checksum16(
	data: Iterable[int], endianness: str = "little", invert: bool = False
) -> int:
	"""Sum the byte stream as 16-bit values modulo 0x10000, optionally
	complementing the result. A trailing odd byte is ignored."""

	total: int = 0
	stream: Iterator = iter(data)

	# Pulling pairs from the same iterator consumes two bytes per word.
	for pair in zip(stream, stream):
		total += int.from_bytes(pair, endianness)

	value: int = total & 0xffff
	return (value ^ 0xffff) if invert else value
def shortenedMD5(data: ByteString) -> bytearray:
	"""Compute the MD5 digest of data and fold it down to 8 bytes by XORing
	its two halves together."""

	digest: bytes = md5(data).digest()

	return bytearray(
		first ^ second for first, second in zip(digest[0:8], digest[8:16])
	)
## CRC calculation

# Reflected form of the 1-Wire CRC-8 polynomial (x^8 + x^5 + x^4 + 1 = 0x31).
_CRC8_POLY: int = 0x8c

def dsCRC8(data: ByteString) -> int:
	"""Compute the Dallas/Maxim 1-Wire CRC-8 of data, LSB first."""

	crc: int = 0

	for byte in data:
		for _ in range(8):
			mix: int = (crc ^ byte) & 1

			byte >>= 1
			crc >>= 1

			if mix:
				crc ^= _CRC8_POLY

	return crc & 0xff
def sidCRC16(data: ByteString, width: int = 16) -> int:
	"""Fold the bits of data into a width-bit value by XORing bit N of the
	stream into bit (N % width) of the accumulator; the result is always
	truncated to 16 bits. Despite the name, this is a parity fold rather
	than a true CRC."""

	crc: int = 0
	bitIndex: int = 0

	for byte in data:
		for _ in range(8):
			if byte & 1:
				crc ^= 1 << (bitIndex % width)

			byte >>= 1
			bitIndex += 1

	return crc & 0xffff
## Logging
def setupLogger(level: int | None):
logging.basicConfig(
format = "[{levelname:8s}] {message}",
style = "{",
level = (
logging.WARNING,
logging.INFO,
logging.DEBUG
)[min(level or 0, 2)]
)
## JSON pretty printing
@dataclass
class JSONGroupedArray:
	"""A JSON array whose items are split into groups, which JSONFormatter
	renders separated by blank lines (unless single-line output is in
	effect)."""

	groups: list[Sequence] = field(default_factory = list)

	def merge(self) -> list:
		"""Flatten all groups into a single list."""

		merged: list = []

		for group in self.groups:
			merged.extend(group)

		return merged
@dataclass
class JSONGroupedObject:
	"""A JSON object whose key-value pairs are split into groups, which
	JSONFormatter renders separated by blank lines (unless single-line
	output is in effect)."""

	groups: list[Mapping] = field(default_factory = list)

	def merge(self) -> Mapping:
		"""Collapse all groups into one mapping, with later groups taking
		precedence over earlier ones. Raises TypeError if there are no
		groups."""

		return reduce(lambda merged, extra: merged | extra, self.groups)
class JSONFormatter:
	"""Streaming JSON serializer with optional minification and support for
	JSONGroupedArray/JSONGroupedObject grouping.

	serialize() yields the output as a stream of string fragments to be
	joined by the caller. Grouped containers are rendered one item per
	line, with a blank line between groups and object values aligned on a
	column; plain containers are rendered according to the constructor
	flags.
	"""

	def __init__(
		self,
		minify: bool = False,
		groupedOnSingleLine: bool = False,
		ungroupedOnSingleLine: bool = True,
		indentString: str = "\t"
	):
		# minify:                omit all optional whitespace and line breaks
		# groupedOnSingleLine:   render grouped containers on a single line
		# ungroupedOnSingleLine: render plain lists/tuples/mappings on a
		#                        single line
		# indentString:          string emitted once per indentation level
		self.minify: bool = minify
		self.groupedOnSingleLine: bool = groupedOnSingleLine
		self.ungroupedOnSingleLine: bool = ungroupedOnSingleLine
		self.indentString: str = indentString

		self._indentLevel: int = 0
		# Incremented when entering a single-line container and decremented
		# on exit; while nonzero, all nested containers are forced onto a
		# single line as well.
		self._forceSingleLine: int = 0

	def _inlineSep(self, char: str) -> str:
		# Returns the given delimiter character with spacing appropriate
		# for inline (single-line) output, or bare when minifying.
		if self.minify:
			return char
		elif char in ")]}":
			return f" {char}"
		else:
			return f"{char} "

	def _lineBreak(self, numBreaks: int = 1) -> str:
		# Returns one or more line breaks followed by the current
		# indentation, or an empty string when minifying.
		if self.minify:
			return ""
		else:
			return ("\n" * numBreaks) + (self.indentString * self._indentLevel)

	def _singleLineArray(self, obj: Sequence) -> Generator[str, None, None]:
		# Serializes a sequence as a single-line JSON array.
		if not obj:
			yield "[]"
			return

		self._forceSingleLine += 1
		yield self._inlineSep("[")

		lastIndex: int = len(obj) - 1

		for index, item in enumerate(obj):
			yield from self.serialize(item)

			if index < lastIndex:
				yield self._inlineSep(",")

		self._forceSingleLine -= 1
		yield self._inlineSep("]")

	def _singleLineObject(self, obj: Mapping) -> Generator[str, None, None]:
		# Serializes a mapping as a single-line JSON object.
		if not obj:
			yield "{}"
			return

		self._forceSingleLine += 1
		yield self._inlineSep("{")

		lastIndex: int = len(obj) - 1

		for index, ( key, value ) in enumerate(obj.items()):
			yield from self.serialize(key)
			yield self._inlineSep(":")
			yield from self.serialize(value)

			if index < lastIndex:
				yield self._inlineSep(",")

		self._forceSingleLine -= 1
		yield self._inlineSep("}")

	def _groupedArray(
		self, groups: Sequence[Sequence]
	) -> Generator[str, None, None]:
		# Serializes a list of item groups as a multiline JSON array, one
		# item per line with a blank line between consecutive groups.
		if not groups:
			yield "[]"
			return

		self._indentLevel += 1
		yield "[" + self._lineBreak()

		lastGroupIndex: int = len(groups) - 1

		for groupIndex, obj in enumerate(groups):
			if not obj:
				raise ValueError("empty groups are not allowed")

			lastIndex: int = len(obj) - 1

			for index, item in enumerate(obj):
				yield from self.serialize(item)

				if index < lastIndex:
					yield "," + self._lineBreak()

			if groupIndex < lastGroupIndex:
				yield "," + self._lineBreak(2)

		self._indentLevel -= 1
		yield self._lineBreak() + "]"

	def _groupedObject(
		self, groups: Sequence[Mapping]
	) -> Generator[str, None, None]:
		# Serializes a list of key-value groups as a multiline JSON object,
		# one pair per line with a blank line between consecutive groups.
		# Values are aligned by padding keys to the longest key's length.
		if not groups:
			yield "{}"
			return

		self._indentLevel += 1
		yield "{" + self._lineBreak()

		lastGroupIndex: int = len(groups) - 1

		for groupIndex, obj in enumerate(groups):
			if not obj:
				raise ValueError("empty groups are not allowed")

			# Serialize all keys upfront in order to measure their lengths
			# for alignment.
			keys: list[str] = [
				("".join(self.serialize(key)) + self._inlineSep(":"))
				for key in obj.keys()
			]

			lastIndex: int = len(obj) - 1
			maxKeyLength: int = 0 if self.minify else max(map(len, keys))

			for index, value in enumerate(obj.values()):
				yield keys[index].ljust(maxKeyLength)
				yield from self.serialize(value)

				if index < lastIndex:
					yield "," + self._lineBreak()

			if groupIndex < lastGroupIndex:
				yield "," + self._lineBreak(2)

		self._indentLevel -= 1
		yield self._lineBreak() + "}"

	def serialize(self, obj: Any) -> Generator[str, None, None]:
		"""Serializes an arbitrary JSON-encodable value, yielding the
		resulting JSON as a stream of string fragments."""

		# Single-line rendering is forced for everything nested within a
		# single-line container.
		groupedOnSingleLine: bool = \
			self.groupedOnSingleLine or bool(self._forceSingleLine)
		ungroupedOnSingleLine: bool = \
			self.ungroupedOnSingleLine or bool(self._forceSingleLine)

		match obj:
			case JSONGroupedArray() if groupedOnSingleLine:
				yield from self._singleLineArray(obj.merge())
			case JSONGroupedArray() if not groupedOnSingleLine:
				yield from self._groupedArray(obj.groups)

			case JSONGroupedObject() if groupedOnSingleLine:
				yield from self._singleLineObject(obj.merge())
			case JSONGroupedObject() if not groupedOnSingleLine:
				yield from self._groupedObject(obj.groups)

			case (list() | tuple()) if ungroupedOnSingleLine:
				yield from self._singleLineArray(obj)
			case (list() | tuple()) if not ungroupedOnSingleLine:
				yield from self._groupedArray(( obj, ))

			case Mapping() if ungroupedOnSingleLine:
				yield from self._singleLineObject(obj)
			case Mapping() if not ungroupedOnSingleLine:
				yield from self._groupedObject(( obj, ))

			case _:
				yield json.dumps(obj, ensure_ascii = False)
## Hash table generator

@dataclass
class HashTableEntry:
	"""A single entry of a hash table built by HashTableBuilder."""

	fullHash: int   # Full (unreduced) hash of the entry's key
	chainIndex: int # Index of the next entry in the chain, 0 if none
	data: Any       # Payload associated with the key

class HashTableBuilder:
	"""Builds a chained hash table laid out as a flat list of entries.

	The first numBuckets slots of the entries list are the buckets
	themselves (indexed by hash modulo numBuckets, None if empty); entries
	that collide with an occupied bucket are appended past the buckets and
	linked to it through chainIndex.
	"""

	def __init__(self, numBuckets: int = 256):
		self._numBuckets: int = numBuckets

		self.entries: list[HashTableEntry | None] = [ None ] * numBuckets

	def addEntry(self, fullHash: int, data: Any) -> int:
		"""Inserts a new entry and returns the index it was stored at
		within the entries list.

		Raises KeyError if an entry with the same full hash is already
		present.
		"""

		index: int = fullHash % self._numBuckets
		entry: HashTableEntry = HashTableEntry(fullHash, 0, data)

		bucket: HashTableEntry | None = self.entries[index]

		# If no bucket exists for the entry's index, create one.
		if bucket is None:
			self.entries[index] = entry
			return index

		if bucket.fullHash == fullHash:
			raise KeyError(f"hash collision detected ({fullHash:#010x})")

		# Otherwise, follow the bucket's chain, find the last chained item
		# and link the new entry to it.
		while bucket.chainIndex:
			bucket = self.entries[bucket.chainIndex]

			# Fixed: this message used to contain a stray comma, making it
			# inconsistent with the identical check above.
			if bucket.fullHash == fullHash:
				raise KeyError(f"hash collision detected ({fullHash:#010x})")

		bucket.chainIndex = len(self.entries)
		self.entries.append(entry)

		return bucket.chainIndex
class StringBlobBuilder:
	"""Packs strings into a single blob, deduplicating repeated strings and
	padding the blob to the given alignment after each addition."""

	def __init__(self, alignment: int = 1):
		self._alignment: int = alignment

		# Offsets of strings already present in the blob, for deduplication.
		self._offsets: dict[ByteString, int] = {}
		self.data: bytearray = bytearray()

	def addString(self, string: ByteString) -> int:
		"""Appends a string to the blob (unless an identical string was
		already added) and returns its offset within the blob."""

		# If the same string is already in the blob, return its offset
		# without adding new data.
		if string in self._offsets:
			return self._offsets[string]

		offset: int = len(self.data)

		self._offsets[string] = offset
		self.data += string

		# Pad the blob so the next string starts on an aligned boundary.
		padding: int = -len(self.data) % self._alignment
		self.data += bytes(padding)

		return offset
## Odd/even interleaved file reader
class InterleavedFile(BinaryIO):
def __init__(self, even: BinaryIO, odd: BinaryIO):
self._even: BinaryIO = even
self._odd: BinaryIO = odd
self._offset: int = 0
# Determine the total size of the file ahead of time.
even.seek(0, SEEK_END)
odd.seek(0, SEEK_END)
self._length: int = even.tell()
if self._length != odd.tell():
raise RuntimeError("even and odd files must have the same size")
even.seek(0, SEEK_SET)
odd.seek(0, SEEK_SET)
def __enter__(self) -> BinaryIO:
return self
def __exit__(self, excType: Any, excValue: Any, traceback: Any) -> bool:
self.close()
return False
def close(self):
self._even.close()
self._odd.close()
def seek(self, offset: int, mode: int = SEEK_SET):
match mode:
case 0:
self._offset = offset
case 1:
self._offset = min(self._offset + offset, self._length)
case 2:
self._offset = max(self._length - offset, 0)
self._even.seek((self._offset + 1) // 2)
self._odd.seek(self._offset // 2)
def tell(self) -> int:
return self._offset
def read(self, length: int) -> bytearray:
_length: int = min(length, self._length - self._offset)
output: bytearray = bytearray(_length)
if self._offset % 2:
output[0:_length:2] = self._odd.read((_length + 1) // 2)
output[1:_length:2] = self._even.read(_length // 2)
else:
output[0:_length:2] = self._even.read((_length + 1) // 2)
output[1:_length:2] = self._odd.read(_length // 2)
self._offset += _length
return output