1
0
mirror of synced 2025-02-17 11:08:36 +01:00

Overhaul header metadata code with new FumenHeader Python class (plus use ints, not bytes) (#45)

Previously, I had to reverse engineer many of the bytes in a valid fumen
header. This meant that I was often working with literal byte values, as
well as guessing at what things meant, calling them "padding" or
"unknown".

However, thanks to #44, I now have a TON of knowledge about the correct
purpose of each byte in the header. This lets me properly document each
value's purpose, and it also lets me parse 4-byte groups as ints, rather
than dealing with individual raw bytes.

In summary, the changes in this PR are:

- Create a new `FumenHeader` class with individual (correctly-named!)
attributes for each 4-byte integer.
- Parse and write the fumen header as one big 520-byte chunk, instead of
handling the header in smaller increments.
- Remove a bunch of dead lines of code that were hardcoding specific
byte values, and replace them with proper integer values.
- Vastly simplify the test for valid headers by removing checks against
individual bytes.

Necessary for #40.
Fixes #44.
This commit is contained in:
Viv 2023-07-19 14:36:58 -04:00 committed by GitHub
parent ecfed22b38
commit 2a6d1c3df5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 202 additions and 440 deletions

View File

@ -47,51 +47,6 @@ typeNotes = {v: k for k, v in noteTypes.items()}
branchNames = ("normal", "advanced", "master")
# Fumen headers open with 432 bytes assembled from a handful of recurring byte substrings.
# The substrings that show up most often are collected here, keyed by a short label:
byte_strings = dict([
    # 6-byte substring
    ('x00', b'\x00\x00\x00\x00\x00\x00'),
    # 12-byte substrings
    ('431', b'43\xc8Ag&\x96B"\xe2\xd8B'),
    ('432', b'43\xc8Ag&\x96BD\x84\xb7B'),
    ('433', b'43\xc8A"\xe2\xd8B\x00@\xfaB'),
    ('434', b'43\xc8AD\x84\xb7B"\xe2\xd8B'),
    ('g1', b'g&\x96B4\xa3\x89Cxw\x05A'),
    ('V1', b'V\xd5&B\x00@\xfaB\x00@\xfaB'),
    ('V2', b'V\xd5&B"\xe2\xd8B\x00@\xfaB'),
    ('V3', b'V\xd5&B\x00@\xfaB\xf0\xce\rC'),
])

# The simplest 432-byte headers are one of these 12-byte substrings repeated 36 times
simpleHeaders = [byte_strings[label] * 36 for label in ('431', 'V1', 'V2')]

# The 80 bytes that follow are mostly zero. The non-zero values below are the ones
# hardcoded by tja2bin.exe (implying they have little/no effect on how the song is parsed).
# Written out four bytes per row, with the starting byte offset of each row noted.
sampleHeaderMetadata = (
    [0, 0, 0, 0]            # 0
    + [16, 39, 0, 0]        # 4
    + [0, 0, 0, 0]          # 8   (bytes 8+9 are filled in on a song-by-song basis)
    + [10, 0, 0, 0]         # 12
    + [8, 0, 0, 0]          # 16
    + [0, 255, 255, 255]    # 20  (byte 20 is filled in on a song-by-song basis)
    + [0, 0, 1, 0]          # 24
    + [0, 0, 1, 0]          # 28
    + [0, 0, 1, 0]          # 32
    + [20, 0, 0, 0]         # 36
    + [10, 0, 0, 0]         # 40
    + [0, 0, 0, 0]          # 44
    + [1, 0, 0, 0]          # 48
    + [20, 0, 0, 0]         # 52
    + [10, 0, 0, 0]         # 56
    + [1, 0, 0, 0]          # 60
    + [30, 0, 0, 0]         # 64
    + [30, 0, 0, 0]         # 68
    + [20, 0, 0, 0]         # 72
    + [78, 97, 188, 0]      # 76
)
TJA_COURSE_NAMES = []
for difficulty in ['Ura', 'Oni', 'Hard', 'Normal', 'Easy']:
for player in ['', 'P1', 'P2']:
@ -111,15 +66,6 @@ NORMALIZE_COURSE = {
'Edit': 'Ura'
}
# Two-byte header value per course name. Several courses share the same pair,
# so the table is expanded from (courses, pair) groups; every course still gets
# its own independent list object.
DIFFICULTY_BYTES = {
    course: list(byte_pair)
    for courses, byte_pair in (
        (('Easy',), (112, 23)),
        (('Normal', 'Hard'), (88, 27)),
        (('Oni', 'Ura', 'Edit'), (64, 31)),
    )
    for course in courses
}
COURSE_IDS = {
'Easy': 'e',
'Normal': 'n',

View File

@ -1,7 +1,5 @@
import re
from tja2fumen.utils import computeSoulGaugeBytes
from tja2fumen.constants import DIFFICULTY_BYTES
from tja2fumen.types import TJAMeasureProcessed, FumenCourse, FumenNote
@ -123,7 +121,6 @@ def convertTJAToFumen(tja):
# Pre-allocate the measures for the converted TJA
fumen = FumenCourse(
measures=len(processedTJABranches['normal']),
hasBranches=all([len(b) for b in processedTJABranches.values()]),
scoreInit=tja.scoreInit,
scoreDiff=tja.scoreDiff,
)
@ -297,20 +294,9 @@ def convertTJAToFumen(tja):
measureFumen.branches[currentBranch].length = note_counter
total_notes += note_counter
# Take a stock header metadata sample and add song-specific metadata
fumen.headerMetadata[8] = DIFFICULTY_BYTES[tja.course][0]
fumen.headerMetadata[9] = DIFFICULTY_BYTES[tja.course][1]
soulGaugeBytes = computeSoulGaugeBytes(
n_notes=total_notes,
difficulty=tja.course,
stars=tja.level
)
fumen.headerMetadata[12] = soulGaugeBytes[0]
fumen.headerMetadata[13] = soulGaugeBytes[1]
fumen.headerMetadata[16] = soulGaugeBytes[2]
fumen.headerMetadata[17] = soulGaugeBytes[3]
fumen.headerMetadata[20] = soulGaugeBytes[4]
fumen.headerMetadata[21] = soulGaugeBytes[5]
fumen.headerMetadata = b"".join(i.to_bytes(1, 'little') for i in fumen.headerMetadata)
# Set song-specific metadata
fumen.header.b512_b515_number_of_measures = len(fumen.measures)
fumen.header.b432_b435_has_branches = int(all([len(b) for b in processedTJABranches.values()]))
fumen.header.set_hp_bytes(total_notes, tja.course, tja.level)
return fumen

View File

@ -2,9 +2,10 @@ import os
import re
from copy import deepcopy
from tja2fumen.utils import readStruct, getBool, shortHex
from tja2fumen.utils import readStruct, shortHex
from tja2fumen.constants import NORMALIZE_COURSE, TJA_NOTE_TYPES, branchNames, noteTypes
from tja2fumen.types import TJASong, TJAMeasure, TJAData, FumenCourse, FumenMeasure, FumenBranch, FumenNote
from tja2fumen.types import (TJASong, TJAMeasure, TJAData,
FumenCourse, FumenMeasure, FumenBranch, FumenNote, FumenHeader)
########################################################################################################################
# TJA-parsing functions ( Original source: https://github.com/WHMHammer/tja-tools/blob/master/src/js/parseTJA.js)
@ -261,33 +262,13 @@ def readFumen(fumenFile, exclude_empty_measures=False):
file = open(fumenFile, "rb")
size = os.fstat(file.fileno()).st_size
# Fetch the header bytes
fumenHeader = file.read(512)
# Determine:
# - The byte order (big or little endian)
# - The total number of measures from byte 0x200 (decimal 512)
measuresBig = readStruct(file, order="", format_string=">I", seek=0x200)[0]
measuresLittle = readStruct(file, order="", format_string="<I", seek=0x200)[0]
if measuresBig < measuresLittle:
order = ">"
totalMeasures = measuresBig
else:
order = "<"
totalMeasures = measuresLittle
# Initialize the dict that will contain the chart information
song = FumenCourse(
headerPadding=fumenHeader[:432],
headerMetadata=fumenHeader[-80:],
order=order,
unknownMetadata=readStruct(file, order, format_string="I", seek=0x204)[0],
hasBranches=getBool(readStruct(file, order, format_string="B", seek=0x1b0)[0])
header=FumenHeader(raw_bytes=file.read(520))
)
# Start reading measure data from position 0x208 (decimal 520)
file.seek(0x208)
for measureNumber in range(totalMeasures):
for measureNumber in range(song.header.b512_b515_number_of_measures):
# Parse the measure data using the following `format_string`:
# "ffBBHiiiiiii" (12 format characters, 40 bytes per measure)
# - 'f': BPM (represented by one float (4 bytes))
@ -297,14 +278,14 @@ def readFumen(fumenFile, exclude_empty_measures=False):
# - 'H': <padding> (represented by one unsigned short (2 bytes))
# - 'iiiiii': branchInfo (represented by six integers (24 bytes))
# - 'i': <padding> (represented by one integer (4 bytes)
measureStruct = readStruct(file, order, format_string="ffBBHiiiiiii")
measureStruct = readStruct(file, song.header.order, format_string="ffBBHiiiiiii")
# Create the measure dictionary using the newly-parsed measure data
measure = FumenMeasure(
bpm=measureStruct[0],
fumenOffsetStart=measureStruct[1],
gogo=getBool(measureStruct[2]),
barline=getBool(measureStruct[3]),
gogo=measureStruct[2],
barline=measureStruct[3],
padding1=measureStruct[4],
branchInfo=list(measureStruct[5:11]),
padding2=measureStruct[11]
@ -317,7 +298,7 @@ def readFumen(fumenFile, exclude_empty_measures=False):
# - 'H': totalNotes (represented by one unsigned short (2 bytes))
# - 'H': <padding> (represented by one unsigned short (2 bytes))
# - 'f': speed (represented by one float (4 bytes)
branchStruct = readStruct(file, order, format_string="HHf")
branchStruct = readStruct(file, song.header.order, format_string="HHf")
# Create the branch dictionary using the newly-parsed branch data
totalNotes = branchStruct[0]
@ -339,7 +320,7 @@ def readFumen(fumenFile, exclude_empty_measures=False):
# - 'H': scoreDiff
# - 'f': duration
# NB: 'item' doesn't seem to be used at all in this function.
noteStruct = readStruct(file, order, format_string="ififHHf")
noteStruct = readStruct(file, song.header.order, format_string="ififHHf")
# Validate the note type
noteType = noteStruct[0]

View File

@ -1,4 +1,8 @@
from tja2fumen.constants import sampleHeaderMetadata, simpleHeaders, TJA_COURSE_NAMES
import csv
import os
import struct
from tja2fumen.constants import TJA_COURSE_NAMES
class TJASong:
@ -71,19 +75,14 @@ class TJAData:
class FumenCourse:
def __init__(self, measures=None, hasBranches=False, scoreInit=0, scoreDiff=0,
order='<', headerPadding=None, headerMetadata=None, unknownMetadata=0):
def __init__(self, measures=None, header=None, scoreInit=0, scoreDiff=0):
if isinstance(measures, int):
self.measures = [FumenMeasure() for _ in range(measures)]
else:
self.measures = [] if measures is None else measures
self.hasBranches = hasBranches
self.header = FumenHeader() if header is None else header
self.scoreInit = scoreInit
self.scoreDiff = scoreDiff
self.order = order
self.headerPadding = simpleHeaders.copy()[0] if headerPadding is None else headerPadding
self.headerMetadata = sampleHeaderMetadata.copy() if headerMetadata is None else headerMetadata
self.unknownMetadata = unknownMetadata
def __repr__(self):
return str(self.__dict__)
@ -138,3 +137,155 @@ class FumenNote:
def __repr__(self):
return str(self.__dict__)
class FumenHeader:
    """In-memory form of the 520-byte header found at the start of a fumen file.

    As parsed and written by this class, the header consists of 108 floats of
    timing windows (bytes 0-431) followed by 22 four-byte signed integers
    (bytes 432-519). Every integer attribute carries its byte range as a name
    prefix, e.g. ``b436_b439_hp_max``.

    Args:
        raw_bytes: Optional bytes-like object holding at least 520 header
            bytes. When omitted, a little-endian header with default values is
            created.

    Raises:
        struct.error: If ``raw_bytes`` is shorter than the header.
    """

    # Number of floats in the timing-window region (36 groups of 3).
    _N_TIMING_FLOATS = 36 * 3
    # Default timing windows: one 12-byte group repeated 36 times.
    _DEFAULT_TIMING_BYTES = b'43\xc8Ag&\x96B"\xe2\xd8B' * 36
    # Integer fields in on-disk order (starting at byte 432) with their defaults.
    # This single table drives default assignment, parsing and serialization,
    # so the byte layout cannot drift between the three.
    _INT_DEFAULTS = {
        "b432_b435_has_branches": 0,
        "b436_b439_hp_max": 10000,
        "b440_b443_hp_clear": 8000,
        "b444_b447_hp_gain_good": 10,
        "b448_b451_hp_gain_ok": 5,
        "b452_b455_hp_loss_bad": -20,
        "b456_b459_normal_normal_ratio": 65536,
        "b460_b463_normal_professional_ratio": 65536,
        "b464_b467_normal_master_ratio": 65536,
        "b468_b471_branch_points_good": 20,
        "b472_b475_branch_points_ok": 10,
        "b476_b479_branch_points_bad": 0,
        "b480_b483_branch_points_drumroll": 1,
        "b484_b487_branch_points_good_BIG": 20,
        "b488_b491_branch_points_ok_BIG": 10,
        "b492_b495_branch_points_drumroll_BIG": 1,
        "b496_b499_branch_points_balloon": 30,
        "b500_b503_branch_points_kusudama": 30,
        "b504_b507_branch_points_unknown": 20,
        "b508_b511_dummy_data": 12345678,
        "b512_b515_number_of_measures": 0,
        "b516_b519_unknown_data": 0,
    }
    _INT_FIELDS = tuple(_INT_DEFAULTS)
    # struct format for the whole header, without the byte-order prefix.
    _FORMAT_BODY = f"{_N_TIMING_FLOATS}f{len(_INT_FIELDS)}i"
    # (LUT file prefix, attribute to update, offset added to the LUT value).
    _HP_LUTS = (
        ("byte1213", "b444_b447_hp_gain_good", 0),
        ("byte1617", "b448_b451_hp_gain_ok", 0),
        ("byte2021", "b452_b455_hp_loss_bad", -765),
    )

    def __init__(self, raw_bytes=None):
        if raw_bytes is None:
            self.order = "<"
            self._assign_default_header_values()
        else:
            self.order = self._parse_order(raw_bytes)
            self._parse_header_values(raw_bytes)

    def _assign_default_header_values(self):
        """Populate every header attribute with its default value."""
        self.b000_b431_timing_windows = struct.unpack(
            f"{self.order}{self._N_TIMING_FLOATS}f", self._DEFAULT_TIMING_BYTES)
        for name, default in self._INT_DEFAULTS.items():
            setattr(self, name, default)

    def _parse_header_values(self, raw_bytes):
        """Decode all header attributes from ``raw_bytes`` using ``self.order``."""
        # unpack_from tolerates trailing data and raises struct.error when
        # the buffer is too short to contain a full header.
        values = struct.unpack_from(self.order + self._FORMAT_BODY, raw_bytes)
        self.b000_b431_timing_windows = values[:self._N_TIMING_FLOATS]
        for name, value in zip(self._INT_FIELDS, values[self._N_TIMING_FLOATS:]):
            setattr(self, name, value)

    @staticmethod
    def _parse_order(raw_bytes):
        """Guess the byte order from the measure count stored at bytes 512-515.

        The count is read both ways; big-endian (">") is chosen only when it
        gives the strictly smaller number, otherwise little-endian ("<").
        """
        as_big = struct.unpack_from(">I", raw_bytes, 512)[0]
        as_little = struct.unpack_from("<I", raw_bytes, 512)[0]
        return ">" if as_big < as_little else "<"

    def set_hp_bytes(self, n_notes, difficulty, stars):
        """Set the HP (soul gauge) fields for a chart.

        Args:
            n_notes: Total number of notes in the chart.
            difficulty: Course name ('Easy', 'Normal', 'Hard', 'Oni', 'Ura'
                or 'Edit').
            stars: Star rating of the chart.

        Raises:
            ValueError: If no lookup table exists for ``difficulty``/``stars``.
        """
        self._get_hp_from_LUTs(n_notes, difficulty, stars)
        if difficulty == 'Easy':
            self.b440_b443_hp_clear = 6000
        elif difficulty in ['Normal', 'Hard']:
            self.b440_b443_hp_clear = 7000
        elif difficulty in ['Oni', 'Ura', 'Edit']:
            self.b440_b443_hp_clear = 8000

    @staticmethod
    def _lut_key(difficulty, stars):
        """Return the lookup-table suffix for a difficulty/star combination.

        'Edit' shares the Oni/Ura tables, matching how ``set_hp_bytes`` groups
        the three courses.

        Raises:
            ValueError: If the combination maps to no table (previously this
                surfaced as an UnboundLocalError further down).
        """
        if difficulty in ('Oni', 'Ura', 'Edit'):
            if 9 <= stars:
                return "Oni-9-10"
            if stars == 8:
                return "Oni-8"
            if stars <= 7:
                return "Oni-1-7"
        elif difficulty == 'Hard':
            if 5 <= stars:
                return "Hard-5-8"
            if stars == 4:
                return "Hard-4"
            if stars == 3:
                return "Hard-3"
            if stars <= 2:
                return "Hard-1-2"
        elif difficulty == 'Normal':
            if 5 <= stars:
                return "Normal-5-7"
            if stars == 4:
                return "Normal-4"
            if stars == 3:
                return "Normal-3"
            if stars <= 2:
                return "Normal-1-2"
        elif difficulty == 'Easy':
            if 4 <= stars:
                return "Easy-4-5"
            if 2 <= stars <= 3:
                return "Easy-2-3"
            if stars <= 1:
                return "Easy-1"
        raise ValueError(
            f"No soul gauge lookup table for difficulty {difficulty!r} "
            f"with {stars!r} stars")

    def _get_hp_from_LUTs(self, n_notes, difficulty, stars):
        """Update the HP gain/loss fields from the CSV lookup tables.

        Each table is scanned for a row whose first column equals ``n_notes``.
        When no row matches, the corresponding attribute keeps its current
        value.
        """
        key = self._lut_key(difficulty, stars)
        pkg_dir = os.path.dirname(os.path.realpath(__file__))
        for prefix, attribute, offset in self._HP_LUTS:
            lut_path = os.path.join(pkg_dir, "soulgauge_LUTs", f"{prefix}_{key}.csv")
            with open(lut_path, newline='') as csvfile:
                for row in csv.reader(csvfile, delimiter=','):
                    # Blank lines come back as empty rows; skip them.
                    if row and int(row[0]) == n_notes:
                        setattr(self, attribute, int(row[1]) + offset)
                        break

    @property
    def raw_bytes(self):
        """Serialize the header to its 520-byte on-disk form.

        Raises:
            ValueError: If the timing windows do not hold exactly 108 values.
        """
        timings = tuple(self.b000_b431_timing_windows)
        if len(timings) != self._N_TIMING_FLOATS:
            raise ValueError(
                f"Expected {self._N_TIMING_FLOATS} timing window values, "
                f"got {len(timings)}")
        # Fields are packed in explicit layout order rather than relying on
        # the insertion order of the instance __dict__.
        integers = [getattr(self, name) for name in self._INT_FIELDS]
        return struct.pack(self.order + self._FORMAT_BODY, *timings, *integers)

    def __repr__(self):
        return str([v if not isinstance(v, tuple)
                    else [round(timing, 2) for timing in v[:3]]  # Display truncated version of timing windows
                    for v in self.__dict__.values()])

View File

@ -1,72 +1,4 @@
import os
import struct
import csv
def _soulGaugeLutKey(difficulty, stars):
    """Return the lookup-table suffix for a difficulty/star combination, or raise ValueError."""
    # 'Edit' is another name for the Ura course, so it shares the Oni/Ura tables.
    if difficulty in ('Oni', 'Ura', 'Edit'):
        if 9 <= stars:
            return "Oni-9-10"
        if stars == 8:
            return "Oni-8"
        if stars <= 7:
            return "Oni-1-7"
    elif difficulty == 'Hard':
        if 5 <= stars:
            return "Hard-5-8"
        if stars == 4:
            return "Hard-4"
        if stars == 3:
            return "Hard-3"
        if stars <= 2:
            return "Hard-1-2"
    elif difficulty == 'Normal':
        if 5 <= stars:
            return "Normal-5-7"
        if stars == 4:
            return "Normal-4"
        if stars == 3:
            return "Normal-3"
        if stars <= 2:
            return "Normal-1-2"
    elif difficulty == 'Easy':
        if 4 <= stars:
            return "Easy-4-5"
        if 2 <= stars <= 3:
            return "Easy-2-3"
        if stars <= 1:
            return "Easy-1"
    raise ValueError(
        f"No soul gauge lookup table for difficulty {difficulty!r} with {stars!r} stars")


def _lookupSoulGaugeLut(lut_path, n_notes):
    """Return the second-column value of the first row matching ``n_notes``, or None."""
    with open(lut_path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            # Blank lines come back as empty rows; skip them.
            if row and int(row[0]) == n_notes:
                return int(row[1])
    return None


def computeSoulGaugeBytes(n_notes, difficulty, stars):
    """Compute the six soul gauge header bytes for a chart.

    Args:
        n_notes: Total number of notes in the chart.
        difficulty: Course name ('Easy', 'Normal', 'Hard', 'Oni', 'Ura' or 'Edit').
        stars: Star rating of the chart.

    Returns:
        Tuple of byte values for header metadata positions (12, 13, 16, 17, 20, 21).

    Raises:
        ValueError: If no lookup table exists for ``difficulty``/``stars``
            (previously this surfaced as an UnboundLocalError).
    """
    key = _soulGaugeLutKey(difficulty, stars)

    # Set default values for soul gauge bytes.
    # NB: These will only be used if n_notes > 2500 (i.e. the most extreme, impossible case, beyond all official charts)
    soulGaugeByte12, soulGaugeByte13 = 255, 3
    soulGaugeByte16, soulGaugeByte17 = 255, 2
    soulGaugeByte20, soulGaugeByte21 = 255, 253

    lut_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "soulgauge_LUTs")

    # NOTE(review): the low/high split below uses base 255 rather than 256, exactly as before.
    # Presumably the LUT values are encoded to match -- confirm before changing.
    value = _lookupSoulGaugeLut(os.path.join(lut_dir, f"byte1213_{key}.csv"), n_notes)
    if value is not None:
        soulGaugeByte12, soulGaugeByte13 = value % 255, value // 255

    value = _lookupSoulGaugeLut(os.path.join(lut_dir, f"byte1617_{key}.csv"), n_notes)
    if value is not None:
        soulGaugeByte16, soulGaugeByte17 = value % 255, value // 255

    value = _lookupSoulGaugeLut(os.path.join(lut_dir, f"byte2021_{key}.csv"), n_notes)
    if value is not None:
        soulGaugeByte20, soulGaugeByte21 = value % 255, 253 + (value // 255)

    return soulGaugeByte12, soulGaugeByte13, soulGaugeByte16, soulGaugeByte17, soulGaugeByte20, soulGaugeByte21
def readStruct(file, order, format_string, seek=None):
@ -106,11 +38,3 @@ def writeStruct(file, order, format_string, value_list, seek=None):
def shortHex(number):
    """Return the hexadecimal digits of an integer without the '0x' prefix.

    Negative numbers keep their sign (e.g. -5 -> '-5'); slicing off the first
    two characters used to turn '-0x5' into 'x5'.
    """
    return hex(number).replace('0x', '', 1)
def getBool(number):
    """Map 1 to True and 0 to False; any other value is returned unchanged."""
    if number == 0x1:
        return True
    if number == 0x0:
        return False
    return number
def putBool(boolean):
    """Map True to 1 and False to 0; any other object is returned unchanged."""
    if boolean is True:
        return 0x1
    if boolean is False:
        return 0x0
    return boolean

View File

@ -1,18 +1,16 @@
from tja2fumen.utils import writeStruct, putBool
from tja2fumen.utils import writeStruct
from tja2fumen.constants import branchNames, typeNotes
def writeFumen(path_out, song):
# Fetch the byte order (little/big endian)
order = song.order
order = song.header.order
# Write the header
file = open(path_out, "wb")
file.write(song.headerPadding) # Write header padding bytes
file.write(song.headerMetadata) # Write header metadata bytes
file.write(song.header.raw_bytes) # Write header padding bytes
# Preallocate space in the file
len_metadata = 8
len_measures = 0
for measureNumber in range(len(song.measures)):
len_measures += 40
@ -25,12 +23,7 @@ def writeFumen(path_out, song):
note = branch.notes[noteNumber]
if note.type.lower() == "drumroll":
len_measures += 8
file.write(b'\x00' * (len_metadata + len_measures))
# Write metadata
writeStruct(file, order, format_string="B", value_list=[putBool(song.hasBranches)], seek=0x1b0)
writeStruct(file, order, format_string="I", value_list=[len(song.measures)], seek=0x200)
writeStruct(file, order, format_string="I", value_list=[song.unknownMetadata], seek=0x204)
file.write(b'\x00' * len_measures)
# Write measure data
file.seek(0x208)

View File

@ -8,7 +8,7 @@ import pytest
from tja2fumen import main as convert
from tja2fumen.parsers import readFumen
from tja2fumen.constants import COURSE_IDS, NORMALIZE_COURSE, simpleHeaders, byte_strings
from tja2fumen.constants import COURSE_IDS, NORMALIZE_COURSE
@pytest.mark.parametrize('id_song', [
@ -66,11 +66,11 @@ def test_converted_tja_vs_cached_fumen(id_song, tmp_path, entry_point):
co_song = readFumen(path_out, exclude_empty_measures=True)
ca_song = readFumen(os.path.join(path_bin, os.path.basename(path_out)), exclude_empty_measures=True)
# 1. Check song headers
checkValidHeader(co_song.headerPadding+co_song.headerMetadata, strict=True)
checkValidHeader(ca_song.headerPadding+ca_song.headerMetadata)
checkValidHeader(co_song.header)
checkValidHeader(ca_song.header)
# 2. Check song metadata
assert_song_property(co_song, ca_song, 'order')
assert_song_property(co_song, ca_song, 'hasBranches')
assert_song_property(co_song.header, ca_song.header, 'order')
assert_song_property(co_song.header, ca_song.header, 'b432_b435_has_branches')
assert_song_property(co_song, ca_song, 'scoreInit')
assert_song_property(co_song, ca_song, 'scoreDiff')
# 3. Check measure data
@ -152,217 +152,24 @@ def normalize_type(note_type):
return re.sub(r'[0-9]', '', note_type)
def checkValidHeader(headerBytes, strict=False):
    """Assert that a 512-byte fumen header matches the byte patterns seen in valid fumens.

    Args:
        headerBytes: The 512 header bytes (432 padding bytes followed by 80 metadata bytes).
        strict: When True, only the exact values written by the TJA converter are accepted
            for bytes that vary between platforms; when False, every value observed in
            official fumens is accepted.

    Raises:
        AssertionError: If any byte falls outside the accepted values.
    """
    # Fumen headers should contain 512 bytes.
    assert len(headerBytes) == 512

    # The header for fumens can be split into 2 groups: The first 432 bytes (padding), and the last 80 bytes (metadata).
    headerPadding = headerBytes[:432]
    headerMetadata = headerBytes[-80:]

    # 1. Check the header's padding bytes for several possible combinations
    # 1a. These simple headers (12-byte substrings repeated 36 times) are used for many Gen2 systems (AC, Wii, etc.)
    cond1 = headerPadding in simpleHeaders
    # 1b. Starting with Gen3, they began using unique headers for every song. (3DS and PSPDX are the big offenders.)
    #   - They seem to be some random combination of b_x00 + one of the non-null byte substrings.
    #   - To avoid enumerating every combination of 432 bytes, we do a lazy check instead.
    cond2 = (byte_strings['x00'] in headerPadding and
             any(b in headerPadding for b in
                 [byte_strings[key] for key in ['431', '432', '433', '434', 'V1', 'V2', 'V3']]))
    # 1c. The PS4 song 'wii5op' is a special case: It throws in this odd 'g1' string in combo with 2 other substrings.
    cond3 = (byte_strings['g1'] in headerPadding and
             any(b in headerPadding for b in [byte_strings[key] for key in ['431', 'V2']]))
    # Otherwise, this is some unknown header we haven't seen before.
    assert cond1 or cond2 or cond3, "Header padding bytes do not match expected fumen byte substrings."

    # 2. Check the header's metadata bytes
    for idx, val in enumerate(headerMetadata):
        # Whether the song has branches
        if idx == 0:
            assert val in [0, 1], f"Expected 0/1 at position '{idx}', got '{val}' instead."

        # 0. Unknown
        # Notes:
        #   * Breakdown of distribution of different byte combinations:
        #       - 5832/7482 charts: [0, 0, 0] (Most platforms)
        #       - 386/7482 charts: [151, 68, 0]
        #       - 269/7482 charts: [1, 57, 0]
        #       - 93/7482 charts: [64, 153, 0]
        #       - And more...
        #   - After this, we see a long tail of hundreds of different unique byte combinations.
        #   * Games with the greatest number of unique byte combinations:
        #       - VitaMS: 258 unique byte combinations
        #       - iOSU: 164 unique byte combinations
        #       - Vita: 153 unique byte combinations
        # Given that most platforms use the values (0, 0, 0), and unique values are very platform-specific,
        # I'm going to stick with (0, 0, 0) bytes when it comes to converting TJA files to fumens.
        elif idx in [1, 2, 3]:
            if strict:
                assert val == 0, f"Expected 0 at position '{idx}', got '{val}' instead."
            else:
                pass

        # 1. <padding>
        # Notes: These values are ALWAYS (16, 39), for every valid fumen.
        elif idx == 4:
            assert val == 16, f"Expected 16 at position '{idx}', got '{val}' instead."
        elif idx == 5:
            assert val == 39, f"Expected 39 at position '{idx}', got '{val}' instead."

        # 2. Difficulty
        # Notes:
        #   * Breakdown of distribution of different byte combinations:
        #       - 1805/7482 charts: [112, 23] (Easy)
        #       - 3611/7482 charts: [88, 27] (Normal, Hard)
        #       - 2016/7482 charts: [64, 31] (Oni, Ura)
        #   * In other words, all 5 difficulties map to only three different byte-pairs across all valid fumens.
        elif idx == 8:
            assert val in [88, 64, 112], f"Expected 88/64/112 at position '{idx}', got '{val}' instead."
        elif idx == 9:
            assert val in [27, 31, 23], f"Expected 27/31/23 at position '{idx}', got '{val}' instead."

        # 6. Soul gauge bytes
        # Notes:
        #   * These bytes determine how quickly the soul gauge should increase
        #   * The precise mapping between n_notes and byte values is complex, and depends on difficulty/stars.
        #       - See also: https://github.com/vivaria/tja2fumen/issues/14
        #   * Generally speaking, though, the higher the number of notes, then:
        #       - The lower that bytes 12/16 will go.
        #       - The higher that byte 21 will go.
        #   * Also, most of the time [13, 17] will be 0 and [21, 22, 23] will be 255.
        #   * However, a very small number of songs (~30) have values different from 0/255.
        #       - This applies to Easy/Normal songs with VERY few notes (<30).
        #   * Bytes 12/16 will go above 255 and wrap around back to >=0, incrementing bytes 13/17 by one.
        #   * Byte 20 will go below and wrap around back to <=255, decrementing byte 21 by one.
        elif idx == 12:
            assert 1 <= val <= 255
        elif idx == 13:
            assert val in [0, 1, 2, 3]
        elif idx == 16:
            assert 1 <= val <= 255
        elif idx == 17:
            assert val in [0, 1, 2, 3]
        elif idx == 20:
            assert 1 <= val <= 255
        elif idx == 21:
            assert val in [253, 254, 255]
        elif idx in [22, 23]:
            assert val == 255

        # 7. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes 26, 30, and 34 have the values (1, 1, 1)
        #   * For a small minority of charts (~100), one or both of bytes 30/34 will be 0 instead of 1
        # Given that most platforms use the values (1, 1, 1), and unique values are very platform-specific,
        # I'm going to stick with (1, 1, 1) when it comes to converting TJA files to fumens.
        elif idx == 26:
            assert val == 1, f"Expected 1 at position '{idx}', got '{val}' instead."
        elif idx in [30, 34]:
            if strict:
                assert val == 1, f"Expected 1 at position '{idx}', got '{val}' instead."
            else:
                assert val in [1, 0], f"Expected 1/0 at position '{idx}', got '{val}' instead."

        # 8. Unknown
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (28, 29) and (32, 33) have the values (0, 0)
        #   * But, for some games (Gen3Arcade, 3DS), unique values will be stored in these bytes.
        # Given that most platforms use the values (0, 0), and unique values are very platform-specific,
        # I'm going to stick with (0, 0) when it comes to converting TJA files to fumens.
        elif idx in [28, 29]:
            if strict:
                assert val == 0, f"Expected 0 at position '{idx}', got '{val}' instead."
            else:
                pass
        elif idx in [32, 33]:
            if strict:
                assert val == 0, f"Expected 0 at position '{idx}', got '{val}' instead."
            else:
                pass

        # 9. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (36, 40, 48) and (52, 56, 60) have the values (20, 10, 1)
        #   * For a small minority of charts (~45), these values can be 0,1,2 instead.
        # Given that most platforms use the values (20, 10, 1), and unique values are very platform-specific,
        # I'm going to stick with (20, 10, 1) when it comes to converting TJA files to fumens.
        elif idx in [36, 52]:
            if strict:
                assert val == 20, f"Expected 20 at position '{idx}', got '{val}' instead."
            else:
                assert val in [20, 0, 1, 2], f"Expected 20 (or 0,1,2) at position '{idx}', got '{val}' instead."
        elif idx in [40, 56]:
            if strict:
                assert val == 10, f"Expected 10 at position '{idx}', got '{val}' instead."
            else:
                assert val in [10, 0, 1], f"Expected 10 (or 0,1) at position '{idx}', got '{val}' instead."
        elif idx in [48, 60]:
            if strict:
                assert val == 1, f"Expected 1 at position '{idx}', got '{val}' instead."
            else:
                # NB: See below for an explanation for why '255' is included for byte 60
                assert val in [1, 0, 255], f"Expected 1 (or 0,255) at position '{idx}', got '{val}' instead."

        # 10. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (61, 62, 63) have the values (0, 0, 0)
        #   * However, for iOS and iOSU charts (144 total), bytes (60, 61, 62, 63) are (255, 255, 255, 255) instead.
        # Given that most platforms use the values (0, 0, 0), and unique values are very platform-specific,
        # I'm going to stick with (0, 0, 0) when it comes to converting TJA files to fumens.
        elif idx in [61, 62, 63]:
            if strict:
                assert val == 0, f"Expected 0 at position '{idx}', got '{val}' instead."
            else:
                assert val in [0, 255], f"Expected 0/255 at position '{idx}', got '{val}' instead."

        # 11. <padding>
        # Notes:
        #   * Breakdown of distribution of different byte combinations:
        #       - 5809/7482 charts: (30, 30, 20)
        #       - 1577/7482 charts: (30, 30, 0)
        #       - 41/7482 charts: (0, 0, 0)
        #       - 3/7482 charts: (1, 0, 0)
        #       - 2/7482 charts: (0, 0, 20)
        # Given that most platforms use the values (30, 30, 20), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [64, 68]:
            if strict:
                assert val == 30, f"Expected 30 at position '{idx}', got '{val}' instead."
            else:
                assert val in [30, 0, 1], f"Expected 30 (or 0,1) at position '{idx}', got '{val}' instead."
        elif idx == 72:
            if strict:
                assert val == 20, f"Expected 20 at position '{idx}', got '{val}' instead."
            else:
                assert val in [20, 0], f"Expected 20 (or 0) at position '{idx}', got '{val}' instead."

        # 12. Difficulty (Gen2) and ???? (Gen3)
        # Notes:
        #   * In Gen2 charts (AC, Wii), these values would be one of 4 different byte combinations.
        #   * These values correspond to the difficulty of the song (no Uras in Gen2, hence 4 values):
        #       - [192, 42, 12] (Easy)
        #       - [92, 205, 23] (Normal)
        #       - [8, 206, 31] (Hard)
        #       - [288, 193, 44] (Oni)
        #   * However, starting in Gen3 (AC, console), these bytes were given unique per-song, per-chart values.
        #       - In total, Gen3 contains 6449 unique combinations of bytes (with some minor overlaps between games).
        # For TJA conversion, I plan to just stick with one set of values (78, 97, 188) -- also used by tja2bin.exe.
        elif idx == 76:
            if strict:
                assert val == 78, f"Expected 78 at position '{idx}', got '{val}' instead."
            else:
                pass
        elif idx == 77:
            if strict:
                assert val == 97, f"Expected 97 at position '{idx}', got '{val}' instead."
            else:
                pass
        elif idx == 78:
            if strict:
                assert val == 188, f"Expected 188 at position '{idx}', got '{val}' instead."
            else:
                pass

        # 13. Empty bytes
        else:
            assert val == 0, f"Expected 0 at position '{idx}', got '{val}' instead."
def checkValidHeader(header):
    """Assert that a parsed fumen header serializes to 520 bytes and holds accepted values.

    The checks run in header byte order and stop at the first failure.

    Raises:
        AssertionError: If the serialized length or any checked field is outside the accepted values.
    """
    # The serialized header must always be exactly 520 bytes long.
    assert len(header.raw_bytes) == 520

    # (attribute name, predicate the attribute's value has to satisfy)
    field_checks = [
        ("b432_b435_has_branches", lambda v: v in [0, 1]),
        ("b436_b439_hp_max", lambda v: v == 10000),
        ("b440_b443_hp_clear", lambda v: v in [6000, 7000, 8000]),
        ("b444_b447_hp_gain_good", lambda v: 10 <= v <= 1020),
        ("b448_b451_hp_gain_ok", lambda v: 5 <= v <= 1020),
        ("b452_b455_hp_loss_bad", lambda v: -765 <= v <= -20),
        ("b456_b459_normal_normal_ratio", lambda v: v <= 65536),
        ("b460_b463_normal_professional_ratio", lambda v: v <= 65536),
        ("b464_b467_normal_master_ratio", lambda v: v <= 65536),
        ("b468_b471_branch_points_good", lambda v: v in [20, 0, 1, 2]),
        ("b472_b475_branch_points_ok", lambda v: v in [10, 0, 1]),
        ("b476_b479_branch_points_bad", lambda v: v == 0),
        ("b480_b483_branch_points_drumroll", lambda v: v in [1, 0]),
        ("b484_b487_branch_points_good_BIG", lambda v: v in [20, 0, 1, 2]),
        ("b488_b491_branch_points_ok_BIG", lambda v: v in [10, 0, 1]),
        ("b492_b495_branch_points_drumroll_BIG", lambda v: v in [1, 0]),
        ("b496_b499_branch_points_balloon", lambda v: v in [30, 0, 1]),
        ("b500_b503_branch_points_kusudama", lambda v: v in [30, 0]),
    ]
    for attribute, is_acceptable in field_checks:
        assert is_acceptable(getattr(header, attribute))

View File

@ -1,26 +0,0 @@
import pytest
from tja2fumen.utils import computeSoulGaugeBytes
@pytest.mark.skip("Incomplete test")
@pytest.mark.parametrize('difficulty,stars,n_notes,b20,b21', [
    ['Easy', 1, 24, 165, 254], ['Easy', 1, 54, 102, 255], ['Easy', 1, 112, 182, 255],
    # TODO: Fetch official fumen values for each difficulty-star pairing
    # ['Easy', 2, 0, 0, 0], ['Easy', 2, 0, 0, 0], ['Easy', 3, 0, 0, 0], ['Easy', 3, 0, 0, 0],
    # ['Easy', 4, 0, 0, 0], ['Easy', 4, 0, 0, 0], ['Easy', 5, 0, 0, 0], ['Easy', 5, 0, 0, 0],
    # ['Normal', 1, 0, 0, 0], ['Normal', 1, 0, 0, 0], ['Normal', 2, 0, 0, 0], ['Normal', 2, 0, 0, 0],
    # ['Normal', 3, 0, 0, 0], ['Normal', 3, 0, 0, 0], ['Normal', 3, 0, 0, 0],
    # ['Normal', 4, 0, 0, 0], ['Normal', 4, 0, 0, 0], ['Normal', 4, 0, 0, 0],
    # ['Normal', 5, 0, 0, 0], ['Normal', 6, 0, 0, 0], ['Normal', 7, 0, 0, 0],
    # ['Hard', 1, 0, 0, 0], ['Hard', 1, 0, 0, 0], ['Hard', 2, 0, 0, 0], ['Hard', 2, 0, 0, 0],
    # ['Hard', 3, 0, 0, 0], ['Hard', 3, 0, 0, 0], ['Hard', 3, 0, 0, 0],
    # ['Hard', 4, 0, 0, 0], ['Hard', 4, 0, 0, 0], ['Hard', 4, 0, 0, 0],
    # ['Hard', 5, 0, 0, 0], ['Hard', 6, 0, 0, 0], ['Hard', 7, 0, 0, 0], ['Hard', 8, 0, 0, 0],
    # ['Oni', 1, 0, 0, 0], ['Oni', 2, 0, 0, 0], ['Oni', 3, 0, 0, 0],
    # ['Oni', 4, 0, 0, 0], ['Oni', 5, 0, 0, 0], ['Oni', 6, 0, 0, 0], ['Oni', 7, 0, 0, 0],
    # ['Oni', 8, 0, 0, 0], ['Oni', 8, 0, 0, 0], ['Oni', 8, 0, 0, 0],
    # ['Oni', 9, 0, 0, 0], ['Oni', 9, 0, 0, 0], ['Oni', 10, 0, 0, 0], ['Oni', 10, 0, 0, 0],
])
def test_official_fumen_values(difficulty, stars, n_notes, b20, b21):
    """Check soul gauge bytes 20 and 21 against values taken from official fumens."""
    # computeSoulGaugeBytes returns bytes (12, 13, 16, 17, 20, 21); only the final
    # pair is parametrized here, so compare against that slice rather than the whole tuple.
    assert computeSoulGaugeBytes(n_notes, difficulty, stars)[4:6] == (b20, b21)