1
0
mirror of synced 2024-11-27 22:40:49 +01:00

Move checkValidHeader into testing code

This commit is contained in:
Viv 2023-06-25 13:19:50 -04:00
parent 903410dd99
commit 27b6dbe9d6
3 changed files with 193 additions and 194 deletions

View File

@ -1,7 +1,7 @@
import os
import re
from tja2fumen.utils import readStruct, getBool, shortHex, nameValue, debugPrint, checkValidHeader
from tja2fumen.utils import readStruct, getBool, shortHex, nameValue, debugPrint
from tja2fumen.constants import (
# TJA constants
HEADER_GLOBAL, HEADER_COURSE, BRANCH_COMMANDS, MEASURE_COMMANDS, COMMAND, NORMALIZE_COURSE,
@ -292,9 +292,8 @@ def readFumen(fumenFile, byteOrder=None, debug=False):
file = fumenFile
size = os.fstat(file.fileno()).st_size
# Check for valid fumen header (first 512 bytes) using valid byte substrings
# Fetch the header bytes
fumenHeader = file.read(512)
checkValidHeader(fumenHeader)
# Determine:
# - The byte order (big or little endian)

View File

@ -2,196 +2,6 @@ import sys
import struct
import math
from tja2fumen.constants import simpleHeaders, byte_strings
def checkValidHeader(headerBytes):
    """
    Assert that a 512-byte fumen header only contains byte patterns seen in known-valid fumens.

    The header is checked in two parts:
      1. The first 432 bytes (padding) must match one of the known byte-substring patterns.
      2. Each of the last 80 bytes (metadata) must hold a value that has been observed at that position.

    Args:
        headerBytes: The first 512 bytes of a fumen file (presumably a `bytes` object, as returned by
                     `file.read(512)` -- TODO confirm for other callers).

    Raises:
        AssertionError: If the header is not 512 bytes long, or if any byte holds an unexpected value.
    """
    # Fumen headers should contain 512 bytes.
    assert len(headerBytes) == 512, f"Expected 512 header bytes, got '{len(headerBytes)}' instead."

    # The header for fumens can be split into two groups: The first 432 bytes, and the last 80 bytes.
    headerPadding = headerBytes[:432]
    headerMetadata = headerBytes[-80:]

    # 1. Check the header's padding bytes for several possible combinations
    # 1a. These simple headers (substrings repeated 36 times) are used for many Gen2 systems (AC, Wii, etc.)
    cond1 = headerPadding in simpleHeaders
    # 1b. Starting with Gen3, they began using unique headers for every song. (3DS and PSPDX are the big offenders.)
    #     - They seem to be some random combination of b_x00 + one of the non-null byte substrings.
    #     - To avoid enumerating every combination of 432 bytes, we do a lazy check instead.
    cond2 = (byte_strings['x00'] in headerPadding and
             any(byte_strings[key] in headerPadding
                 for key in ['431', '432', '433', '434', 'V1', 'V2', 'V3']))
    # 1c. The PS4 song 'wii5op' is a special case: It throws in this odd 'g1' string in combo with 2 other substrings.
    cond3 = (byte_strings['g1'] in headerPadding and
             any(byte_strings[key] in headerPadding for key in ['431', 'V2']))
    # Otherwise, this is some unknown header we haven't seen before.
    assert cond1 or cond2 or cond3, "Header padding bytes do not match expected fumen byte substrings."

    # 2. Check the header's metadata bytes
    for idx, val in enumerate(headerMetadata):
        # 0. Unknown
        # Notes:
        #   * Breakdown of distribution of different byte combinations:
        #       - 5739/7482 charts: [0, 0, 0, 0] (Most platforms)
        #       - 386/7482 charts: [0, 151, 68, 0]
        #       - 269/7482 charts: [0, 1, 57, 0]
        #       - 93/7482 charts: [1, 0, 0, 0]
        #       - 93/7482 charts: [0, 64, 153, 0]
        #       - And more...
        #       - After this, we see a long tail of hundreds of different unique byte combinations.
        #   * Games with the greatest number of unique byte combinations:
        #       - VitaMS: 258 unique byte combinations
        #       - iOSU: 164 unique byte combinations
        #       - Vita: 153 unique byte combinations
        # Given that most platforms use the values (0, 0, 0, 0), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        if idx in [0, 1, 2, 3]:
            pass

        # 1. <padding>
        # Notes: These values are ALWAYS (16, 39), for every valid fumen.
        elif idx == 4:
            assert val == 16, f"Expected 16 at position '{idx}', got '{val}' instead."
        elif idx == 5:
            assert val == 39, f"Expected 39 at position '{idx}', got '{val}' instead."

        # 2. Difficulty
        # Notes:
        #   * Breakdown of distribution of different byte combinations:
        #       - 1805/7482 charts: [112, 23] (Easy)
        #       - 3611/7482 charts: [88, 27] (Normal, Hard)
        #       - 2016/7482 charts: [64, 31] (Oni, Ura)
        #   * In other words, all 5 difficulties map to only three different byte-pairs across all valid fumens.
        elif idx == 8:
            assert val in [88, 64, 112], f"Expected 88/64/112 at position '{idx}', got '{val}' instead."
        elif idx == 9:
            assert val in [27, 31, 23], f"Expected 27/31/23 at position '{idx}', got '{val}' instead."

        # 3. TODO: Note count / drumroll count / note score / song length / etc.
        # Notes:
        #   - For Oni songs, bytes (12, 16, 20) correlate with note count (bytes 13, 17 are always 0):
        #       * If we look at 10* Oni songs, we see the following 2 ends of the spectrum for bytes
        #         (12, 13, 16, 17, 20):
        #           - (9, 0, 4, 0, 238): Sotsu Omeshii Full (1487 notes)
        #           - (9, 0, 5, 0, 237): Shimedore 2000 (1414 notes)
        #                                Shimedore 2000+ (1414 notes)
        #                                Silent Jealousy (1408 notes)
        #                                The Future of the Taiko Drum (1400 notes)
        #                                Dairouketen Maou (1396 notes)
        #           - (10, 0, 5, 0, 235): Yugen no Ran (1262 notes)
        #           - [...]
        #           - (27, 0, 14, 0, 201): Pan vs. Gohan! Daikessen! [Normal Route] (480 notes)
        #           - (28, 0, 14, 0, 200): Anata to Tu-lat-tat-ta (468 notes)
        #           - (34, 0, 17, 0, 189): GeGeGe no Kitaro [6th Season] (390 notes)
        #       * Just to confirm, if we look at the top/bottom 9* songs, we see:
        #           - (8, 0, 4, 0, 240): Hypnosismic -Division Battle Anthem- (1608 notes)
        #           - (10, 0, 8, 0, 225): Rokuchounen to Ichiyo Monogatari (846 notes)
        #           - (48, 0, 24, 0, 160): Inscrutable Battle (274 notes)
        #       * So, to summarize, for Oni songs:
        #           - As the number of notes increases, bytes 12/16 decrease, and byte 20 increases
        #           - As the number of notes decreases, bytes 12/16 increase, and byte 20 decreases
        #
        #   - However, the relationship doesn't hold when checking, for example, 1* Easy charts
        #       * Bytes 13 and 17, which were previously always 0, are now 0/1/2:
        #           - (249, 0, 187, 0, 132): Let's go! Smile Precure (67 notes)
        #           - (249, 1, 123, 1, 3): Anata to Tu-lat-tat-ta (33 notes)
        #           - (44, 2, 161, 1, 234): Do you want to build a Snowman? (30 notes)
        #           - (0, 1, 192, 0, 128): Odoru Ponpokorin (65 notes)
        #       * I'm having trouble making sense of the relationships between these bytes.
        elif idx in [12, 13]:
            pass
        elif idx in [16, 17]:
            pass
        elif idx == 20:
            pass

        # 4. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes 21, 22, and 23 have the values (255, 255, 255)
        #   * For a very tiny minority of charts (~5), byte 21 will be 254 or 253 instead.
        # Given that most platforms use the values (255, 255, 255), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        # NB: The check below tolerates 253/254 at all three positions, not just byte 21.
        elif idx in [21, 22, 23]:
            assert val in [253, 254, 255], f"Expected 255 (or 253,254) at position '{idx}', got '{val}' instead."

        # 5. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes 26, 30, and 34 have the values (1, 1, 1)
        #   * For a small minority of charts (~100), one or both of bytes 30/34 will be 0 instead of 1
        # Given that most platforms use the values (1, 1, 1), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx == 26:
            assert val == 1, f"Expected 1 at position '{idx}', got '{val}' instead."
        elif idx in [30, 34]:
            assert val in [1, 0], f"Expected 1/0 at position '{idx}', got '{val}' instead."

        # 6. Unknown
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (28, 29) and (32, 33) have the values (0, 0)
        #   * But, for some games (Gen3Arcade, 3DS), unique values will be stored in these bytes.
        # Given that most platforms use the values (0, 0), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [28, 29]:
            pass
        elif idx in [32, 33]:
            pass

        # 7. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (36, 40, 48) and (52, 56, 60) have the values (20, 10, 1)
        #   * For a small minority of charts (~45), these values can be 0,1,2 instead.
        # Given that most platforms use the values (20, 10, 1), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [36, 52]:
            assert val in [20, 0, 1, 2], f"Expected 20 (or 0,1,2) at position '{idx}', got '{val}' instead."
        elif idx in [40, 56]:
            assert val in [10, 0, 1], f"Expected 10 (or 0,1) at position '{idx}', got '{val}' instead."
        elif idx in [48, 60]:
            # NB: See section 8 below for an explanation about '255' for byte 60.
            #     (The same tolerance is applied to byte 48 here, since both positions share this check.)
            assert val in [1, 0, 255], f"Expected 1 (or 0,255) at position '{idx}', got '{val}' instead."

        # 8. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (61, 62, 63) have the values (0, 0, 0)
        #   * However, for iOS and iOSU charts (144 total), bytes (60, 61, 62, 63) are (255, 255, 255, 255) instead.
        # Given that most platforms use the values (0, 0, 0), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [61, 62, 63]:
            assert val in [0, 255], f"Expected 0/255 at position '{idx}', got '{val}' instead."

        # 9. <padding>
        # Notes:
        #   * Breakdown of distribution of different byte combinations for bytes (64, 68, 72):
        #       - 5809/7482 charts: (30, 30, 20)
        #       - 1577/7482 charts: (30, 30, 0)
        #       - 41/7482 charts: (0, 0, 0)
        #       - 3/7482 charts: (1, 0, 0)
        #       - 2/7482 charts: (0, 0, 20)
        # Given that most platforms use the values (30, 30, 20), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [64, 68]:
            assert val in [30, 0, 1], f"Expected 30/0/1 at position '{idx}', got '{val}' instead."
        elif idx == 72:
            assert val in [20, 0], f"Expected 20/0 at position '{idx}', got '{val}' instead."

        # 10. Difficulty (Gen2) and ???? (Gen3)
        # Notes:
        #   * In Gen2 charts (AC, Wii), these values would be one of 4 different byte combinations.
        #   * These values correspond to the difficulty of the song (no Uras in Gen2, hence 4 values):
        #       - [192, 42, 12] (Easy)
        #       - [92, 205, 23] (Normal)
        #       - [8, 206, 31] (Hard)
        #       - [288, 193, 44] (Oni)
        #         NOTE(review): 288 does not fit in a single byte, so this value is likely a typo -- TODO confirm.
        #   * However, starting in Gen3 (AC, console), these bytes were given unique per-song, per-chart values.
        #       - In total, Gen3 contains 6449 unique combinations of bytes (with some minor overlaps between games).
        # For TJA conversion, I plan to just stick with the Gen2 scheme (and make up the missing value for Uras),
        # which would be much easier than trying to figure out the Gen3 scheme.
        elif idx in [76, 77, 78]:
            pass

        # 11. Empty bytes (every metadata position not covered by one of the sections above)
        else:
            assert val == 0, f"Expected 0 at position '{idx}', got '{val}' instead."
def computeSoulGaugeByte(n_notes):
# I don't think this is fully accurate. It doesn't work for non-Oni songs, and it's usually off by a bit.

View File

@ -7,7 +7,7 @@ import pytest
from tja2fumen import main as convert
from tja2fumen.parsers import readFumen
from tja2fumen.constants import COURSE_IDS, NORMALIZE_COURSE
from tja2fumen.constants import COURSE_IDS, NORMALIZE_COURSE, simpleHeaders, byte_strings
def normalize_type(note_type):
@ -61,6 +61,8 @@ def test_converted_tja_vs_cached_fumen(id_song, tmp_path):
co_song = readFumen(path_out)
ca_song = readFumen(os.path.join(path_bin, os.path.basename(path_out)))
# 1. Check song headers
checkValidHeader(co_song['headerPadding']+co_song['headerMetadata'])
checkValidHeader(ca_song['headerPadding']+ca_song['headerMetadata'])
assert_song_property(co_song, ca_song, 'headerPadding', func=len)
assert_song_property(co_song, ca_song, 'headerMetadata', func=len)
# 2. Check song metadata
@ -99,3 +101,191 @@ def test_converted_tja_vs_cached_fumen(id_song, tmp_path):
# NB: 'item' still needs to be implemented: https://github.com/vivaria/tja2fumen/issues/17
# assert_song_property(co_note, ca_note, 'item', i_measure, i_branch, i_note)
def checkValidHeader(headerBytes):
    """
    Assert that a 512-byte fumen header only contains byte patterns seen in known-valid fumens.

    The header is checked in two parts:
      1. The first 432 bytes (padding) must match one of the known byte-substring patterns.
      2. Each of the last 80 bytes (metadata) must hold a value that has been observed at that position.

    Args:
        headerBytes: The 512 header bytes of a fumen, i.e. the padding bytes followed by the metadata bytes
                     (presumably a `bytes` object -- TODO confirm against the parsed song structure).

    Raises:
        AssertionError: If the header is not 512 bytes long, or if any byte holds an unexpected value.
    """
    # Fumen headers should contain 512 bytes.
    assert len(headerBytes) == 512, f"Expected 512 header bytes, got '{len(headerBytes)}' instead."

    # The header for fumens can be split into two groups: The first 432 bytes, and the last 80 bytes.
    headerPadding = headerBytes[:432]
    headerMetadata = headerBytes[-80:]

    # 1. Check the header's padding bytes for several possible combinations
    # 1a. These simple headers (substrings repeated 36 times) are used for many Gen2 systems (AC, Wii, etc.)
    cond1 = headerPadding in simpleHeaders
    # 1b. Starting with Gen3, they began using unique headers for every song. (3DS and PSPDX are the big offenders.)
    #     - They seem to be some random combination of b_x00 + one of the non-null byte substrings.
    #     - To avoid enumerating every combination of 432 bytes, we do a lazy check instead.
    cond2 = (byte_strings['x00'] in headerPadding and
             any(byte_strings[key] in headerPadding
                 for key in ['431', '432', '433', '434', 'V1', 'V2', 'V3']))
    # 1c. The PS4 song 'wii5op' is a special case: It throws in this odd 'g1' string in combo with 2 other substrings.
    cond3 = (byte_strings['g1'] in headerPadding and
             any(byte_strings[key] in headerPadding for key in ['431', 'V2']))
    # Otherwise, this is some unknown header we haven't seen before.
    assert cond1 or cond2 or cond3, "Header padding bytes do not match expected fumen byte substrings."

    # 2. Check the header's metadata bytes
    for idx, val in enumerate(headerMetadata):
        # 0. Unknown
        # Notes:
        #   * Breakdown of distribution of different byte combinations:
        #       - 5739/7482 charts: [0, 0, 0, 0] (Most platforms)
        #       - 386/7482 charts: [0, 151, 68, 0]
        #       - 269/7482 charts: [0, 1, 57, 0]
        #       - 93/7482 charts: [1, 0, 0, 0]
        #       - 93/7482 charts: [0, 64, 153, 0]
        #       - And more...
        #       - After this, we see a long tail of hundreds of different unique byte combinations.
        #   * Games with the greatest number of unique byte combinations:
        #       - VitaMS: 258 unique byte combinations
        #       - iOSU: 164 unique byte combinations
        #       - Vita: 153 unique byte combinations
        # Given that most platforms use the values (0, 0, 0, 0), and unique values are very platform-specific,
        # I'm going to stick with (0, 0, 0, 0) bytes when it comes to converting TJA files to fumens.
        if idx in [0, 1, 2, 3]:
            pass

        # 1. <padding>
        # Notes: These values are ALWAYS (16, 39), for every valid fumen.
        elif idx == 4:
            assert val == 16, f"Expected 16 at position '{idx}', got '{val}' instead."
        elif idx == 5:
            assert val == 39, f"Expected 39 at position '{idx}', got '{val}' instead."

        # 2. Difficulty
        # Notes:
        #   * Breakdown of distribution of different byte combinations:
        #       - 1805/7482 charts: [112, 23] (Easy)
        #       - 3611/7482 charts: [88, 27] (Normal, Hard)
        #       - 2016/7482 charts: [64, 31] (Oni, Ura)
        #   * In other words, all 5 difficulties map to only three different byte-pairs across all valid fumens.
        elif idx == 8:
            assert val in [88, 64, 112], f"Expected 88/64/112 at position '{idx}', got '{val}' instead."
        elif idx == 9:
            assert val in [27, 31, 23], f"Expected 27/31/23 at position '{idx}', got '{val}' instead."

        # 3. TODO: Note count / drumroll count / note score / song length / etc.
        # Notes:
        #   - For Oni songs, bytes (12, 16, 20) correlate with note count (bytes 13, 17 are always 0):
        #       * If we look at 10* Oni songs, we see the following 2 ends of the spectrum for bytes
        #         (12, 13, 16, 17, 20):
        #           - (9, 0, 4, 0, 238): Sotsu Omeshii Full (1487 notes)
        #           - (9, 0, 5, 0, 237): Shimedore 2000 (1414 notes)
        #                                Shimedore 2000+ (1414 notes)
        #                                Silent Jealousy (1408 notes)
        #                                The Future of the Taiko Drum (1400 notes)
        #                                Dairouketen Maou (1396 notes)
        #           - (10, 0, 5, 0, 235): Yugen no Ran (1262 notes)
        #           - [...]
        #           - (27, 0, 14, 0, 201): Pan vs. Gohan! Daikessen! [Normal Route] (480 notes)
        #           - (28, 0, 14, 0, 200): Anata to Tu-lat-tat-ta (468 notes)
        #           - (34, 0, 17, 0, 189): GeGeGe no Kitaro [6th Season] (390 notes)
        #       * Just to confirm, if we look at the top/bottom 9* songs, we see:
        #           - (8, 0, 4, 0, 240): Hypnosismic -Division Battle Anthem- (1608 notes)
        #           - (10, 0, 8, 0, 225): Rokuchounen to Ichiyo Monogatari (846 notes)
        #           - (48, 0, 24, 0, 160): Inscrutable Battle (274 notes)
        #       * So, to summarize, for Oni songs:
        #           - As the number of notes increases, bytes 12/16 decrease, and byte 20 increases
        #           - As the number of notes decreases, bytes 12/16 increase, and byte 20 decreases
        #
        #   - However, the relationship doesn't hold when checking, for example, 1* Easy charts
        #       * Bytes 13 and 17, which were previously always 0, are now 0/1/2:
        #           - (249, 0, 187, 0, 132): Let's go! Smile Precure (67 notes)
        #           - (249, 1, 123, 1, 3): Anata to Tu-lat-tat-ta (33 notes)
        #           - (44, 2, 161, 1, 234): Do you want to build a Snowman? (30 notes)
        #           - (0, 1, 192, 0, 128): Odoru Ponpokorin (65 notes)
        #       * I'm having trouble making sense of the relationships between these bytes.
        elif idx in [12, 13]:
            pass
        elif idx in [16, 17]:
            pass
        elif idx == 20:
            pass

        # 4. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes 21, 22, and 23 have the values (255, 255, 255)
        #   * For a very tiny minority of charts (~5), byte 21 will be 254 or 253 instead.
        # Given that most platforms use the values (255, 255, 255), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        # NB: The check below tolerates 253/254 at all three positions, not just byte 21.
        elif idx in [21, 22, 23]:
            assert val in [253, 254, 255], f"Expected 255 (or 253,254) at position '{idx}', got '{val}' instead."

        # 5. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes 26, 30, and 34 have the values (1, 1, 1)
        #   * For a small minority of charts (~100), one or both of bytes 30/34 will be 0 instead of 1
        # Given that most platforms use the values (1, 1, 1), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx == 26:
            assert val == 1, f"Expected 1 at position '{idx}', got '{val}' instead."
        elif idx in [30, 34]:
            assert val in [1, 0], f"Expected 1/0 at position '{idx}', got '{val}' instead."

        # 6. Unknown
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (28, 29) and (32, 33) have the values (0, 0)
        #   * But, for some games (Gen3Arcade, 3DS), unique values will be stored in these bytes.
        # Given that most platforms use the values (0, 0), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [28, 29]:
            pass
        elif idx in [32, 33]:
            pass

        # 7. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (36, 40, 48) and (52, 56, 60) have the values (20, 10, 1)
        #   * For a small minority of charts (~45), these values can be 0,1,2 instead.
        # Given that most platforms use the values (20, 10, 1), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [36, 52]:
            assert val in [20, 0, 1, 2], f"Expected 20 (or 0,1,2) at position '{idx}', got '{val}' instead."
        elif idx in [40, 56]:
            assert val in [10, 0, 1], f"Expected 10 (or 0,1) at position '{idx}', got '{val}' instead."
        elif idx in [48, 60]:
            # NB: See section 8 below for an explanation about '255' for byte 60.
            #     (The same tolerance is applied to byte 48 here, since both positions share this check.)
            assert val in [1, 0, 255], f"Expected 1 (or 0,255) at position '{idx}', got '{val}' instead."

        # 8. <padding>
        # Notes:
        #   * For the vast majority (99%) of charts, bytes (61, 62, 63) have the values (0, 0, 0)
        #   * However, for iOS and iOSU charts (144 total), bytes (60, 61, 62, 63) are (255, 255, 255, 255) instead.
        # Given that most platforms use the values (0, 0, 0), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [61, 62, 63]:
            assert val in [0, 255], f"Expected 0/255 at position '{idx}', got '{val}' instead."

        # 9. <padding>
        # Notes:
        #   * Breakdown of distribution of different byte combinations for bytes (64, 68, 72):
        #       - 5809/7482 charts: (30, 30, 20)
        #       - 1577/7482 charts: (30, 30, 0)
        #       - 41/7482 charts: (0, 0, 0)
        #       - 3/7482 charts: (1, 0, 0)
        #       - 2/7482 charts: (0, 0, 20)
        # Given that most platforms use the values (30, 30, 20), and unique values are very platform-specific,
        # I'm going to ignore the unique bytes when it comes to converting TJA files to fumens.
        elif idx in [64, 68]:
            assert val in [30, 0, 1], f"Expected 30/0/1 at position '{idx}', got '{val}' instead."
        elif idx == 72:
            assert val in [20, 0], f"Expected 20/0 at position '{idx}', got '{val}' instead."

        # 10. Difficulty (Gen2) and ???? (Gen3)
        # Notes:
        #   * In Gen2 charts (AC, Wii), these values would be one of 4 different byte combinations.
        #   * These values correspond to the difficulty of the song (no Uras in Gen2, hence 4 values):
        #       - [192, 42, 12] (Easy)
        #       - [92, 205, 23] (Normal)
        #       - [8, 206, 31] (Hard)
        #       - [288, 193, 44] (Oni)
        #         NOTE(review): 288 does not fit in a single byte, so this value is likely a typo -- TODO confirm.
        #   * However, starting in Gen3 (AC, console), these bytes were given unique per-song, per-chart values.
        #       - In total, Gen3 contains 6449 unique combinations of bytes (with some minor overlaps between games).
        # For TJA conversion, I plan to just stick with the Gen2 scheme (and make up the missing value for Uras),
        # which would be much easier than trying to figure out the Gen3 scheme.
        elif idx in [76, 77, 78]:
            pass

        # 11. Empty bytes (every metadata position not covered by one of the sections above)
        else:
            assert val == 0, f"Expected 0 at position '{idx}', got '{val}' instead."