2023-06-25 07:02:31 +02:00
|
|
|
import os
|
|
|
|
import shutil
|
|
|
|
import zipfile
|
|
|
|
import re
|
2023-06-29 07:20:46 +02:00
|
|
|
import glob
|
2023-06-25 07:02:31 +02:00
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
2023-06-30 02:34:08 +02:00
|
|
|
from tja2fumen import main as convert
|
2023-07-20 03:50:09 +02:00
|
|
|
from tja2fumen.parsers import read_fumen
|
Overhaul header metadata code with new `FumenHeader` Python class (plus use ints, not bytes) (#45)
Previously, I had to reverse engineer many of the bytes in a valid fumen
header. This meant that I was often working with literal byte values, as
well as guessing at what things meant, calling them "padding" or
"unknown".
However, thanks to #44, I now have a TON of knowledge about the correct
purpose of each byte in the header. This lets me properly document each
value's purpose, and it also lets me parse 4-byte groups as ints, rather
than dealing with individual raw bytes.
In summary, the changes in this PR are:
- Create a new `FumenHeader` class with individual (correctly-named!)
attributes for each 4-byte integer.
- Parse and write the fumen header as one big 520-byte chunk, instead of
handling the header in smaller increments.
- Remove a bunch of dead lines of code that were hardcoding specific
byte values, and replace them with proper integer values.
- Vastly simplify the test for valid headers by removing checks against
individual bytes.
Necessary for #40.
Fixes #44.
2023-07-19 20:36:58 +02:00
|
|
|
from tja2fumen.constants import COURSE_IDS, NORMALIZE_COURSE
|
2023-06-25 07:02:31 +02:00
|
|
|
|
|
|
|
|
2023-06-29 07:39:55 +02:00
|
|
|
@pytest.mark.parametrize('id_song', [
    pytest.param('shoto9', marks=pytest.mark.skip("TJA structure does not match fumen yet.")),
    pytest.param('genpe'),
    pytest.param('gimcho'),
    pytest.param('imcanz'),
    pytest.param('clsca'),
    pytest.param('linda'),
    pytest.param('senpac'),
    pytest.param('butou5'),
    pytest.param('hol6po'),
    pytest.param('mikdp'),
    pytest.param('ia6cho'),
])
def test_converted_tja_vs_cached_fumen(id_song, tmp_path, entry_point):
    """Convert ``data/<id_song>.tja`` and compare each output with the cached fumen.

    The TJA file is copied into a per-song directory under ``tmp_path``,
    converted through the selected ``entry_point`` ("python-api",
    "python-cli" or "exe"), and every generated ``*.bin`` file is compared
    (header, song metadata, measures, branches, notes) against the file of
    the same name extracted from ``data/<id_song>.zip``.
    """
    # Imported locally so this test does not depend on the module-level
    # import block being updated.
    import subprocess

    # Define the testing directory
    path_test = os.path.dirname(os.path.realpath(__file__))

    # Define the working directory
    path_temp = os.path.join(tmp_path, id_song)
    os.mkdir(path_temp)

    # Copy input TJA to working directory
    path_tja = os.path.join(path_test, "data", f"{id_song}.tja")
    path_tja_tmp = os.path.join(path_temp, f"{id_song}.tja")
    shutil.copy(path_tja, path_tja_tmp)

    # Convert TJA file to fumen files
    # NB: External commands are run with an argument list (no shell), so that paths containing spaces are
    #     passed through intact, and with check=True, so that a failing conversion is reported immediately.
    if entry_point == "python-api":
        convert(argv=[path_tja_tmp])
    elif entry_point == "python-cli":
        subprocess.run(["tja2fumen", path_tja_tmp], check=True)
    elif entry_point == "exe":
        exe_paths = glob.glob(os.path.join(os.path.split(path_test)[0], "dist", "*.exe"))
        assert exe_paths, "No .exe file found in the 'dist' directory"
        subprocess.run([exe_paths[0], path_tja_tmp], check=True)
    else:
        pytest.fail(f"Unknown entry point: '{entry_point}'")

    # Fetch output fumen paths
    paths_out = glob.glob(os.path.join(path_temp, "*.bin"))
    assert paths_out, f"No bin files generated in {path_temp}"
    order = "xmhne"  # Ura Oni -> Oni -> Hard -> Normal -> Easy
    paths_out = sorted(paths_out, key=lambda s: [order.index(c) if c in order else len(order) for c in s])

    # Extract cached fumen files to working directory
    path_binzip = os.path.join(path_test, "data", f"{id_song}.zip")
    path_bin = os.path.join(path_temp, "ca_bins")
    with zipfile.ZipFile(path_binzip, 'r') as zip_ref:
        zip_ref.extractall(path_bin)

    # Compare cached fumen with generated fumen
    for path_out in paths_out:
        # Difficulty introspection to help with debugging
        i_difficult_id = os.path.basename(path_out).split(".")[0].split("_")[1]
        i_difficulty = NORMALIZE_COURSE[{v: k for k, v in COURSE_IDS.items()}[i_difficult_id]]  # noqa
        # 0. Read fumen data (converted vs. cached)
        co_song = read_fumen(path_out, exclude_empty_measures=True)
        ca_song = read_fumen(os.path.join(path_bin, os.path.basename(path_out)), exclude_empty_measures=True)
        # 1. Check song headers
        checkValidHeader(co_song.header)
        checkValidHeader(ca_song.header)
        # 2. Check song metadata
        assert_song_property(co_song.header, ca_song.header, 'order')
        assert_song_property(co_song.header, ca_song.header, 'b432_b435_has_branches')
        assert_song_property(co_song, ca_song, 'score_init')
        assert_song_property(co_song, ca_song, 'score_diff')
        # 3. Check measure data
        for i_measure in range(max([len(co_song.measures), len(ca_song.measures)])):
            # NB: We could assert that len(measures) is the same for both songs, then iterate through zipped measures.
            #     But, if there is a mismatched number of measures, we want to know _where_ it occurs. So, we let the
            #     comparison go on using the max length of both songs until something else fails.
            co_measure = co_song.measures[i_measure]
            ca_measure = ca_song.measures[i_measure]
            # 3a. Check measure metadata
            assert_song_property(co_measure, ca_measure, 'bpm', i_measure, abs=0.01)
            assert_song_property(co_measure, ca_measure, 'fumen_offset_start', i_measure, abs=0.15)
            assert_song_property(co_measure, ca_measure, 'gogo', i_measure)
            assert_song_property(co_measure, ca_measure, 'barline', i_measure)

            # NB: KAGEKIYO's fumen has some strange details that can't be replicated using the TJA charting format.
            #     So, for now, we use a special case to skip checking A) notes for certain measures and B) branchInfo
            if id_song == 'genpe':
                # A) The 2/4 measures in the Ura of KAGEKIYO's official Ura fumen don't match the wikiwiki.jp/TJA
                #    charts. In the official fumen, the note ms offsets of branches 5/12/17/etc. go _past_ the
                #    duration of the measure. This behavior is impossible to represent using the TJA format, so we
                #    skip checking notes for these measures, since the rest of the measures have perfect note ms
                #    offsets anyway.
                if i_difficult_id == "x" and i_measure in [5, 6, 12, 13, 17, 18, 26, 27, 46, 47, 51, 52, 56, 57]:
                    continue
            # B) The branching condition for KAGEKIYO is very strange (accuracy for the 7 big notes in the song)
            #    So, we only test the branchInfo bytes for non-KAGEKIYO songs:
            else:
                assert_song_property(co_measure, ca_measure, 'branch_info', i_measure)

            # 3b. Check measure notes
            for i_branch in ['normal', 'advanced', 'master']:
                co_branch = co_measure.branches[i_branch]
                ca_branch = ca_measure.branches[i_branch]
                # NB: We only check speed for non-empty branches, as fumens store speed changes even for empty branches
                if co_branch.length != 0:
                    assert_song_property(co_branch, ca_branch, 'speed', i_measure, i_branch)
                # NB: We could assert that len(notes) is the same for both songs, then iterate through zipped notes.
                #     But, if there is a mismatched number of notes, we want to know _where_ it occurs. So, we let the
                #     comparison go on using the max length of both branches until something else fails.
                for i_note in range(max([co_branch.length, ca_branch.length])):
                    co_note = co_branch.notes[i_note]
                    ca_note = ca_branch.notes[i_note]
                    assert_song_property(co_note, ca_note, 'note_type', i_measure, i_branch, i_note, func=normalize_type)
                    assert_song_property(co_note, ca_note, 'pos', i_measure, i_branch, i_note, abs=0.1)
                    # NB: Drumroll duration doesn't always end exactly on a beat. Plus, TJA charters often eyeball
                    #     drumrolls, leading them to be often off by a 1/4th/8th/16th/32th/etc. These charting errors
                    #     are fixable, but tedious to do when writing tests. So, I've added a try/except so that they
                    #     can be checked locally with a breakpoint when adding new songs, but so that fixing every
                    #     duration-related chart error isn't 100% mandatory.
                    try:
                        assert_song_property(co_note, ca_note, 'duration', i_measure, i_branch, i_note, abs=25.0)
                    except AssertionError:
                        pass
                    if ca_note.note_type not in ["Balloon", "Kusudama"]:
                        assert_song_property(co_note, ca_note, 'score_init', i_measure, i_branch, i_note)
                        assert_song_property(co_note, ca_note, 'score_diff', i_measure, i_branch, i_note)
                    # NB: 'item' still needs to be implemented: https://github.com/vivaria/tja2fumen/issues/17
                    # assert_song_property(co_note, ca_note, 'item', i_measure, i_branch, i_note)
|
|
|
|
|
2023-06-25 19:19:50 +02:00
|
|
|
|
2023-06-29 21:45:01 +02:00
|
|
|
def assert_song_property(converted_obj, cached_obj, prop, measure=None, branch=None, note=None, func=None, abs=None):
    """Assert that attribute ``prop`` matches on the converted and cached objects.

    Args:
        converted_obj: Object read from the freshly converted fumen.
        cached_obj: Object read from the cached (reference) fumen.
        prop: Name of the attribute to read from both objects.
        measure: Optional 0-based measure index, used only in the failure message.
        branch: Optional branch name, used only in the failure message.
        note: Optional 0-based note index, used only in the failure message.
        func: Optional callable applied to both values before an exact comparison.
        abs: Optional absolute tolerance; when truthy (and ``func`` is not given),
            the values are compared with ``pytest.approx``. The name shadows the
            builtin but is kept because callers pass it by keyword.

    Raises:
        AssertionError: If the (possibly normalized) values differ.
    """
    # NB: TJA parser/converter uses 0-based indexing, but TJA files use 1-based indexing.
    #     So, we increment 1 in the error message to more easily identify problematic lines in TJA files.
    msg_failure = f"'{prop}' mismatch"
    if measure is not None:
        msg_failure += f": measure '{measure+1}'"
    if branch is not None:
        msg_failure += f", branch '{branch}'"
    if note is not None:
        msg_failure += f", note '{note+1}'"
    # getattr() (unlike a direct __getattribute__ call) also honours a
    # __getattr__ fallback defined on the object's class.
    converted_val = getattr(converted_obj, prop)
    cached_val = getattr(cached_obj, prop)
    if func:
        assert func(converted_val) == func(cached_val), msg_failure
    elif abs:
        assert converted_val == pytest.approx(cached_val, abs=abs), msg_failure
    else:
        assert converted_val == cached_val, msg_failure
|
2023-06-29 20:47:12 +02:00
|
|
|
|
|
|
|
|
|
|
|
def normalize_type(note_type):
    """Return ``note_type`` with every ASCII digit (0-9) removed."""
    digit_pattern = re.compile(r'[0-9]')
    return digit_pattern.sub('', note_type)
|
|
|
|
|
|
|
|
|
Overhaul header metadata code with new `FumenHeader` Python class (plus use ints, not bytes) (#45)
Previously, I had to reverse engineer many of the bytes in a valid fumen
header. This meant that I was often working with literal byte values, as
well as guessing at what things meant, calling them "padding" or
"unknown".
However, thanks to #44, I now have a TON of knowledge about the correct
purpose of each byte in the header. This lets me properly document each
value's purpose, and it also lets me parse 4-byte groups as ints, rather
than dealing with individual raw bytes.
In summary, the changes in this PR are:
- Create a new `FumenHeader` class with individual (correctly-named!)
attributes for each 4-byte integer.
- Parse and write the fumen header as one big 520-byte chunk, instead of
handling the header in smaller increments.
- Remove a bunch of dead lines of code that were hardcoding specific
byte values, and replace them with proper integer values.
- Vastly simplify the test for valid headers by removing checks against
individual bytes.
Necessary for #40.
Fixes #44.
2023-07-19 20:36:58 +02:00
|
|
|
def checkValidHeader(header):
    """Assert that every checked field of ``header`` holds an accepted value.

    Raises:
        AssertionError: On the first field whose value is outside the
            accepted set or range.
    """
    h = header

    # Overall size of the raw header
    assert len(h.raw_bytes) == 520

    # Branch flag
    assert h.b432_b435_has_branches in (0, 1)

    # HP values
    assert h.b436_b439_hp_max == 10000
    assert h.b440_b443_hp_clear in (6000, 7000, 8000)
    assert 10 <= h.b444_b447_hp_gain_good <= 1020
    assert 5 <= h.b448_b451_hp_gain_ok <= 1020
    assert -765 <= h.b452_b455_hp_loss_bad <= -20

    # Ratios (upper bound only)
    assert h.b456_b459_normal_normal_ratio <= 65536
    assert h.b460_b463_normal_professional_ratio <= 65536
    assert h.b464_b467_normal_master_ratio <= 65536

    # Branch points
    assert h.b468_b471_branch_points_good in (20, 0, 1, 2)
    assert h.b472_b475_branch_points_ok in (10, 0, 1)
    assert h.b476_b479_branch_points_bad == 0
    assert h.b480_b483_branch_points_drumroll in (1, 0)
    assert h.b484_b487_branch_points_good_big in (20, 0, 1, 2)
    assert h.b488_b491_branch_points_ok_big in (10, 0, 1)
    assert h.b492_b495_branch_points_drumroll_big in (1, 0)
    assert h.b496_b499_branch_points_balloon in (30, 0, 1)
    assert h.b500_b503_branch_points_kusudama in (30, 0)
|
2023-06-25 19:19:50 +02:00
|
|
|
|