1
0
mirror of synced 2024-11-23 21:20:56 +01:00

Fix parsing of BALLOON: so that values are correctly read for all 3 branches (#80)

I was making two bad assumptions:

1. Songs with branches have the same number of balloons.
2. Each balloon has the same number of hits across branches.

Because I was making these assumptions, I thought I could just repeat
the `BALLOON:` field for each branch. **But this is wrong!!** Branches
can have different numbers of balloons, and their balloons can have
different numbers of hits. So, we must **NOT** unconditionally
repeat `BALLOON:`, and should instead use the written value of `BALLOON:`
directly. This way we can get the different values for each branch.

This fixes the parsing of Emma's Ura (and probably other songs). Also,
this revealed a bug in my parsing of Roppon no Bara to Sai no Uta, so I
needed to make sure we account for "duplicated" balloons too, and repeat
the values _only when necessary_.
This commit is contained in:
Viv 2024-10-26 12:34:03 -04:00 committed by GitHub
parent 4e4a90a1f7
commit 9b6f05b420
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 1675 additions and 13 deletions

View File

@ -42,5 +42,7 @@ disable = """
too-many-branches,
too-many-arguments,
too-many-locals,
too-many-statements
too-many-statements,
too-many-positional-arguments,
fixme
"""

View File

@ -195,6 +195,9 @@ def convert_tja_to_fumen(tja: TJACourse) -> FumenCourse:
len(b) for b in tja_branches_processed.values()
))
# Use a single copy of the course balloons (since we use .pop())
course_balloons = tja.balloon.copy()
# Iterate through the different branches in the TJA
total_notes = {'normal': 0, 'professional': 0, 'master': 0}
for current_branch, branch_tja in tja_branches_processed.items():
@ -209,7 +212,6 @@ def convert_tja_to_fumen(tja: TJACourse) -> FumenCourse:
current_levelhold = False
branch_types: List[str] = []
branch_conditions: List[Tuple[float, float]] = []
course_balloons = tja.balloon.copy()
# Iterate over pairs of TJA and Fumen measures
for idx_m, (measure_tja, measure_fumen) in \

View File

@ -33,7 +33,9 @@ def parse_tja(fname_tja: str) -> TJASong:
tja_lines = [line for line in tja_text.splitlines() if line.strip() != '']
tja = split_tja_lines_into_courses(tja_lines)
for course in tja.courses.values():
course.branches = parse_tja_course_data(course.data)
branches, balloon_data = parse_tja_course_data(course.data)
course.branches = branches
course.balloon = fix_balloon_field(course.balloon, balloon_data)
return tja
@ -181,7 +183,8 @@ def split_tja_lines_into_courses(lines: List[str]) -> TJASong:
return parsed_tja
def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
def parse_tja_course_data(data: List[str]) \
-> Tuple[Dict[str, List[TJAMeasure]], Dict[str, List[str]]]:
"""
Parse course data (notes, commands) into a nested song structure.
@ -208,6 +211,8 @@ def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
has_branches = bool([d for d in data if d.startswith('#BRANCH')])
current_branch = 'all' if has_branches else 'normal'
branch_condition = ''
# keep track of balloons in order to fix the 'BALLOON' field value
balloons: Dict[str, List[str]] = {k: [] for k in BRANCH_NAMES}
# Process course lines
idx_m = 0
@ -225,6 +230,7 @@ def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
# 1. Parse measure notes
if note_data:
notes_to_write: str = ""
# If measure has ended, then add notes to the current measure,
# then start a new measure by incrementing idx_m
if note_data.endswith(','):
@ -232,14 +238,26 @@ def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
else [current_branch]):
check_branch_length(parsed_branches, branch_name,
expected_len=idx_m+1)
parsed_branches[branch_name][idx_m].notes += note_data[:-1]
notes_to_write = note_data[:-1]
parsed_branches[branch_name][idx_m].notes += notes_to_write
parsed_branches[branch_name].append(TJAMeasure())
idx_m += 1
# Otherwise, keep adding notes to the current measure ('idx_m')
else:
for branch_name in (BRANCH_NAMES if current_branch == 'all'
else [current_branch]):
parsed_branches[branch_name][idx_m].notes += note_data
notes_to_write = note_data
parsed_branches[branch_name][idx_m].notes += notes_to_write
# Keep track of balloon notes that were added
balloon_notes = [n for n in notes_to_write if n in ['7', '9']]
# mark balloon notes as duplicates if necessary. this will be used
# to fix the BALLOON: field to account for duplicated balloons.
balloon_notes = (['DUPE'] * len(balloon_notes)
if current_branch == 'all' else balloon_notes)
for branch_name in (BRANCH_NAMES if current_branch == 'all'
else [current_branch]):
balloons[branch_name].extend(balloon_notes)
# 2. Parse measure commands that produce an "event"
elif command in ['GOGOSTART', 'GOGOEND', 'BARLINEON', 'BARLINEOFF',
@ -381,7 +399,7 @@ def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
"have in each branch.)"
)
return parsed_branches
return parsed_branches, balloons
def check_branch_length(parsed_branches: Dict[str, List[TJAMeasure]],
@ -425,6 +443,116 @@ def check_branch_length(parsed_branches: Dict[str, List[TJAMeasure]],
)
def fix_balloon_field(balloon_field: List[int],
                      balloon_data: Dict[str, List[str]]) -> List[int]:
    """
    Fix the 'BALLOON:' metadata field for certain branching songs.

    In Taiko, branching songs may have a different amount of balloons and/or
    different balloon values on their normal/professional/master branches.
    However, the TJA field "BALLOON:" is limited in how it can represent
    balloon hits; it uses a single comma-delimited list of integers. E.g.:

        BALLOON: 13,4,52,4,52,4,52

    It is unclear which of these values belong to which branches.

    This is especially unclear for songs that start out on the "normal" branch,
    or songs that have branching conditions that force a specific branch. These
    songs are often written as TJA with only a single branch written out, yet
    for official fumens, this branch information actually has to be present on
    *all three branches*. So, the 'BALLOON:' field will be missing values.

    In the example above, the "13" balloon actually occurs on the normal branch
    before the first branch condition. Meaning that the balloons are split up
    like this:

        BALLOON: (13,4,52)(4,52)(4,52)

    However, due to fumen requirements, we want the balloons to actually be
    like this:

        BALLOON: (13,4,52)(13,4,52)(13,4,52)

    So, the purpose of this function is to "fix" the balloon information so
    that it can be used for fumen conversion without error.

    NOTE: This fix probably only applies to a VERY small minority of songs.
          One example (shown above) is the Ura chart for Roppon no Bara to Sai
          no Uta. You can see in the wikiwiki that the opening 'Normal'
          section has a balloon note prior to the branch condition. We need
          to duplicate this value across all branches.

    Args:
        balloon_field: Hit counts as written in the TJA 'BALLOON:' field.
            This list is never modified by this function.
        balloon_data: For each of the 'normal', 'professional' and 'master'
            branches, the balloon notes seen while parsing, in order. Each
            entry is either the note character ('7' or '9') or the marker
            'DUPE' for a balloon that was copied onto every branch.

    Returns:
        The hit counts laid out as normal + professional + master, with the
        hits of duplicated balloons repeated on every branch. When no fix is
        needed, the original `balloon_field` object is returned as-is.

    Raises:
        IndexError: If a fix is attempted but 'BALLOON:' holds fewer values
            than the branches require.
    """
    # Return early if any branch has no recorded balloons (which is the case
    # for courses that don't have branches)
    if not all(balloon_data.values()):
        return balloon_field

    # Special case: Courses where the # of balloons is the same for all
    #               branches, and the TJA author only listed 1 set of balloons.
    # Fix: Duplicate the balloons 3 times.
    if all(len(balloons) == len(balloon_field)
           for balloons in balloon_data.values()):
        return balloon_field * 3

    # Return early if there were no duplicated balloons in the course
    if not any('DUPE' in balloons for balloons in balloon_data.values()):
        return balloon_field

    # If balloons were duplicated, then we expect the BALLOON: field to have
    # fewer hits values than the number of balloons. If this *isn't* the case,
    # then perhaps the TJA author duplicated the balloon hits themselves, and
    # so we don't want to make any unnecessary edits. Thus, return early.
    # FIXME: This assumption fails for double-kusudama notes, where we may
    #        see a "fake" balloon, thus inflating the total number of balloons.
    #        But, this is such a rare case (double-kusudama + duplicated
    #        balloons + 'BALLOON:' field with implicitly duplicated hits) that
    #        I'm alright handling it incorrectly. If a user files a bug
    #        report, then I'll fix it then.
    total_num_balloons = sum(len(b) for b in balloon_data.values())
    if not len(balloon_field) < total_num_balloons:
        return balloon_field

    # OK! So, by this point in the function, we're making these assumptions:
    #
    # 1. The TJA chart has branches.
    # 2. The TJA author wrote part of the song for only a single branch
    #    (e.g. the Normal branch, before the first branch condition), and thus
    #    we needed to duplicate some of the note data to create a valid fumen.
    # 3. The 'single branch' part of the TJA contained balloon/kusudama notes,
    #    and thus we needed to duplicate those notes.
    # 4. The TJA author wrote the 'BALLOON:' field such that there was only 1
    #    balloon value for the duplicated balloon note.
    #
    # The goal now is to identify which balloons were duplicated, and make sure
    # the "hits" value is present across all branches.

    # Consume a private copy of the hits, so that the caller's list is left
    # untouched (the values are popped off one by one below).
    hits_remaining = list(balloon_field)
    duplicated_balloons: List[int] = []
    balloon_field_fixed: List[int] = []

    # Handle the normal branch first
    # If balloons are duplicated, then it's probably going to be from 'normal'
    # FIXME: If the balloons are duplicated from the master/professional branch
    #        (e.g. due to a forced branch change from a branch condition), then
    #        this logic will read the balloon values incorrectly.
    #        But, this is such a rare case that I'm alright handling it
    #        incorrectly. If a user files a bug report, then I'll fix it then.
    for balloon_note in balloon_data['normal']:
        balloon_hits = hits_remaining.pop(0)
        if balloon_note == 'DUPE':
            duplicated_balloons.append(balloon_hits)
        balloon_field_fixed.append(balloon_hits)

    # Repeat any duplicated balloon notes for the professional/master branches
    for branch_name in ['professional', 'master']:
        dupes_to_copy = duplicated_balloons.copy()
        for balloon_note in balloon_data[branch_name]:
            if balloon_note == 'DUPE':
                balloon_field_fixed.append(dupes_to_copy.pop(0))
            else:
                balloon_field_fixed.append(hits_remaining.pop(0))

    return balloon_field_fixed
###############################################################################
# Fumen-parsing functions #
###############################################################################

1526
testing/data/emma.tja Normal file

File diff suppressed because it is too large Load Diff

BIN
testing/data/emma.zip Normal file

Binary file not shown.

View File

@ -519,7 +519,7 @@ SCOREDIFF:67
COURSE:Oni
LEVEL:6
BALLOON:7,9,5,3,3,15,3,3,4,23
BALLOON:7,13,7,4,4,21,4,4,2,35
SCOREINIT:520
SCOREDIFF:122
@ -689,7 +689,7 @@ SCOREDIFF:122
COURSE:Hard
LEVEL:3
BALLOON:30,14,6,16,18,18,18,18
BALLOON:30,14,9,16,18,18,18,18
SCOREINIT:570
SCOREDIFF:140

View File

@ -463,7 +463,7 @@ SCOREDIFF:117
COURSE:Normal
LEVEL:5
BALLOON:9,9
BALLOON:9,10
SCOREINIT:570
SCOREDIFF:157
@ -621,7 +621,7 @@ SCOREDIFF:157
COURSE:Easy
LEVEL:3
BALLOON:5,7,6
BALLOON:5,7,7
SCOREINIT:510
SCOREDIFF:155

View File

@ -777,7 +777,7 @@ SCOREDIFF:215
COURSE:Easy
LEVEL:4
BALLOON:20
BALLOON:18
SCOREINIT:710
SCOREDIFF:287

View File

@ -11,6 +11,7 @@ from tja2fumen.parsers import parse_fumen
@pytest.mark.parametrize('id_song', [
pytest.param('emma'),
pytest.param('butou5'),
pytest.param('shoto9',
marks=pytest.mark.skip("TJA measures do not match fumen.")),
@ -165,7 +166,10 @@ def test_converted_tja_vs_cached_fumen(id_song, tmp_path, entry_point):
i_branch, i_note, abv=25.0)
except AssertionError:
pass
if ca_note.note_type not in ["Balloon", "Kusudama"]:
if ca_note.note_type in ["Balloon", "Kusudama"]:
check(co_note, ca_note, 'hits', i_measure,
i_branch, i_note)
else:
check(co_note, ca_note, 'score_init', i_measure,
i_branch, i_note)
check(co_note, ca_note, 'score_diff', i_measure,