Fix parsing of BALLOON: so that values are correctly read for all 3 branches (#80)

I was making two bad assumptions: 1. Songs with branches have the same number of balloons in each branch. 2. Each balloon has the same number of hits across branches. Because I was making these assumptions, I thought I could just repeat the `BALLOON:` field for each branch. **But this is wrong!!** Branches can have different numbers of balloons, and their balloons can have different numbers of hits. So, we must **NOT** unconditionally repeat `BALLOON:`; instead, we use the written value of `BALLOON:` directly. This way we can get the different values for each branch. This fixes the parsing of Emma's Ura (and probably other songs). Also, this revealed a bug in my parsing of Roppon no Bara to Sai no Uta, so I needed to make sure we account for "duplicated" balloons too, and repeat the values _only when necessary_.
2024-11-23 21:20:56 +01:00 · 2024-10-26 12:34:03 -04:00 · 2024-10-26 12:34:03 -04:00 · 9b6f05b420
commit 9b6f05b420
parent 4e4a90a1f7
9 changed files with 1675 additions and 13 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -42,5 +42,7 @@ disable = """
    too-many-branches,
    too-many-arguments,
    too-many-locals,
-    too-many-statements
+    too-many-statements,
    too-many-positional-arguments,
    fixme
 """
--- a/src/tja2fumen/converters.py
+++ b/src/tja2fumen/converters.py
@ -195,6 +195,9 @@ def convert_tja_to_fumen(tja: TJACourse) -> FumenCourse:
        len(b) for b in tja_branches_processed.values()
    ))
    # Use a single copy of the course balloons (since we use .pop())
    course_balloons = tja.balloon.copy()
    # Iterate through the different branches in the TJA
    total_notes = {'normal': 0, 'professional': 0, 'master': 0}
    for current_branch, branch_tja in tja_branches_processed.items():
@ -209,7 +212,6 @@ def convert_tja_to_fumen(tja: TJACourse) -> FumenCourse:
        current_levelhold = False
        branch_types: List[str] = []
        branch_conditions: List[Tuple[float, float]] = []
        course_balloons = tja.balloon.copy()
        # Iterate over pairs of TJA and Fumen measures
        for idx_m, (measure_tja, measure_fumen) in \
--- a/src/tja2fumen/parsers.py
+++ b/src/tja2fumen/parsers.py
@ -33,7 +33,9 @@ def parse_tja(fname_tja: str) -> TJASong:
    tja_lines = [line for line in tja_text.splitlines() if line.strip() != '']
    tja = split_tja_lines_into_courses(tja_lines)
    for course in tja.courses.values():
-        course.branches = parse_tja_course_data(course.data)
+        branches, balloon_data = parse_tja_course_data(course.data)
        course.branches = branches
        course.balloon = fix_balloon_field(course.balloon, balloon_data)
    return tja
@ -181,7 +183,8 @@ def split_tja_lines_into_courses(lines: List[str]) -> TJASong:
    return parsed_tja
-def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
+def parse_tja_course_data(data: List[str]) \
        -> Tuple[Dict[str, List[TJAMeasure]], Dict[str, List[str]]]:
    """
    Parse course data (notes, commands) into a nested song structure.
@ -208,6 +211,8 @@ def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
    has_branches = bool([d for d in data if d.startswith('#BRANCH')])
    current_branch = 'all' if has_branches else 'normal'
    branch_condition = ''
    # keep track of balloons in order to fix the 'BALLOON' field value
    balloons: Dict[str, List[str]] = {k: [] for k in BRANCH_NAMES}
    # Process course lines
    idx_m = 0
@ -225,6 +230,7 @@ def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
        # 1. Parse measure notes
        if note_data:
            notes_to_write: str = ""
            # If measure has ended, then add notes to the current measure,
            # then start a new measure by incrementing idx_m
            if note_data.endswith(','):
@ -232,14 +238,26 @@ def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
                                    else [current_branch]):
                    check_branch_length(parsed_branches, branch_name,
                                        expected_len=idx_m+1)
-                    parsed_branches[branch_name][idx_m].notes += note_data[:-1]
+                    notes_to_write = note_data[:-1]
                    parsed_branches[branch_name][idx_m].notes += notes_to_write
                    parsed_branches[branch_name].append(TJAMeasure())
                idx_m += 1
            # Otherwise, keep adding notes to the current measure ('idx_m')
            else:
                for branch_name in (BRANCH_NAMES if current_branch == 'all'
                                    else [current_branch]):
-                    parsed_branches[branch_name][idx_m].notes += note_data
+                    notes_to_write = note_data
                    parsed_branches[branch_name][idx_m].notes += notes_to_write
            # Keep track of balloon notes that were added
            balloon_notes = [n for n in notes_to_write if n in ['7', '9']]
            # mark balloon notes as duplicates if necessary. this will be used
            # to fix the BALLOON: field to account for duplicated balloons.
            balloon_notes = (['DUPE'] * len(balloon_notes)
                             if current_branch == 'all' else balloon_notes)
            for branch_name in (BRANCH_NAMES if current_branch == 'all'
                                else [current_branch]):
                balloons[branch_name].extend(balloon_notes)
        # 2. Parse measure commands that produce an "event"
        elif command in ['GOGOSTART', 'GOGOEND', 'BARLINEON', 'BARLINEOFF',
@ -381,7 +399,7 @@ def parse_tja_course_data(data: List[str]) -> Dict[str, List[TJAMeasure]]:
                "have in each branch.)"
            )
-    return parsed_branches
+    return parsed_branches, balloons
 def check_branch_length(parsed_branches: Dict[str, List[TJAMeasure]],
@ -425,6 +443,116 @@ def check_branch_length(parsed_branches: Dict[str, List[TJAMeasure]],
        )
 def fix_balloon_field(balloon_field: List[int],
                       balloon_data: Dict[str, List[str]]) -> List[int]:
    """
    Fix the 'BALLOON:' metadata field for certain branching songs.

    In Taiko, branching songs may have a different amount of balloons and/or
    different balloon values on their normal/professional/master branches.
    However, the TJA field "BALLOON:" is limited in how it can represent
    balloon hits; it uses a single comma-delimited list of integers. E.g.:
    BALLOON: 13,4,52,4,52,4,52
    It is unclear which of these values belong to which branches.

    This is especially unclear for songs that start out on the "normal" branch,
    or songs that have branching conditions that force a specific branch. These
    songs are often written as TJA with only a single branch written out, yet
    for official fumens, this branch information actually has to be present on
    *all three branches*. So, the 'BALLOON:' field will be missing values.

    In the example above, the "13" balloon actually occurs on the normal branch
    before the first branch condition. Meaning that the balloons are split up
    like this:
    BALLOON: (13,4,52)(4,52)(4,52)
    However, due to fumen requirements, we want the balloons to actually be
    like this:
    BALLOON: (13,4,52)(13,4,52)(13,4,52)
    So, the purpose of this function is to "fix" the balloon information so
    that it can be used for fumen conversion without error.

    NOTE: This fix probably only applies to a VERY small minority of songs.
          One example (shown above) is the Ura chart for Roppon no Bara to Sai
          no Uta. You can see in the wikiwiki that the opening 'Normal'
          section has a balloon note prior to the branch condition. We need
          to duplicate this value across all branches.

    Args:
        balloon_field: Hit counts as written in the course's 'BALLOON:'
            field. This list is never modified by this function.
        balloon_data: For each branch ('normal', 'professional', 'master'),
            the ordered balloon markers found while parsing the notes. A
            marker is 'DUPE' when the balloon was copied to every branch,
            otherwise it is the balloon note character.

    Returns:
        The hit counts to use for conversion. When no fix is needed, the
        original `balloon_field` object is returned as-is; otherwise a new
        list is returned.

    Raises:
        IndexError: If the 'BALLOON:' field runs out of values while the
            duplicated balloons are being redistributed.
    """
    # Return early if any branch has no balloons at all. This covers courses
    # without branches (presumably only one branch's list is populated then).
    if not all(balloon_data.values()):
        return balloon_field
    # Special case: Courses where the # of balloons is the same for all
    # branches, and the TJA author only listed 1 set of balloons.
    # Fix: Duplicate the balloons 3 times.
    if all(len(balloons) == len(balloon_field)
           for balloons in balloon_data.values()):
        return balloon_field * 3
    # Return early if there were no duplicated balloons in the course
    if not any('DUPE' in balloons for balloons in balloon_data.values()):
        return balloon_field
    # If balloons were duplicated, then we expect the BALLOON: field to have
    # fewer hits values than the number of balloons. If this *isn't* the case,
    # then perhaps the TJA author duplicated the balloon hits themselves, and
    # so we don't want to make any unnecessary edits. Thus, return early.
    # FIXME: This assumption fails for double-kusudama notes, where we may
    #        see a "fake" balloon, thus inflating the total number of balloons.
    #        But, this is such a rare case (double-kusudama + duplicated
    #        balloons + 'BALLOON:' field with implicitly duplicated hits) that
    #        I'm alright handling it incorrectly. If a user files a bug
    #        report, then I'll fix it then.
    total_num_balloons = sum(len(b) for b in balloon_data.values())
    if len(balloon_field) >= total_num_balloons:
        return balloon_field
    # OK! So, by this point in the function, we're making these assumptions:
    #
    # 1. The TJA chart has branches.
    # 2. The TJA author wrote part of the song for only a single branch
    #    (e.g. the Normal branch, before the first branch condition), and thus
    #    we needed to duplicate some of the note data to create a valid fumen.
    # 3. The 'single branch' part of the TJA contained balloon/kusudama notes,
    #    and thus we needed to duplicate those notes.
    # 4. The TJA author wrote the 'BALLOON:' field such that there was only 1
    #    balloon value for the duplicated balloon note.
    #
    # The goal now is to identify which balloons were duplicated, and make sure
    # the "hits" value is present across all branches.

    # Consume a private copy so that the caller's list is left untouched.
    remaining_hits = list(balloon_field)
    duplicated_balloons: List[int] = []
    balloon_field_fixed: List[int] = []
    # Handle the normal branch first
    # If balloons are duplicated, then it's probably going to be from 'normal'
    # FIXME: If the balloons are duplicated from the master/professional branch
    #        (e.g. due to a forced branch change from a branch condition), then
    #        this logic will read the balloon values incorrectly.
    #        But, this is such a rare case that I'm alright handling it
    #        incorrectly. If a user files a bug report, then I'll fix it then.
    for balloon_note in balloon_data['normal']:
        balloon_hits = remaining_hits.pop(0)
        if balloon_note == 'DUPE':
            duplicated_balloons.append(balloon_hits)
        balloon_field_fixed.append(balloon_hits)
    # Repeat any duplicated balloon notes for the professional/master branches
    for branch_name in ['professional', 'master']:
        # Each branch replays the duplicated values from the start.
        dupes_to_copy = duplicated_balloons.copy()
        for balloon_note in balloon_data[branch_name]:
            if balloon_note == 'DUPE':
                balloon_field_fixed.append(dupes_to_copy.pop(0))
            else:
                balloon_field_fixed.append(remaining_hits.pop(0))
    return balloon_field_fixed
 ###############################################################################
 #                          Fumen-parsing functions                            #
 ###############################################################################
--- a/testing/data/emma.tja
+++ b/testing/data/emma.tja
--- a/testing/data/emma.zip
+++ b/testing/data/emma.zip
--- a/testing/data/hol6po.tja
+++ b/testing/data/hol6po.tja
@ -519,7 +519,7 @@ SCOREDIFF:67
 COURSE:Oni
 LEVEL:6
-BALLOON:7,9,5,3,3,15,3,3,4,23
+BALLOON:7,13,7,4,4,21,4,4,2,35
 SCOREINIT:520
 SCOREDIFF:122
@ -689,7 +689,7 @@ SCOREDIFF:122
 COURSE:Hard
 LEVEL:3
-BALLOON:30,14,6,16,18,18,18,18
+BALLOON:30,14,9,16,18,18,18,18
 SCOREINIT:570
 SCOREDIFF:140
--- a/testing/data/ia6cho.tja
+++ b/testing/data/ia6cho.tja
@ -463,7 +463,7 @@ SCOREDIFF:117
 COURSE:Normal
 LEVEL:5
-BALLOON:9,9
+BALLOON:9,10
 SCOREINIT:570
 SCOREDIFF:157
@ -621,7 +621,7 @@ SCOREDIFF:157
 COURSE:Easy
 LEVEL:3
-BALLOON:5,7,6
+BALLOON:5,7,7
 SCOREINIT:510
 SCOREDIFF:155
--- a/testing/data/linda.tja
+++ b/testing/data/linda.tja
@ -777,7 +777,7 @@ SCOREDIFF:215
 COURSE:Easy
 LEVEL:4
-BALLOON:20
+BALLOON:18
 SCOREINIT:710
 SCOREDIFF:287
--- a/testing/test_conversion.py
+++ b/testing/test_conversion.py
@ -11,6 +11,7 @@ from tja2fumen.parsers import parse_fumen
@pytest.mark.parametrize('id_song', [
    pytest.param('emma'),
    pytest.param('butou5'),
    pytest.param('shoto9',
                 marks=pytest.mark.skip("TJA measures do not match fumen.")),
@ -165,7 +166,10 @@ def test_converted_tja_vs_cached_fumen(id_song, tmp_path, entry_point):
                              i_branch, i_note, abv=25.0)
                    except AssertionError:
                        pass
-                    if ca_note.note_type not in ["Balloon", "Kusudama"]:
+                    if ca_note.note_type in ["Balloon", "Kusudama"]:
                        check(co_note, ca_note, 'hits', i_measure,
                              i_branch, i_note)
                    else:
                        check(co_note, ca_note, 'score_init', i_measure,
                              i_branch, i_note)
                        check(co_note, ca_note, 'score_diff', i_measure,