Refactor TJA parsing code to enable the development of further features (#19)

2025-01-24 07:04:09 +01:00 · 2023-06-30 14:51:57 -04:00 · 2023-06-30 14:51:57 -04:00 · 4a5a3399aa
commit 4a5a3399aa
parent 1510a62bf5
3 changed files with 193 additions and 261 deletions
--- a/src/tja2fumen/constants.py
+++ b/src/tja2fumen/constants.py
@ -1,11 +1,3 @@
 # Various commands and header fields for TJA files
 HEADER_GLOBAL = ['TITLE', 'TITLEJA', 'SUBTITLE', 'SUBTITLEJA', 'BPM', 'WAVE', 'OFFSET', 'DEMOSTART', 'GENRE',
                 'SCOREMODE', 'BGMOVIE', 'SONGVOL', 'SEVOL']
 HEADER_COURSE = ['COURSE', 'LEVEL', 'BALLOON', 'SCOREINIT', 'SCOREDIFF', 'STYLE']
 BRANCH_COMMANDS = ['START', 'END', 'BRANCHSTART', 'BRANCHEND', 'N', 'E', 'M', 'SECTION']
 MEASURE_COMMANDS = ['MEASURE', 'GOGOSTART', 'GOGOEND', 'BARLINEON', 'BARLINEOFF', 'SCROLL', 'BPMCHANGE', 'DELAY', 'LEVELHOLD']
 COMMAND = BRANCH_COMMANDS + MEASURE_COMMANDS
 # Note types for TJA files
 TJA_NOTE_TYPES = {
    '1': 'Don',
--- a/src/tja2fumen/converters.py
+++ b/src/tja2fumen/converters.py
@ -1,7 +1,8 @@
 from copy import deepcopy
 import re
 from tja2fumen.utils import computeSoulGaugeBytes
-from tja2fumen.constants import TJA_NOTE_TYPES, DIFFICULTY_BYTES, sampleHeaderMetadata, simpleHeaders
+from tja2fumen.constants import DIFFICULTY_BYTES, sampleHeaderMetadata, simpleHeaders
 # Filler metadata that the `writeFumen` function expects
 # TODO: Determine how to properly set the item byte (https://github.com/vivaria/tja2fumen/issues/17)
@ -22,7 +23,7 @@ default_measure = {
 }
-def preprocessTJAMeasures(tja):
+def processTJACommands(tja):
    """
    Merge TJA 'data' and 'event' fields into a single measure property, and split
    measures into sub-measures whenever a mid-measure BPM change occurs.
@ -40,35 +41,20 @@ def preprocessTJAMeasures(tja):
    In the future, this logic should probably be moved into the TJA parser itself.
    """
-    currentBPM = 0
+    currentBPM = float(tja['metadata']['bpm'])
    currentScroll = 1.0
    currentGogo = False
    currentBarline = True
    currentDividend = 4
    currentDivisor = 4
    measuresCorrected = []
    for measure in tja['measures']:
-        # Step 1: Combine notes and events
+        # Split measure into submeasure
        notes = [{'pos': i, 'type': 'note', 'value': TJA_NOTE_TYPES[note]}
                 for i, note in enumerate(measure['data']) if note != '0']
        events = [{'pos': e['position'], 'type': e['name'], 'value': e['value']}
                  for e in measure['events']]
        combined = []
        while notes or events:
            if events and notes:
                if notes[0]['pos'] >= events[0]['pos']:
                    combined.append(events.pop(0))
                else:
                    combined.append(notes.pop(0))
            elif events:
                combined.append(events.pop(0))
            elif notes:
                combined.append(notes.pop(0))
        # Step 2: Split measure into submeasure
        measure_cur = {'bpm': currentBPM, 'scroll': currentScroll, 'gogo': currentGogo, 'barline': currentBarline,
                       'subdivisions': len(measure['data']), 'pos_start': 0, 'pos_end': 0,
-                       'time_sig': measure['length'], 'data': []}
+                       'time_sig': [currentDividend, currentDivisor], 'data': []}
-        for data in combined:
+        for data in measure['combined']:
            if data['type'] == 'note':
                measure_cur['data'].append(data)
                # Update the current measure's SCROLL/GOGO/BARLINE status.
@ -96,7 +82,14 @@ def preprocessTJAMeasures(tja):
                    measuresCorrected.append(measure_cur)
                    measure_cur = {'bpm': currentBPM, 'scroll': currentScroll, 'gogo': currentGogo, 'barline': currentBarline,
                                   'subdivisions': len(measure['data']), 'pos_start': data['pos'], 'pos_end': 0,
-                                   'time_sig': measure['length'], 'data': []}
+                                   'time_sig': [currentDividend, currentDivisor], 'data': []}
            elif data['type'] == 'measure':
                matchMeasure = re.match(r"(\d+)/(\d+)", data['value'])
                if not matchMeasure:
                    continue
                currentDividend = int(matchMeasure.group(1))
                currentDivisor = int(matchMeasure.group(2))
                measure_cur['time_sig'] = [currentDividend, currentDivisor]
            elif data['type'] == 'scroll':
                currentScroll = data['value']
            elif data['type'] == 'gogo':
@ -114,11 +107,12 @@ def preprocessTJAMeasures(tja):
 def convertTJAToFumen(tja):
    # Hardcode currentBranch due to current lack of support for branching songs
    currentBranch = 'normal'  # TODO: Program in branch support
    tja['measures'] = preprocessTJAMeasures(tja)
    measureDurationPrev = 0
    currentDrumroll = None
    total_notes = 0
    tja['measures'] = processTJACommands(tja)
    # Parse TJA measures to create converted TJA -> Fumen file
    tjaConverted = {'measures': []}
    for idx_m, measureTJA in enumerate(tja['measures']):
@ -188,8 +182,8 @@ def convertTJAToFumen(tja):
                note = deepcopy(default_note)
                note['pos'] = note_pos
                note['type'] = data['value']
-                note['scoreInit'] = tja['scoreInit']  # Probably not fully accurate
+                note['scoreInit'] = tja['metadata']['scoreInit']  # Probably not fully accurate
-                note['scoreDiff'] = tja['scoreDiff']  # Probably not fully accurate
+                note['scoreDiff'] = tja['metadata']['scoreDiff']  # Probably not fully accurate
                # Handle drumroll/balloon-specific metadata
                if note['type'] in ["Balloon", "Kusudama"]:
                    note['hits'] = tja['metadata']['balloon'].pop(0)
@ -234,7 +228,7 @@ def convertTJAToFumen(tja):
    tjaConverted['order'] = '<'
    tjaConverted['unknownMetadata'] = 0
    tjaConverted['branches'] = False
-    tjaConverted['scoreInit'] = tja['scoreInit']
+    tjaConverted['scoreInit'] = tja['metadata']['scoreInit']
-    tjaConverted['scoreDiff'] = tja['scoreDiff']
+    tjaConverted['scoreDiff'] = tja['metadata']['scoreDiff']
    return tjaConverted
--- a/src/tja2fumen/parsers.py
+++ b/src/tja2fumen/parsers.py
@ -2,12 +2,7 @@ import os
 import re
 from tja2fumen.utils import readStruct, getBool, shortHex
-from tja2fumen.constants import (
+from tja2fumen.constants import NORMALIZE_COURSE, TJA_NOTE_TYPES, branchNames, noteTypes
    # TJA constants
    HEADER_GLOBAL, HEADER_COURSE, BRANCH_COMMANDS, MEASURE_COMMANDS, COMMAND, NORMALIZE_COURSE,
    # Fumen constants
    branchNames, noteTypes
 )
 ########################################################################################################################
@ -21,252 +16,203 @@ def parseTJA(fnameTJA):
    except UnicodeDecodeError:
        tja = open(fnameTJA, "r", encoding="shift-jis")
-    # Split into lines
+    lines = [line for line in tja.read().splitlines() if line.strip() != '']
-    lines = tja.read().splitlines()
+    courses = getCourseData(lines)
-    lines = [line for line in lines if line.strip()]  # Discard empty lines
+    for courseData in courses.values():
        courseData['measures'] = parseCourseMeasures(courseData['measures'])
-    # Line by line
+    return courses
-    headers = {}
+
 def getCourseData(lines):
    courses = {}
    currentCourse = ''
    songBPM = 0
    songOffset = 0
    for line in lines:
-        parsed = parseLine(line)
+        # Case 1: Header metadata
-        # Case 1: Comments (ignore)
+        match_header = re.match(r"^([A-Z]+):(.*)", line)
-        if parsed['type'] == 'comment':
+        if match_header:
-            pass
+            nameUpper = match_header.group(1).upper()
-        # Case 2: Global header metadata
+            value = match_header.group(2).strip()
-        elif parsed['type'] == 'header' and parsed['scope'] == 'global':
+
-            headers[parsed['name'].lower()] = parsed['value']
+            # Global header fields
-        # Case 3: Course data (metadata, commands, note data)
+            if nameUpper == 'BPM':
-        else:
+                songBPM = value
-            # Check to see if we're starting a new course
+            elif nameUpper == 'OFFSET':
-            if parsed['type'] == 'header' and parsed['scope'] == 'course' and parsed['name'] == 'COURSE':
+                songOffset = value
-                currentCourse = NORMALIZE_COURSE[parsed['value']]
+
            # Course-specific header fields
            elif nameUpper == 'COURSE':
                currentCourse = NORMALIZE_COURSE[value]
                if currentCourse not in courses.keys():
-                    courses[currentCourse] = []
+                    courses[currentCourse] = {
-            # Append the line to the current course
+                        'metadata': {'course': currentCourse, 'bpm': songBPM, 'offset': songOffset, 'level': 0,
-            courses[currentCourse].append(parsed)
+                                     'balloon': [], 'scoreInit': 0, 'scoreDiff': 0},
-
+                        'measures': [],
-    # Convert parsed course lines into actual note data
+                    }
-    songs = {}
+            elif nameUpper == 'LEVEL':
-    for courseName, courseLines in courses.items():
+                courses[currentCourse]['metadata']['level'] = int(value) if value else 0
-        courseHeader, courseMeasures = getCourse(headers, courseLines)
+            elif nameUpper == 'SCOREINIT':
-        songs[courseName] = applyFumenStructureToParsedTJA(headers, courseHeader, courseMeasures)
+                courses[currentCourse]['metadata']['scoreInit'] = int(value) if value else 0
-
+            elif nameUpper == 'SCOREDIFF':
-    return songs
+                courses[currentCourse]['metadata']['scoreDiff'] = int(value) if value else 0
-
+            elif nameUpper == 'BALLOON':
-
+                if value:
-def parseLine(line):
+                    balloons = [int(v) for v in value.split(",") if v]
-    # Regex matches for various line types
+                    courses[currentCourse]['metadata']['balloon'] = balloons
-    match_comment = re.match(r"//.*", line)
+            # STYLE is a P1/P2 command, which we don't support yet, so normally this would be a
-    match_header = re.match(r"^([A-Z]+):(.*)", line)
+            # NotImplementedError. However, TakoTako outputs `STYLE:SINGLE` when converting Ura
-    match_command = re.match(r"^#([A-Z]+)(?:\s+(.+))?", line)
+            # charts, so throwing an error here would prevent Ura charts from being converted.
-    match_data = re.match(r"^(([0-9]|A|B|C|F|G)*,?).*$", line)
+            # See: https://github.com/vivaria/tja2fumen/issues/15#issuecomment-1575341088
-
+            elif nameUpper == 'STYLE':
-    if match_comment:
+                pass
        return {"type": 'comment', "value": line}
    elif match_header:
        nameUpper = match_header.group(1).upper()
        value = match_header.group(2)
        if nameUpper in HEADER_GLOBAL:
            return {"type": 'header', "scope": 'global', "name": nameUpper, "value": value.strip()}
        elif nameUpper in HEADER_COURSE:
            return {"type": 'header', "scope": 'course', "name": nameUpper, "value": value.strip()}
    elif match_command:
        nameUpper = match_command.group(1).upper()
        value = match_command.group(2) if match_command.group(2) else ''
        if nameUpper in COMMAND:
            return {"type": 'command', "name": nameUpper, "value": value.strip()}
    elif match_data:
        return {"type": 'data', "data": match_data.group(1)}
    return {"type": 'unknown', "value": line}
 def getCourse(tjaHeaders, lines):
    def parseHeaderMetadata(line):
        nonlocal headers
        if line["name"] == 'COURSE':
            headers['course'] = NORMALIZE_COURSE[line['value']]
        elif line["name"] == 'LEVEL':
            headers['level'] = int(line['value']) if line['value'] else 0
        elif line["name"] == 'SCOREINIT':
            headers['scoreInit'] = int(line['value']) if line['value'] else 0
        elif line["name"] == 'SCOREDIFF':
            headers['scoreDiff'] = int(line['value']) if line['value'] else 0
        elif line["name"] == 'BALLOON':
            if line['value']:
                balloons = [int(v) for v in line['value'].split(",") if v]
            else:
-                balloons = []
+                pass  # Ignore other header fields such as 'TITLE', 'SUBTITLE', 'WAVE', etc.
            headers['balloon'] = balloons
        # STYLE is a P1/P2 command, which we don't support yet, so normally this would be a NotImplementedError.
        # However, TakoTako outputs `STYLE:SINGLE` when converting Ura charts, so throwing an error here prevents
        # Ura charts from being converted. See: https://github.com/vivaria/tja2fumen/issues/15#issuecomment-1575341088
        elif line["name"] == 'STYLE':
            pass
        else:
            raise NotImplementedError
-    def parseBranchCommands(line):
+        # Case 2: Commands and note data (to be further processed course-by-course later on)
-        nonlocal flagLevelhold, targetBranch, currentBranch
+        elif not re.match(r"//.*", line):  # Exclude comment-only lines ('//')
-        if line["name"] == 'BRANCHSTART':
+            match_command = re.match(r"^#([A-Z]+)(?:\s+(.+))?", line)
-            if flagLevelhold:
+            match_notes = re.match(r"^(([0-9]|A|B|C|F|G)*,?).*$", line)
-                return
+            if match_command:
-            values = line['value'].split(',')
+                nameUpper = match_command.group(1).upper()
-            if values[0] == 'r':
+                value = match_command.group(2).strip() if match_command.group(2) else ''
-                if len(values) >= 3:
+            elif match_notes:
-                    targetBranch = 'M'
+                nameUpper = 'NOTES'
-                elif len(values) == 2:
+                value = match_notes.group(1)
-                    targetBranch = 'E'
+            courses[currentCourse]['measures'].append({"name": nameUpper, "value": value})
                else:
                    targetBranch = 'N'
            elif values[0] == 'p':
                if len(values) >= 3 and float(values[2]) <= 100:
                    targetBranch = 'M'
                elif len(values) >= 2 and float(values[1]) <= 100:
                    targetBranch = 'E'
                else:
                    targetBranch = 'N'
        elif line["name"] == 'BRANCHEND':
            currentBranch = targetBranch
        elif line["name"] == 'N':
            currentBranch = 'N'
        elif line["name"] == 'E':
            currentBranch = 'E'
        elif line["name"] == 'M':
            currentBranch = 'M'
        elif line["name"] == 'START' or line['name'] == 'END':
            currentBranch = 'N'
            targetBranch = 'N'
            flagLevelhold = False
        elif line['name'] == 'SECTION':
            raise NotImplementedError
        else:
            raise NotImplementedError
-    def parseMeasureCommands(line):
+    return courses
        nonlocal measureDivisor, measureDividend, measureEvents, flagLevelhold
        if line['name'] == 'MEASURE':
            matchMeasure = re.match(r"(\d+)/(\d+)", line['value'])
            if not matchMeasure:
                return
            measureDividend = int(matchMeasure.group(1))
            measureDivisor = int(matchMeasure.group(2))
        elif line['name'] == 'GOGOSTART':
            measureEvents.append({"name": 'gogo', "position": len(measureData), "value": '1'})
        elif line['name'] == 'GOGOEND':
            measureEvents.append({"name": 'gogo', "position": len(measureData), "value": '0'})
        elif line['name'] == 'BARLINEON':
            measureEvents.append({"name": 'barline', "position": len(measureData), "value": '1'})
        elif line['name'] == 'BARLINEOFF':
            measureEvents.append({"name": 'barline', "position": len(measureData), "value": '0'})
        elif line['name'] == 'SCROLL':
            measureEvents.append({"name": 'scroll', "position": len(measureData), "value": float(line['value'])})
        elif line['name'] == 'BPMCHANGE':
            measureEvents.append({"name": 'bpm', "position": len(measureData), "value": float(line['value'])})
        elif line['name'] == 'LEVELHOLD':
            flagLevelhold = True
        elif line['name'] == 'DELAY':
            raise NotImplementedError
        elif line['name'] == 'LYRIC':
            pass
        elif line['name'] == 'NEXTSONG':
            pass
        else:
            raise NotImplementedError
    def parseMeasureData(line):
        nonlocal measures, measureData, measureDividend, measureDivisor, measureEvents
        data = line['data']
        # If measure has ended, then append the measure and start anew
        if data.endswith(','):
            measureData += data[0:-1]
            measure = {
                "length": [measureDividend, measureDivisor],
                "data": measureData,
                "events": measureEvents,
            }
            measures.append(measure)
            measureData = ''
            measureEvents = []
        # Otherwise, keep tracking measureData
        else:
            measureData += data
 def parseCourseMeasures(lines):
    # Define state variables
    headers = {'balloon': []}  # Charters sometimes exclude `BALLOON` entirely if there are none
    measures = []
    measureDividend = 4
    measureDivisor = 4
    measureData = ''
    measureEvents = []
    currentBranch = 'N'
    targetBranch = 'N'
    flagLevelhold = False
    # Process course lines
    measures = []
    measureNotes = ''
    measureEvents = []
    for line in lines:
-        if line["type"] == 'header':
+        assert currentBranch == targetBranch
-            parseHeaderMetadata(line)
+        # 1. Parse measure notes
-        elif line["type"] == 'command' and line['name'] in BRANCH_COMMANDS:
+        if line['name'] == 'NOTES':
-            parseBranchCommands(line)
+            notes = line['value']
-        elif line["type"] == 'command' and line['name'] in MEASURE_COMMANDS and currentBranch == targetBranch:
+            # If measure has ended, then append the measure and start anew
-            parseMeasureCommands(line)
+            if notes.endswith(','):
-        elif line['type'] == 'data' and currentBranch == targetBranch:
+                measureNotes += notes[0:-1]
-            parseMeasureData(line)
+                measure = {
                    "data": measureNotes,
                    "events": measureEvents,
                }
                measures.append(measure)
                measureNotes = ''
                measureEvents = []
            # Otherwise, keep tracking measureNotes
            else:
                measureNotes += notes
-    # Post-processing: Ensure the first measure has a BPM event
+        # 2. Parse commands
-    if measures:
+        else:
-        firstBPMEventFound = False
+            # Measure commands
-        # Search for BPM event in the first measure
+            if line['name'] == 'GOGOSTART':
-        for i in range(len(measures[0]['events'])):
+                measureEvents.append({"name": 'gogo', "position": len(measureNotes), "value": '1'})
-            evt = measures[0]['events'][i]
+            elif line['name'] == 'GOGOEND':
-            if evt['name'] == 'bpm' and evt['position'] == 0:
+                measureEvents.append({"name": 'gogo', "position": len(measureNotes), "value": '0'})
-                firstBPMEventFound = True
+            elif line['name'] == 'BARLINEON':
-        # If not present, insert a BPM event into the first measure using the global header metadata
+                measureEvents.append({"name": 'barline', "position": len(measureNotes), "value": '1'})
-        if not firstBPMEventFound:
+            elif line['name'] == 'BARLINEOFF':
-            # noinspection PyTypeChecker
+                measureEvents.append({"name": 'barline', "position": len(measureNotes), "value": '0'})
-            measures[0]['events'].insert(0, {"name": 'bpm', "position": 0, "value": tjaHeaders['bpm']})
+            elif line['name'] == 'SCROLL':
                measureEvents.append({"name": 'scroll', "position": len(measureNotes), "value": float(line['value'])})
            elif line['name'] == 'BPMCHANGE':
                measureEvents.append({"name": 'bpm', "position": len(measureNotes), "value": float(line['value'])})
            elif line['name'] == 'MEASURE':
                measureEvents.append({"name": 'measure', "position": len(measureNotes), "value": line['value']})
-    # Post-processing: In case the file doesn't end on a "measure end" symbol (','), append whatever is left
+            # Branch commands
-    if measureData:
+            elif line["name"] == 'START' or line['name'] == 'END':
                currentBranch = 'N'
                targetBranch = 'N'
                flagLevelhold = False
            elif line['name'] == 'LEVELHOLD':
                flagLevelhold = True
            elif line["name"] == 'N':
                currentBranch = 'N'
            elif line["name"] == 'E':
                currentBranch = 'E'
            elif line["name"] == 'M':
                currentBranch = 'M'
            elif line["name"] == 'BRANCHEND':
                currentBranch = targetBranch
            elif line["name"] == 'BRANCHSTART':
                if flagLevelhold:
                    continue
                values = line['value'].split(',')
                if values[0] == 'r':
                    if len(values) >= 3:
                        targetBranch = 'M'
                    elif len(values) == 2:
                        targetBranch = 'E'
                    else:
                        targetBranch = 'N'
                elif values[0] == 'p':
                    if len(values) >= 3 and float(values[2]) <= 100:
                        targetBranch = 'M'
                    elif len(values) >= 2 and float(values[1]) <= 100:
                        targetBranch = 'E'
                    else:
                        targetBranch = 'N'
            # Ignored commands
            elif line['name'] == 'LYRIC':
                pass
            elif line['name'] == 'NEXTSONG':
                pass
            # Not implemented commands
            elif line['name'] == 'SECTION':
                raise NotImplementedError
            elif line['name'] == 'DELAY':
                raise NotImplementedError
            else:
                raise NotImplementedError
    # If there is measure data (i.e. the file doesn't end on a "measure end" symbol ','), append whatever is left
    if measureNotes:
        measures.append({
-            "length": [measureDividend, measureDivisor],
+            "data": measureNotes,
            "data": measureData,
            "events": measureEvents,
        })
-
+    # Otherwise, if the file ends on a measure event (e.g. #GOGOEND), append any remaining events
    # Post-processing: Otherwise, if the file ends on a measure event (e.g. #GOGOEND), append any remaining events
    elif measureEvents:
        for event in measureEvents:
            event['position'] = len(measures[len(measures) - 1]['data'])
            # noinspection PyTypeChecker
            measures[len(measures) - 1]['events'].append(event)
-    return headers, measures
+    # Merge measure data and measure events in chronological order
    for measure in measures:
        notes = [{'pos': i, 'type': 'note', 'value': TJA_NOTE_TYPES[note]}
                 for i, note in enumerate(measure['data']) if note != '0']
        events = [{'pos': e['position'], 'type': e['name'], 'value': e['value']}
                  for e in measure['events']]
        combined = []
        while notes or events:
            if events and notes:
                if notes[0]['pos'] >= events[0]['pos']:
                    combined.append(events.pop(0))
                else:
                    combined.append(notes.pop(0))
            elif events:
                combined.append(events.pop(0))
            elif notes:
                combined.append(notes.pop(0))
        measure['combined'] = combined
-
+    return measures
 def applyFumenStructureToParsedTJA(globalHeader, courseHeader, measures):
    """
    Build a single fumen-like song dict from the parsed pieces of a TJA course.

    Every global header field is copied into `song['metadata']`. Course header fields are
    copied there as well, except `scoreInit` and `scoreDiff`, which are stored as top-level
    keys of the song. The measures are collected, in order, into `song['measures']`.
    """
    topLevelFields = ('scoreInit', 'scoreDiff')
    metadata = dict(globalHeader.items())
    song = {'measures': list(measures), 'metadata': metadata}
    for field, fieldValue in courseHeader.items():
        # Scoring fields sit beside 'metadata'; every other course field goes inside it
        destination = song if field in topLevelFields else metadata
        destination[field] = fieldValue
    return song
 ########################################################################################################################