1
0
mirror of synced 2025-01-24 15:12:19 +01:00

Refactor TJA parsing code to enable the development of further features (#19)

This commit is contained in:
Viv 2023-06-30 14:51:57 -04:00 committed by GitHub
parent 1510a62bf5
commit 4a5a3399aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 193 additions and 261 deletions

View File

@ -1,11 +1,3 @@
# Various commands and header fields for TJA files
HEADER_GLOBAL = ['TITLE', 'TITLEJA', 'SUBTITLE', 'SUBTITLEJA', 'BPM', 'WAVE', 'OFFSET', 'DEMOSTART', 'GENRE',
'SCOREMODE', 'BGMOVIE', 'SONGVOL', 'SEVOL']
HEADER_COURSE = ['COURSE', 'LEVEL', 'BALLOON', 'SCOREINIT', 'SCOREDIFF', 'STYLE']
BRANCH_COMMANDS = ['START', 'END', 'BRANCHSTART', 'BRANCHEND', 'N', 'E', 'M', 'SECTION']
MEASURE_COMMANDS = ['MEASURE', 'GOGOSTART', 'GOGOEND', 'BARLINEON', 'BARLINEOFF', 'SCROLL', 'BPMCHANGE', 'DELAY', 'LEVELHOLD']
COMMAND = BRANCH_COMMANDS + MEASURE_COMMANDS
# Note types for TJA files # Note types for TJA files
TJA_NOTE_TYPES = { TJA_NOTE_TYPES = {
'1': 'Don', '1': 'Don',

View File

@ -1,7 +1,8 @@
from copy import deepcopy from copy import deepcopy
import re
from tja2fumen.utils import computeSoulGaugeBytes from tja2fumen.utils import computeSoulGaugeBytes
from tja2fumen.constants import TJA_NOTE_TYPES, DIFFICULTY_BYTES, sampleHeaderMetadata, simpleHeaders from tja2fumen.constants import DIFFICULTY_BYTES, sampleHeaderMetadata, simpleHeaders
# Filler metadata that the `writeFumen` function expects # Filler metadata that the `writeFumen` function expects
# TODO: Determine how to properly set the item byte (https://github.com/vivaria/tja2fumen/issues/17) # TODO: Determine how to properly set the item byte (https://github.com/vivaria/tja2fumen/issues/17)
@ -22,7 +23,7 @@ default_measure = {
} }
def preprocessTJAMeasures(tja): def processTJACommands(tja):
""" """
Merge TJA 'data' and 'event' fields into a single measure property, and split Merge TJA 'data' and 'event' fields into a single measure property, and split
measures into sub-measures whenever a mid-measure BPM change occurs. measures into sub-measures whenever a mid-measure BPM change occurs.
@ -40,35 +41,20 @@ def preprocessTJAMeasures(tja):
In the future, this logic should probably be moved into the TJA parser itself. In the future, this logic should probably be moved into the TJA parser itself.
""" """
currentBPM = 0 currentBPM = float(tja['metadata']['bpm'])
currentScroll = 1.0 currentScroll = 1.0
currentGogo = False currentGogo = False
currentBarline = True currentBarline = True
currentDividend = 4
currentDivisor = 4
measuresCorrected = [] measuresCorrected = []
for measure in tja['measures']: for measure in tja['measures']:
# Step 1: Combine notes and events # Split measure into submeasure
notes = [{'pos': i, 'type': 'note', 'value': TJA_NOTE_TYPES[note]}
for i, note in enumerate(measure['data']) if note != '0']
events = [{'pos': e['position'], 'type': e['name'], 'value': e['value']}
for e in measure['events']]
combined = []
while notes or events:
if events and notes:
if notes[0]['pos'] >= events[0]['pos']:
combined.append(events.pop(0))
else:
combined.append(notes.pop(0))
elif events:
combined.append(events.pop(0))
elif notes:
combined.append(notes.pop(0))
# Step 2: Split measure into submeasure
measure_cur = {'bpm': currentBPM, 'scroll': currentScroll, 'gogo': currentGogo, 'barline': currentBarline, measure_cur = {'bpm': currentBPM, 'scroll': currentScroll, 'gogo': currentGogo, 'barline': currentBarline,
'subdivisions': len(measure['data']), 'pos_start': 0, 'pos_end': 0, 'subdivisions': len(measure['data']), 'pos_start': 0, 'pos_end': 0,
'time_sig': measure['length'], 'data': []} 'time_sig': [currentDividend, currentDivisor], 'data': []}
for data in combined: for data in measure['combined']:
if data['type'] == 'note': if data['type'] == 'note':
measure_cur['data'].append(data) measure_cur['data'].append(data)
# Update the current measure's SCROLL/GOGO/BARLINE status. # Update the current measure's SCROLL/GOGO/BARLINE status.
@ -96,7 +82,14 @@ def preprocessTJAMeasures(tja):
measuresCorrected.append(measure_cur) measuresCorrected.append(measure_cur)
measure_cur = {'bpm': currentBPM, 'scroll': currentScroll, 'gogo': currentGogo, 'barline': currentBarline, measure_cur = {'bpm': currentBPM, 'scroll': currentScroll, 'gogo': currentGogo, 'barline': currentBarline,
'subdivisions': len(measure['data']), 'pos_start': data['pos'], 'pos_end': 0, 'subdivisions': len(measure['data']), 'pos_start': data['pos'], 'pos_end': 0,
'time_sig': measure['length'], 'data': []} 'time_sig': [currentDividend, currentDivisor], 'data': []}
elif data['type'] == 'measure':
matchMeasure = re.match(r"(\d+)/(\d+)", data['value'])
if not matchMeasure:
continue
currentDividend = int(matchMeasure.group(1))
currentDivisor = int(matchMeasure.group(2))
measure_cur['time_sig'] = [currentDividend, currentDivisor]
elif data['type'] == 'scroll': elif data['type'] == 'scroll':
currentScroll = data['value'] currentScroll = data['value']
elif data['type'] == 'gogo': elif data['type'] == 'gogo':
@ -114,11 +107,12 @@ def preprocessTJAMeasures(tja):
def convertTJAToFumen(tja): def convertTJAToFumen(tja):
# Hardcode currentBranch due to current lack of support for branching songs # Hardcode currentBranch due to current lack of support for branching songs
currentBranch = 'normal' # TODO: Program in branch support currentBranch = 'normal' # TODO: Program in branch support
tja['measures'] = preprocessTJAMeasures(tja)
measureDurationPrev = 0 measureDurationPrev = 0
currentDrumroll = None currentDrumroll = None
total_notes = 0 total_notes = 0
tja['measures'] = processTJACommands(tja)
# Parse TJA measures to create converted TJA -> Fumen file # Parse TJA measures to create converted TJA -> Fumen file
tjaConverted = {'measures': []} tjaConverted = {'measures': []}
for idx_m, measureTJA in enumerate(tja['measures']): for idx_m, measureTJA in enumerate(tja['measures']):
@ -188,8 +182,8 @@ def convertTJAToFumen(tja):
note = deepcopy(default_note) note = deepcopy(default_note)
note['pos'] = note_pos note['pos'] = note_pos
note['type'] = data['value'] note['type'] = data['value']
note['scoreInit'] = tja['scoreInit'] # Probably not fully accurate note['scoreInit'] = tja['metadata']['scoreInit'] # Probably not fully accurate
note['scoreDiff'] = tja['scoreDiff'] # Probably not fully accurate note['scoreDiff'] = tja['metadata']['scoreDiff'] # Probably not fully accurate
# Handle drumroll/balloon-specific metadata # Handle drumroll/balloon-specific metadata
if note['type'] in ["Balloon", "Kusudama"]: if note['type'] in ["Balloon", "Kusudama"]:
note['hits'] = tja['metadata']['balloon'].pop(0) note['hits'] = tja['metadata']['balloon'].pop(0)
@ -234,7 +228,7 @@ def convertTJAToFumen(tja):
tjaConverted['order'] = '<' tjaConverted['order'] = '<'
tjaConverted['unknownMetadata'] = 0 tjaConverted['unknownMetadata'] = 0
tjaConverted['branches'] = False tjaConverted['branches'] = False
tjaConverted['scoreInit'] = tja['scoreInit'] tjaConverted['scoreInit'] = tja['metadata']['scoreInit']
tjaConverted['scoreDiff'] = tja['scoreDiff'] tjaConverted['scoreDiff'] = tja['metadata']['scoreDiff']
return tjaConverted return tjaConverted

View File

@ -2,12 +2,7 @@ import os
import re import re
from tja2fumen.utils import readStruct, getBool, shortHex from tja2fumen.utils import readStruct, getBool, shortHex
from tja2fumen.constants import ( from tja2fumen.constants import NORMALIZE_COURSE, TJA_NOTE_TYPES, branchNames, noteTypes
# TJA constants
HEADER_GLOBAL, HEADER_COURSE, BRANCH_COMMANDS, MEASURE_COMMANDS, COMMAND, NORMALIZE_COURSE,
# Fumen constants
branchNames, noteTypes
)
######################################################################################################################## ########################################################################################################################
@ -21,101 +16,141 @@ def parseTJA(fnameTJA):
except UnicodeDecodeError: except UnicodeDecodeError:
tja = open(fnameTJA, "r", encoding="shift-jis") tja = open(fnameTJA, "r", encoding="shift-jis")
# Split into lines lines = [line for line in tja.read().splitlines() if line.strip() != '']
lines = tja.read().splitlines() courses = getCourseData(lines)
lines = [line for line in lines if line.strip()] # Discard empty lines for courseData in courses.values():
courseData['measures'] = parseCourseMeasures(courseData['measures'])
# Line by line return courses
headers = {}
def getCourseData(lines):
courses = {} courses = {}
currentCourse = '' currentCourse = ''
songBPM = 0
songOffset = 0
for line in lines: for line in lines:
parsed = parseLine(line) # Case 1: Header metadata
# Case 1: Comments (ignore)
if parsed['type'] == 'comment':
pass
# Case 2: Global header metadata
elif parsed['type'] == 'header' and parsed['scope'] == 'global':
headers[parsed['name'].lower()] = parsed['value']
# Case 3: Course data (metadata, commands, note data)
else:
# Check to see if we're starting a new course
if parsed['type'] == 'header' and parsed['scope'] == 'course' and parsed['name'] == 'COURSE':
currentCourse = NORMALIZE_COURSE[parsed['value']]
if currentCourse not in courses.keys():
courses[currentCourse] = []
# Append the line to the current course
courses[currentCourse].append(parsed)
# Convert parsed course lines into actual note data
songs = {}
for courseName, courseLines in courses.items():
courseHeader, courseMeasures = getCourse(headers, courseLines)
songs[courseName] = applyFumenStructureToParsedTJA(headers, courseHeader, courseMeasures)
return songs
def parseLine(line):
# Regex matches for various line types
match_comment = re.match(r"//.*", line)
match_header = re.match(r"^([A-Z]+):(.*)", line) match_header = re.match(r"^([A-Z]+):(.*)", line)
match_command = re.match(r"^#([A-Z]+)(?:\s+(.+))?", line) if match_header:
match_data = re.match(r"^(([0-9]|A|B|C|F|G)*,?).*$", line)
if match_comment:
return {"type": 'comment', "value": line}
elif match_header:
nameUpper = match_header.group(1).upper() nameUpper = match_header.group(1).upper()
value = match_header.group(2) value = match_header.group(2).strip()
if nameUpper in HEADER_GLOBAL:
return {"type": 'header', "scope": 'global', "name": nameUpper, "value": value.strip()}
elif nameUpper in HEADER_COURSE:
return {"type": 'header', "scope": 'course', "name": nameUpper, "value": value.strip()}
elif match_command: # Global header fields
nameUpper = match_command.group(1).upper() if nameUpper == 'BPM':
value = match_command.group(2) if match_command.group(2) else '' songBPM = value
if nameUpper in COMMAND: elif nameUpper == 'OFFSET':
return {"type": 'command', "name": nameUpper, "value": value.strip()} songOffset = value
elif match_data: # Course-specific header fields
return {"type": 'data', "data": match_data.group(1)} elif nameUpper == 'COURSE':
currentCourse = NORMALIZE_COURSE[value]
return {"type": 'unknown', "value": line} if currentCourse not in courses.keys():
courses[currentCourse] = {
'metadata': {'course': currentCourse, 'bpm': songBPM, 'offset': songOffset, 'level': 0,
def getCourse(tjaHeaders, lines): 'balloon': [], 'scoreInit': 0, 'scoreDiff': 0},
def parseHeaderMetadata(line): 'measures': [],
nonlocal headers }
if line["name"] == 'COURSE': elif nameUpper == 'LEVEL':
headers['course'] = NORMALIZE_COURSE[line['value']] courses[currentCourse]['metadata']['level'] = int(value) if value else 0
elif line["name"] == 'LEVEL': elif nameUpper == 'SCOREINIT':
headers['level'] = int(line['value']) if line['value'] else 0 courses[currentCourse]['metadata']['scoreInit'] = int(value) if value else 0
elif line["name"] == 'SCOREINIT': elif nameUpper == 'SCOREDIFF':
headers['scoreInit'] = int(line['value']) if line['value'] else 0 courses[currentCourse]['metadata']['scoreDiff'] = int(value) if value else 0
elif line["name"] == 'SCOREDIFF': elif nameUpper == 'BALLOON':
headers['scoreDiff'] = int(line['value']) if line['value'] else 0 if value:
elif line["name"] == 'BALLOON': balloons = [int(v) for v in value.split(",") if v]
if line['value']: courses[currentCourse]['metadata']['balloon'] = balloons
balloons = [int(v) for v in line['value'].split(",") if v] # STYLE is a P1/P2 command, which we don't support yet, so normally this would be a
else: # NotImplementedError. However, TakoTako outputs `STYLE:SINGLE` when converting Ura
balloons = [] # charts, so throwing an error here would prevent Ura charts from being converted.
headers['balloon'] = balloons # See: https://github.com/vivaria/tja2fumen/issues/15#issuecomment-1575341088
# STYLE is a P1/P2 command, which we don't support yet, so normally this would be a NotImplementedError. elif nameUpper == 'STYLE':
# However, TakoTako outputs `STYLE:SINGLE` when converting Ura charts, so throwing an error here prevents
# Ura charts from being converted. See: https://github.com/vivaria/tja2fumen/issues/15#issuecomment-1575341088
elif line["name"] == 'STYLE':
pass pass
else: else:
raise NotImplementedError pass # Ignore other header fields such as 'TITLE', 'SUBTITLE', 'WAVE', etc.
def parseBranchCommands(line): # Case 2: Commands and note data (to be further processed course-by-course later on)
nonlocal flagLevelhold, targetBranch, currentBranch elif not re.match(r"//.*", line): # Exclude comment-only lines ('//')
if line["name"] == 'BRANCHSTART': match_command = re.match(r"^#([A-Z]+)(?:\s+(.+))?", line)
match_notes = re.match(r"^(([0-9]|A|B|C|F|G)*,?).*$", line)
if match_command:
nameUpper = match_command.group(1).upper()
value = match_command.group(2).strip() if match_command.group(2) else ''
elif match_notes:
nameUpper = 'NOTES'
value = match_notes.group(1)
courses[currentCourse]['measures'].append({"name": nameUpper, "value": value})
return courses
def parseCourseMeasures(lines):
# Define state variables
currentBranch = 'N'
targetBranch = 'N'
flagLevelhold = False
# Process course lines
measures = []
measureNotes = ''
measureEvents = []
for line in lines:
assert currentBranch == targetBranch
# 1. Parse measure notes
if line['name'] == 'NOTES':
notes = line['value']
# If measure has ended, then append the measure and start anew
if notes.endswith(','):
measureNotes += notes[0:-1]
measure = {
"data": measureNotes,
"events": measureEvents,
}
measures.append(measure)
measureNotes = ''
measureEvents = []
# Otherwise, keep tracking measureNotes
else:
measureNotes += notes
# 2. Parse commands
else:
# Measure commands
if line['name'] == 'GOGOSTART':
measureEvents.append({"name": 'gogo', "position": len(measureNotes), "value": '1'})
elif line['name'] == 'GOGOEND':
measureEvents.append({"name": 'gogo', "position": len(measureNotes), "value": '0'})
elif line['name'] == 'BARLINEON':
measureEvents.append({"name": 'barline', "position": len(measureNotes), "value": '1'})
elif line['name'] == 'BARLINEOFF':
measureEvents.append({"name": 'barline', "position": len(measureNotes), "value": '0'})
elif line['name'] == 'SCROLL':
measureEvents.append({"name": 'scroll', "position": len(measureNotes), "value": float(line['value'])})
elif line['name'] == 'BPMCHANGE':
measureEvents.append({"name": 'bpm', "position": len(measureNotes), "value": float(line['value'])})
elif line['name'] == 'MEASURE':
measureEvents.append({"name": 'measure', "position": len(measureNotes), "value": line['value']})
# Branch commands
elif line["name"] == 'START' or line['name'] == 'END':
currentBranch = 'N'
targetBranch = 'N'
flagLevelhold = False
elif line['name'] == 'LEVELHOLD':
flagLevelhold = True
elif line["name"] == 'N':
currentBranch = 'N'
elif line["name"] == 'E':
currentBranch = 'E'
elif line["name"] == 'M':
currentBranch = 'M'
elif line["name"] == 'BRANCHEND':
currentBranch = targetBranch
elif line["name"] == 'BRANCHSTART':
if flagLevelhold: if flagLevelhold:
return continue
values = line['value'].split(',') values = line['value'].split(',')
if values[0] == 'r': if values[0] == 'r':
if len(values) >= 3: if len(values) >= 3:
@ -131,142 +166,53 @@ def getCourse(tjaHeaders, lines):
targetBranch = 'E' targetBranch = 'E'
else: else:
targetBranch = 'N' targetBranch = 'N'
elif line["name"] == 'BRANCHEND':
currentBranch = targetBranch
elif line["name"] == 'N':
currentBranch = 'N'
elif line["name"] == 'E':
currentBranch = 'E'
elif line["name"] == 'M':
currentBranch = 'M'
elif line["name"] == 'START' or line['name'] == 'END':
currentBranch = 'N'
targetBranch = 'N'
flagLevelhold = False
elif line['name'] == 'SECTION':
raise NotImplementedError
else:
raise NotImplementedError
def parseMeasureCommands(line): # Ignored commands
nonlocal measureDivisor, measureDividend, measureEvents, flagLevelhold
if line['name'] == 'MEASURE':
matchMeasure = re.match(r"(\d+)/(\d+)", line['value'])
if not matchMeasure:
return
measureDividend = int(matchMeasure.group(1))
measureDivisor = int(matchMeasure.group(2))
elif line['name'] == 'GOGOSTART':
measureEvents.append({"name": 'gogo', "position": len(measureData), "value": '1'})
elif line['name'] == 'GOGOEND':
measureEvents.append({"name": 'gogo', "position": len(measureData), "value": '0'})
elif line['name'] == 'BARLINEON':
measureEvents.append({"name": 'barline', "position": len(measureData), "value": '1'})
elif line['name'] == 'BARLINEOFF':
measureEvents.append({"name": 'barline', "position": len(measureData), "value": '0'})
elif line['name'] == 'SCROLL':
measureEvents.append({"name": 'scroll', "position": len(measureData), "value": float(line['value'])})
elif line['name'] == 'BPMCHANGE':
measureEvents.append({"name": 'bpm', "position": len(measureData), "value": float(line['value'])})
elif line['name'] == 'LEVELHOLD':
flagLevelhold = True
elif line['name'] == 'DELAY':
raise NotImplementedError
elif line['name'] == 'LYRIC': elif line['name'] == 'LYRIC':
pass pass
elif line['name'] == 'NEXTSONG': elif line['name'] == 'NEXTSONG':
pass pass
# Not implemented commands
elif line['name'] == 'SECTION':
raise NotImplementedError
elif line['name'] == 'DELAY':
raise NotImplementedError
else: else:
raise NotImplementedError raise NotImplementedError
def parseMeasureData(line): # If there is measure data (i.e. the file doesn't end on a "measure end" symbol ','), append whatever is left
nonlocal measures, measureData, measureDividend, measureDivisor, measureEvents if measureNotes:
data = line['data']
# If measure has ended, then append the measure and start anew
if data.endswith(','):
measureData += data[0:-1]
measure = {
"length": [measureDividend, measureDivisor],
"data": measureData,
"events": measureEvents,
}
measures.append(measure)
measureData = ''
measureEvents = []
# Otherwise, keep tracking measureData
else:
measureData += data
# Define state variables
headers = {'balloon': []} # Charters sometimes exclude `BALLOON` entirely if there are none
measures = []
measureDividend = 4
measureDivisor = 4
measureData = ''
measureEvents = []
currentBranch = 'N'
targetBranch = 'N'
flagLevelhold = False
# Process course lines
for line in lines:
if line["type"] == 'header':
parseHeaderMetadata(line)
elif line["type"] == 'command' and line['name'] in BRANCH_COMMANDS:
parseBranchCommands(line)
elif line["type"] == 'command' and line['name'] in MEASURE_COMMANDS and currentBranch == targetBranch:
parseMeasureCommands(line)
elif line['type'] == 'data' and currentBranch == targetBranch:
parseMeasureData(line)
# Post-processing: Ensure the first measure has a BPM event
if measures:
firstBPMEventFound = False
# Search for BPM event in the first measure
for i in range(len(measures[0]['events'])):
evt = measures[0]['events'][i]
if evt['name'] == 'bpm' and evt['position'] == 0:
firstBPMEventFound = True
# If not present, insert a BPM event into the first measure using the global header metadata
if not firstBPMEventFound:
# noinspection PyTypeChecker
measures[0]['events'].insert(0, {"name": 'bpm', "position": 0, "value": tjaHeaders['bpm']})
# Post-processing: In case the file doesn't end on a "measure end" symbol (','), append whatever is left
if measureData:
measures.append({ measures.append({
"length": [measureDividend, measureDivisor], "data": measureNotes,
"data": measureData,
"events": measureEvents, "events": measureEvents,
}) })
# Otherwise, if the file ends on a measure event (e.g. #GOGOEND), append any remaining events
# Post-processing: Otherwise, if the file ends on a measure event (e.g. #GOGOEND), append any remaining events
elif measureEvents: elif measureEvents:
for event in measureEvents: for event in measureEvents:
event['position'] = len(measures[len(measures) - 1]['data']) event['position'] = len(measures[len(measures) - 1]['data'])
# noinspection PyTypeChecker
measures[len(measures) - 1]['events'].append(event) measures[len(measures) - 1]['events'].append(event)
return headers, measures # Merge measure data and measure events in chronological order
for measure in measures:
notes = [{'pos': i, 'type': 'note', 'value': TJA_NOTE_TYPES[note]}
def applyFumenStructureToParsedTJA(globalHeader, courseHeader, measures): for i, note in enumerate(measure['data']) if note != '0']
"""Merge song metadata, course metadata, and course data into a single fumen-like object.""" events = [{'pos': e['position'], 'type': e['name'], 'value': e['value']}
song = {'measures': [], 'metadata': {}} for e in measure['events']]
combined = []
for k, v in globalHeader.items(): while notes or events:
song['metadata'][k] = v if events and notes:
if notes[0]['pos'] >= events[0]['pos']:
for k, v in courseHeader.items(): combined.append(events.pop(0))
if k in ['scoreInit', 'scoreDiff']:
song[k] = v
else: else:
song['metadata'][k] = v combined.append(notes.pop(0))
elif events:
combined.append(events.pop(0))
elif notes:
combined.append(notes.pop(0))
measure['combined'] = combined
for i, measure in enumerate(measures): return measures
song['measures'].append(measure)
return song
######################################################################################################################## ########################################################################################################################