Add remaining docstrings/comments

2025-01-24 23:13:40 +01:00 · 2023-07-27 13:12:21 -04:00 · 2023-07-27 13:12:21 -04:00 · 59046c22ec
commit 59046c22ec
parent 740ccbcd31
4 changed files with 112 additions and 28 deletions
--- a/src/tja2fumen/converters.py
+++ b/src/tja2fumen/converters.py
@ -5,27 +5,21 @@ from tja2fumen.types import TJAMeasureProcessed, FumenCourse, FumenNote
 def process_tja_commands(tja):
    """
-    Merge TJA 'data' and 'event' fields into a single measure property, and
+    Process each #COMMAND present in a TJASong's measures, and assign their
-    split measures into sub-measures whenever a mid-measure BPM/SCROLL/GOGO
+    values as attributes to each measure.
    change occurs.
-    The TJA parser produces measure objects with two important properties:
+    This function takes care of two main tasks:
-      - 'data': Contains the note data (1: don, 2: ka, etc.) along with
+        1. Keeping track of what the current values are for BPM, scroll,
-                spacing (s)
+           gogotime, barline, and time signature (#MEASURE).
-      - 'events' Contains event commands such as MEASURE, BPMCHANGE,
+        2. Detecting when a command is placed in the middle of a measure,
-                 GOGOTIME, etc.
+           and splitting that measure into sub-measures.
-    However, notes and events can be intertwined within a single measure. So,
+    ((Note: We split measures into sub-measures because official `.bin` files
-    it's not possible to process them separately; they must be considered as
+      can only have 1 value for BPM/SCROLL/GOGO per measure. So, if a TJA
-    single sequence.
+      measure has multiple BPMs/SCROLLs/GOGOs, it has to be split up.))
-    A particular danger is BPM changes. TJA allows multiple BPMs within a
+    After this function is finished, all the #COMMANDS will be gone, and each
-    single measure, but the fumen format permits one BPM per measure. So, a
+    measure will have attributes (e.g. measure.bpm, measure.scroll) instead.
    TJA measure must be split up if it has multiple BPM changes within a
    measure.
    In the future, this logic should probably be moved into the TJA parser
    itself.
    """
    tja_branches_processed = {branch_name: []
                              for branch_name in tja.branches.keys()}
@ -37,7 +31,6 @@ def process_tja_commands(tja):
        current_dividend = 4
        current_divisor = 4
        for measure_tja in branch_measures_tja:
            # Split measure into submeasure
            measure_tja_processed = TJAMeasureProcessed(
                bpm=current_bpm,
                scroll=current_scroll,
@ -75,7 +68,7 @@ def process_tja_commands(tja):
                # measure. (For fumen files, if there is a mid-measure change
                # to BPM/SCROLL/GOGO, then the measure will actually be split
                # into two small submeasures. So, we need to start a new
-                # measure in those cases.
+                # measure in those cases.)
                elif data.name in ['bpm', 'scroll', 'gogo']:
                    # Parse the values
                    if data.name == 'bpm':
@ -133,6 +126,36 @@ def process_tja_commands(tja):
 def convert_tja_to_fumen(tja):
    """
    Convert TJA data to Fumen data by calculating Fumen-specific values.
    Fumen files (`.bin`) use a very strict file structure. Certain values are
    expected at very specific byte locations in the file, such as:
      - Header metadata (first 520 bytes). The header stores information such
        as branch points for each note type, soul gauge behavior, etc.
      - Note data (millisecond offset values, drumroll duration, etc.)
      - Branch condition info for each measure
    Since TJA files only contain notes and commands, we must compute all of
    these extra values ourselves. The values are then stored in new "Fumen"
    Python objects that mimic the structure of the fumen `.bin` files:
    FumenCourse
    ├─ FumenMeasure
    │  ├─ FumenBranch ('normal')
    │  │  ├─ FumenNote
    │  │  ├─ FumenNote
    │  │  └─ ...
    │  ├─ FumenBranch ('professional')
    │  └─ FumenBranch ('master')
    ├─ FumenMeasure
    ├─ FumenMeasure
    └─ ...
    ((Note: The fumen file structure is the opposite of the TJA file structure;
    branch data is stored within the measure object, rather than measure data
    being stored within the branch object.))
    """
    # Preprocess commands
    tja_branches_processed = process_tja_commands(tja)
@ -179,7 +202,7 @@ def convert_tja_to_fumen(tja):
                subdivisions=measure_tja.subdivisions
            )
-            # Compute the millisecnd offsets for the start/end of each measure
+            # Compute the millisecond offsets for the start/end of each measure
            measure_fumen.set_ms_offsets(
                song_offset=tja.offset,
                delay=measure_tja.delay,
@ -205,7 +228,7 @@ def convert_tja_to_fumen(tja):
                    first_branch_condition=(not branch_conditions),
                    has_section=bool(measure_tja.section)
                )
-                # Reset the points to prepare for the next #BRANCHSTART p
+                # Reset the points to prepare for the next `#BRANCHSTART p`
                branch_points_total = 0
                # Keep track of the branch conditions (to later determine how
                # to set the header bytes for branches)
--- a/src/tja2fumen/parsers.py
+++ b/src/tja2fumen/parsers.py
@ -30,7 +30,7 @@ def parse_tja(fname_tja):
 def split_tja_lines_into_courses(lines):
    """
-    Parse TJA metadata in order to divide TJA lines into separate courses.
+    Parse TJA metadata in order to split TJA lines into separate courses.
    In TJA files, metadata lines are denoted by a colon (':'). These lines
    provide general info about the song (BPM, TITLE, OFFSET, etc.). They also
@ -332,6 +332,27 @@ def parse_tja_course_data(course):
 def parse_fumen(fumen_file, exclude_empty_measures=False):
    """
    Parse bytes of a fumen .bin file into nested measures, branches, and notes.
    Fumen files use a very strict file structure. Certain values are expected
    at very specific byte locations in the file. Here, we parse these specific
    byte locations into the following structure:
    FumenCourse
    ├─ FumenHeader
    │  ├─ Timing windows
    │  ├─ Branch points
    │  ├─ Soul gauge bytes
    │  └─ ...
    ├─ FumenMeasure
    │  ├─ FumenBranch ('normal')
    │  │  ├─ FumenNote
    │  │  ├─ FumenNote
    │  │  └─ ...
    │  ├─ FumenBranch ('professional')
    │  └─ FumenBranch ('master')
    ├─ FumenMeasure
    ├─ FumenMeasure
    └─ ...
    """
    file = open(fumen_file, "rb")
    size = os.fstat(file.fileno()).st_size
--- a/src/tja2fumen/types.py
+++ b/src/tja2fumen/types.py
@ -12,7 +12,10 @@ class DefaultObject:
 class TJASong(DefaultObject):
    """Contains all the data in a single TJA (`.tja`) chart file."""
    def __init__(self, BPM=None, offset=None):
        # Note: TJA song metadata (e.g. TITLE, SUBTITLE, WAVE) is not stored
        #       because it is not needed to convert a `.tja` to `.bin` files.
        self.BPM = float(BPM)
        self.offset = float(offset)
        self.courses = {course: TJACourse(self.BPM, self.offset, course)
@ -25,6 +28,7 @@ class TJASong(DefaultObject):
 class TJACourse(DefaultObject):
    """Contains all the data in a single TJA `COURSE:` section."""
    def __init__(self, BPM, offset, course, level=0, balloon=None,
                 score_init=0, score_diff=0):
        self.level = level
@ -35,6 +39,7 @@ class TJACourse(DefaultObject):
        self.offset = offset
        self.course = course
        self.data = []
        # A "TJA Branch" is just a list of measures
        self.branches = {
            'normal': [TJAMeasure()],
            'professional': [TJAMeasure()],
@ -47,6 +52,7 @@ class TJACourse(DefaultObject):
 class TJAMeasure(DefaultObject):
    """Contains all the data in a single TJA measure (denoted by ',')."""
    def __init__(self, notes=None, events=None):
        self.notes = [] if notes is None else notes
        self.events = [] if events is None else events
@ -54,6 +60,15 @@ class TJAMeasure(DefaultObject):
 class TJAMeasureProcessed(DefaultObject):
    """
    Contains all the data in a single TJA measure (denoted by ','), but with
    all `#COMMAND` lines processed, and their values stored as attributes.
    ((Note: Because only one BPM/SCROLL/GOGO value can be stored per measure,
      any TJA measures with mid-measure commands must be split up. So, the
      number of `TJAMeasureProcessed` objects will often be greater than
      the number of `TJAMeasure` objects for a given song.))
    """
    def __init__(self, bpm, scroll, gogo, barline, time_sig, subdivisions,
                 pos_start=0, pos_end=0, delay=0, section=None,
                 branch_start=None, data=None):
@ -72,13 +87,16 @@ class TJAMeasureProcessed(DefaultObject):
 class TJAData(DefaultObject):
    """Contains the information for a single note or single command."""
    def __init__(self, name, value, pos=None):
        # For TJA, 'pos' is stored as an integer rather than in milliseconds
        self.pos = pos
        self.name = name
        self.value = value
 class FumenCourse(DefaultObject):
    """Contains all the data in a single Fumen (`.bin`) chart file."""
    def __init__(self, measures=None, header=None, score_init=0, score_diff=0):
        if isinstance(measures, int):
            self.measures = [FumenMeasure() for _ in range(measures)]
@ -90,6 +108,7 @@ class FumenCourse(DefaultObject):
 class FumenMeasure(DefaultObject):
    """Contains all the data in a single Fumen measure."""
    def __init__(self, bpm=0.0, offset_start=0.0, offset_end=0.0,
                 duration=0.0, gogo=False, barline=True, branch_start=None,
                 branch_info=None, padding1=0, padding2=0):
@ -128,7 +147,7 @@ class FumenMeasure(DefaultObject):
        if first_measure:
            self.offset_start = (song_offset * -1000) - (4 * 60_000 / self.bpm)
        else:
-            # First, start with sing the end timing of the previous measure
+            # First, start with the end timing of the previous measure
            self.offset_start = prev_measure.offset_end
            # Add any #DELAY commands
            self.offset_start += delay
@ -192,6 +211,7 @@ class FumenMeasure(DefaultObject):
 class FumenBranch(DefaultObject):
    """Contains all the data in a single Fumen branch."""
    def __init__(self, length=0, speed=0.0, padding=0):
        self.length = length
        self.speed = speed
@ -200,6 +220,7 @@ class FumenBranch(DefaultObject):
 class FumenNote(DefaultObject):
    """Contains all the byte values for a single Fumen note."""
    def __init__(self, note_type='', pos=0.0, score_init=0, score_diff=0,
                 padding=0, item=0, duration=0.0, multimeasure=False,
                 hits=0, hits_padding=0,
@ -221,6 +242,7 @@ class FumenNote(DefaultObject):
 class FumenHeader(DefaultObject):
    """Contains all the byte values for a Fumen chart file's header."""
    def __init__(self, raw_bytes=None):
        if raw_bytes is None:
            self.order = "<"
@ -230,7 +252,10 @@ class FumenHeader(DefaultObject):
            self._parse_header_values(raw_bytes)
    def _assign_default_header_values(self):
-        # This byte string corresponds to
+        """Set the default header values."""
        # This byte string corresponds to the timing windows for Hard/Oni
        # ((When these bytes are parsed, you get roughly
        #   (25.025, 75.075, 108.442), repeated 36 times.))
        timing_windows = self.up(b'43\xc8Ag&\x96B"\xe2\xd8B' * 36, "fff" * 36)
        self.b000_b431_timing_windows             = timing_windows
        self.b432_b435_has_branches               = 0
@ -257,9 +282,9 @@ class FumenHeader(DefaultObject):
        self.b516_b519_unknown_data               = 0
    def _parse_header_values(self, raw_bytes):
-        rb = raw_bytes
+        """Parse a raw string of 520 bytes to get the header values."""
-        self.b000_b431_timing_windows             = self.up(rb, "f" * 108,
+        rb = raw_bytes  # We use a shortened form just for visual clarity:
-                                                            0, 431)
+        self.b000_b431_timing_windows           = self.up(rb, "f"*108, 0, 431)
        self.b432_b435_has_branches               = self.up(rb, "i", 432, 435)
        self.b436_b439_hp_max                     = self.up(rb, "i", 436, 439)
        self.b440_b443_hp_clear                   = self.up(rb, "i", 440, 443)
@ -284,13 +309,17 @@ class FumenHeader(DefaultObject):
        self.b516_b519_unknown_data               = self.up(rb, "i", 516, 519)
    def up(self, raw_bytes, type_string, s=None, e=None):
        """Unpack a raw byte string according to specific types."""
        if s is not None and e is not None:
            raw_bytes = raw_bytes[s:e+1]
        vals = struct.unpack(self.order + type_string, raw_bytes)
        return vals[0] if len(vals) == 1 else vals
    def _parse_order(self, raw_bytes):
        """Parse the byte order of the song (little or big endian)."""
        self.order = ''
        # Bytes 512-515 are the number of measures. We read the value as both
        # little and big endian; the order giving the smaller count is used.
        if (self.up(raw_bytes, ">I", 512, 515) <
                self.up(raw_bytes, "<I", 512, 515)):
            return ">"
@ -298,12 +327,15 @@ class FumenHeader(DefaultObject):
            return "<"
    def set_hp_bytes(self, n_notes, difficulty, stars):
        """Compute header bytes related to the soul gauge (HP) behavior."""
        # Note: Ura Oni is equivalent to Oni for soul gauge behavior
        difficulty = 'Oni' if difficulty in ['Ura', 'Edit'] else difficulty
        self._get_hp_from_LUTs(n_notes, difficulty, stars)
        self.b440_b443_hp_clear = {'Easy': 6000, 'Normal': 7000,
                                   'Hard': 7000, 'Oni': 8000}[difficulty]
    def _get_hp_from_LUTs(self, n_notes, difficulty, stars):
        """Fetch pre-computed soul gauge values from lookup tables (LUTs)."""
        if not 0 < n_notes <= 2500:
            return
        star_to_key = {
@ -332,6 +364,7 @@ class FumenHeader(DefaultObject):
    @property
    def raw_bytes(self):
        """Represent the header values as a string of raw bytes."""
        value_list = []
        format_string = self.order
        for key, val in self.__dict__.items():
--- a/src/tja2fumen/writers.py
+++ b/src/tja2fumen/writers.py
@ -4,6 +4,12 @@ from tja2fumen.constants import BRANCH_NAMES, FUMEN_TYPE_NOTES
 def write_fumen(path_out, song):
    """
    Write the values in a FumenCourse object to a `.bin` file.
    This operation is the reverse of the `parse_fumen` function. Please refer
    to that function for more details about the fumen file structure.
    """
    with open(path_out, "wb") as file:
        file.write(song.header.raw_bytes)
@ -42,6 +48,7 @@ def write_fumen(path_out, song):
 def write_struct(file, order, format_string, value_list, seek=None):
    """Pack (int, float, etc.) values into a string of bytes, then write."""
    if seek:
        file.seek(seek)
    packed_bytes = struct.pack(order + format_string, *value_list)