Add remaining docstrings/comments

2025-02-25 22:28:08 +01:00 · 2023-07-27 13:12:21 -04:00 · 2023-07-27 13:12:21 -04:00 · 59046c22ec
commit 59046c22ec
parent 740ccbcd31
4 changed files with 112 additions and 28 deletions
--- a/src/tja2fumen/converters.py
+++ b/src/tja2fumen/converters.py
@ -5,27 +5,21 @@ from tja2fumen.types import TJAMeasureProcessed, FumenCourse, FumenNote

 def process_tja_commands(tja):
    """
-    Merge TJA 'data' and 'event' fields into a single measure property, and
-    split measures into sub-measures whenever a mid-measure BPM/SCROLL/GOGO
-    change occurs.
+    Process each #COMMAND present in a TJASong's measures, and assign their
+    values as attributes to each measure.

-    The TJA parser produces measure objects with two important properties:
-      - 'data': Contains the note data (1: don, 2: ka, etc.) along with
-                spacing (s)
-      - 'events' Contains event commands such as MEASURE, BPMCHANGE,
-                 GOGOTIME, etc.
+    This function takes care of two main tasks:
+        1. Keeping track of what the current values are for BPM, scroll,
+           gogotime, barline, and time signature (#MEASURE).
+        2. Detecting when a command is placed in the middle of a measure,
+           and splitting that measure into sub-measures.

-    However, notes and events can be intertwined within a single measure. So,
-    it's not possible to process them separately; they must be considered as
-    single sequence.
+    ((Note: We split measures into sub-measures because official `.bin` files
+      can only have 1 value for BPM/SCROLL/GOGO per measure. So, if a TJA
+      measure has multiple BPMs/SCROLLs/GOGOs, it has to be split up.))

-    A particular danger is BPM changes. TJA allows multiple BPMs within a
-    single measure, but the fumen format permits one BPM per measure. So, a
-    TJA measure must be split up if it has multiple BPM changes within a
-    measure.
-
-    In the future, this logic should probably be moved into the TJA parser
-    itself.
+    After this function is finished, all the #COMMANDS will be gone, and each
+    measure will have attributes (e.g. measure.bpm, measure.scroll) instead.
    """
    tja_branches_processed = {branch_name: []
                              for branch_name in tja.branches.keys()}
@ -37,7 +31,6 @@ def process_tja_commands(tja):
        current_dividend = 4
        current_divisor = 4
        for measure_tja in branch_measures_tja:
-            # Split measure into submeasure
            measure_tja_processed = TJAMeasureProcessed(
                bpm=current_bpm,
                scroll=current_scroll,
@ -75,7 +68,7 @@ def process_tja_commands(tja):
                # measure. (For fumen files, if there is a mid-measure change
                # to BPM/SCROLL/GOGO, then the measure will actually be split
                # into two small submeasures. So, we need to start a new
-                # measure in those cases.
+                # measure in those cases.)
                elif data.name in ['bpm', 'scroll', 'gogo']:
                    # Parse the values
                    if data.name == 'bpm':
@ -133,6 +126,36 @@ def process_tja_commands(tja):


 def convert_tja_to_fumen(tja):
+    """
+    Convert TJA data to Fumen data by calculating Fumen-specific values.
+
+    Fumen files (`.bin`) use a very strict file structure. Certain values are
+    expected at very specific byte locations in the file, such as:
+      - Header metadata (first 520 bytes). The header stores information such
+        as branch points for each note type, soul gauge behavior, etc.
+      - Note data (millisecond offset values, drumroll duration, etc.)
+      - Branch condition info for each measure
+
+    Since TJA files only contain notes and commands, we must compute all of
+    these extra values ourselves. The values are then stored in new "Fumen"
+    Python objects that mimic the structure of the fumen `.bin` files:
+
+    FumenCourse
+    ├─ FumenMeasure
+    │  ├─ FumenBranch ('normal')
+    │  │  ├─ FumenNote
+    │  │  ├─ FumenNote
+    │  │  └─ ...
+    │  ├─ FumenBranch ('professional')
+    │  └─ FumenBranch ('master')
+    ├─ FumenMeasure
+    ├─ FumenMeasure
+    └─ ...
+
+    ((Note: The fumen file structure is the opposite of the TJA file structure;
+    branch data is stored within the measure object, rather than measure data
+    being stored within the branch object.))
+    """
    # Preprocess commands
    tja_branches_processed = process_tja_commands(tja)

@ -179,7 +202,7 @@ def convert_tja_to_fumen(tja):
                subdivisions=measure_tja.subdivisions
            )

-            # Compute the millisecnd offsets for the start/end of each measure
+            # Compute the millisecond offsets for the start/end of each measure
            measure_fumen.set_ms_offsets(
                song_offset=tja.offset,
                delay=measure_tja.delay,
@ -205,7 +228,7 @@ def convert_tja_to_fumen(tja):
                    first_branch_condition=(not branch_conditions),
                    has_section=bool(measure_tja.section)
                )
-                # Reset the points to prepare for the next #BRANCHSTART p
+                # Reset the points to prepare for the next `#BRANCHSTART p`
                branch_points_total = 0
                # Keep track of the branch conditions (to later determine how
                # to set the header bytes for branches)
--- a/src/tja2fumen/parsers.py
+++ b/src/tja2fumen/parsers.py
@ -30,7 +30,7 @@ def parse_tja(fname_tja):

 def split_tja_lines_into_courses(lines):
    """
-    Parse TJA metadata in order to divide TJA lines into separate courses.
+    Parse TJA metadata in order to split TJA lines into separate courses.

    In TJA files, metadata lines are denoted by a colon (':'). These lines
    provide general info about the song (BPM, TITLE, OFFSET, etc.). They also
@ -332,6 +332,27 @@ def parse_tja_course_data(course):
 def parse_fumen(fumen_file, exclude_empty_measures=False):
    """
    Parse bytes of a fumen .bin file into nested measures, branches, and notes.
+
+    Fumen files use a very strict file structure. Certain values are expected
+    at very specific byte locations in the file. Here, we parse these specific
+    byte locations into the following structure:
+
+    FumenCourse
+    ├─ FumenHeader
+    │  ├─ Timing windows
+    │  ├─ Branch points
+    │  ├─ Soul gauge bytes
+    │  └─ ...
+    ├─ FumenMeasure
+    │  ├─ FumenBranch ('normal')
+    │  │  ├─ FumenNote
+    │  │  ├─ FumenNote
+    │  │  └─ ...
+    │  ├─ FumenBranch ('professional')
+    │  └─ FumenBranch ('master')
+    ├─ FumenMeasure
+    ├─ FumenMeasure
+    └─ ...
    """
    file = open(fumen_file, "rb")
    size = os.fstat(file.fileno()).st_size
--- a/src/tja2fumen/types.py
+++ b/src/tja2fumen/types.py
@ -12,7 +12,10 @@ class DefaultObject:


 class TJASong(DefaultObject):
+    """Contains all the data in a single TJA (`.tja`) chart file."""
    def __init__(self, BPM=None, offset=None):
+        # Note: TJA song metadata (e.g. TITLE, SUBTITLE, WAVE) is not stored
+        #       because it is not needed to convert a `.tja` to `.bin` files.
        self.BPM = float(BPM)
        self.offset = float(offset)
        self.courses = {course: TJACourse(self.BPM, self.offset, course)
@ -25,6 +28,7 @@ class TJASong(DefaultObject):


 class TJACourse(DefaultObject):
+    """Contains all the data in a single TJA `COURSE:` section."""
    def __init__(self, BPM, offset, course, level=0, balloon=None,
                 score_init=0, score_diff=0):
        self.level = level
@ -35,6 +39,7 @@ class TJACourse(DefaultObject):
        self.offset = offset
        self.course = course
        self.data = []
+        # A "TJA Branch" is just a list of measures
        self.branches = {
            'normal': [TJAMeasure()],
            'professional': [TJAMeasure()],
@ -47,6 +52,7 @@ class TJACourse(DefaultObject):


 class TJAMeasure(DefaultObject):
+    """Contains all the data in a single TJA measure (denoted by ',')."""
    def __init__(self, notes=None, events=None):
        self.notes = [] if notes is None else notes
        self.events = [] if events is None else events
@ -54,6 +60,15 @@ class TJAMeasure(DefaultObject):


 class TJAMeasureProcessed(DefaultObject):
+    """
+    Contains all the data in a single TJA measure (denoted by ','), but with
+    all `#COMMAND` lines processed, and their values stored as attributes.
+
+    ((Note: Because only one BPM/SCROLL/GOGO value can be stored per measure,
+      any TJA measures with mid-measure commands must be split up. So, the
+      number of `TJAMeasureProcessed` objects will often be greater than
+      the number of `TJAMeasure` objects for a given song.))
+    """
    def __init__(self, bpm, scroll, gogo, barline, time_sig, subdivisions,
                 pos_start=0, pos_end=0, delay=0, section=None,
                 branch_start=None, data=None):
@ -72,13 +87,16 @@ class TJAMeasureProcessed(DefaultObject):


 class TJAData(DefaultObject):
+    """Contains the information for a single note or single command."""
    def __init__(self, name, value, pos=None):
+        # For TJA, 'pos' is stored as an integer rather than in milliseconds
        self.pos = pos
        self.name = name
        self.value = value


 class FumenCourse(DefaultObject):
+    """Contains all the data in a single Fumen (`.bin`) chart file."""
    def __init__(self, measures=None, header=None, score_init=0, score_diff=0):
        if isinstance(measures, int):
            self.measures = [FumenMeasure() for _ in range(measures)]
@ -90,6 +108,7 @@ class FumenCourse(DefaultObject):


 class FumenMeasure(DefaultObject):
+    """Contains all the data in a single Fumen measure."""
    def __init__(self, bpm=0.0, offset_start=0.0, offset_end=0.0,
                 duration=0.0, gogo=False, barline=True, branch_start=None,
                 branch_info=None, padding1=0, padding2=0):
@ -128,7 +147,7 @@ class FumenMeasure(DefaultObject):
        if first_measure:
            self.offset_start = (song_offset * -1000) - (4 * 60_000 / self.bpm)
        else:
-            # First, start with sing the end timing of the previous measure
+            # First, start with the end timing of the previous measure
            self.offset_start = prev_measure.offset_end
            # Add any #DELAY commands
            self.offset_start += delay
@ -192,6 +211,7 @@ class FumenMeasure(DefaultObject):


 class FumenBranch(DefaultObject):
+    """Contains all the data in a single Fumen branch."""
    def __init__(self, length=0, speed=0.0, padding=0):
        self.length = length
        self.speed = speed
@ -200,6 +220,7 @@ class FumenBranch(DefaultObject):


 class FumenNote(DefaultObject):
+    """Contains all the byte values for a single Fumen note."""
    def __init__(self, note_type='', pos=0.0, score_init=0, score_diff=0,
                 padding=0, item=0, duration=0.0, multimeasure=False,
                 hits=0, hits_padding=0,
@ -221,6 +242,7 @@ class FumenNote(DefaultObject):


 class FumenHeader(DefaultObject):
+    """Contains all the byte values for a Fumen chart file's header."""
    def __init__(self, raw_bytes=None):
        if raw_bytes is None:
            self.order = "<"
@ -230,7 +252,10 @@ class FumenHeader(DefaultObject):
            self._parse_header_values(raw_bytes)

    def _assign_default_header_values(self):
-        # This byte string corresponds to
+        """Set the default header values."""
+        # This byte string corresponds to the timing windows for Hard/Oni
+        # ((When these bytes are parsed, you get approximately
+        #   (25.025, 75.075, 108.442), repeated 36 times.))
        timing_windows = self.up(b'43\xc8Ag&\x96B"\xe2\xd8B' * 36, "fff" * 36)
        self.b000_b431_timing_windows             = timing_windows
        self.b432_b435_has_branches               = 0
@ -257,9 +282,9 @@ class FumenHeader(DefaultObject):
        self.b516_b519_unknown_data               = 0

    def _parse_header_values(self, raw_bytes):
-        rb = raw_bytes
-        self.b000_b431_timing_windows             = self.up(rb, "f" * 108,
-                                                            0, 431)
+        """Parse a raw string of 520 bytes to get the header values."""
+        rb = raw_bytes  # We use a shortened form just for visual clarity:
+        self.b000_b431_timing_windows           = self.up(rb, "f"*108, 0, 431)
        self.b432_b435_has_branches               = self.up(rb, "i", 432, 435)
        self.b436_b439_hp_max                     = self.up(rb, "i", 436, 439)
        self.b440_b443_hp_clear                   = self.up(rb, "i", 440, 443)
@ -284,13 +309,17 @@ class FumenHeader(DefaultObject):
        self.b516_b519_unknown_data               = self.up(rb, "i", 516, 519)

    def up(self, raw_bytes, type_string, s=None, e=None):
+        """Unpack a raw byte string according to specific types."""
        if s is not None and e is not None:
            raw_bytes = raw_bytes[s:e+1]
        vals = struct.unpack(self.order + type_string, raw_bytes)
        return vals[0] if len(vals) == 1 else vals

    def _parse_order(self, raw_bytes):
+        """Parse the byte order of the song (little or big endian)."""
        self.order = ''
+        # Bytes 512-515 are the number of measures. We unpack them as both
+        # little and big endian; the order giving the smaller value is used.
        if (self.up(raw_bytes, ">I", 512, 515) <
                self.up(raw_bytes, "<I", 512, 515)):
            return ">"
@ -298,12 +327,15 @@ class FumenHeader(DefaultObject):
            return "<"

    def set_hp_bytes(self, n_notes, difficulty, stars):
+        """Compute header bytes related to the soul gauge (HP) behavior."""
+        # Note: Ura Oni is equivalent to Oni for soul gauge behavior
        difficulty = 'Oni' if difficulty in ['Ura', 'Edit'] else difficulty
        self._get_hp_from_LUTs(n_notes, difficulty, stars)
        self.b440_b443_hp_clear = {'Easy': 6000, 'Normal': 7000,
                                   'Hard': 7000, 'Oni': 8000}[difficulty]

    def _get_hp_from_LUTs(self, n_notes, difficulty, stars):
+        """Fetch pre-computed soul gauge values from lookup tables (LUTs)."""
        if not 0 < n_notes <= 2500:
            return
        star_to_key = {
@ -332,6 +364,7 @@ class FumenHeader(DefaultObject):

    @property
    def raw_bytes(self):
+        """Represent the header values as a string of raw bytes."""
        value_list = []
        format_string = self.order
        for key, val in self.__dict__.items():
--- a/src/tja2fumen/writers.py
+++ b/src/tja2fumen/writers.py
@ -4,6 +4,12 @@ from tja2fumen.constants import BRANCH_NAMES, FUMEN_TYPE_NOTES


 def write_fumen(path_out, song):
+    """
+    Write the values in a FumenCourse object to a `.bin` file.
+
+    This operation is the reverse of the `parse_fumen` function. Please refer
+    to that function for more details about the fumen file structure.
+    """
    with open(path_out, "wb") as file:
        file.write(song.header.raw_bytes)

@ -42,6 +48,7 @@ def write_fumen(path_out, song):


 def write_struct(file, order, format_string, value_list, seek=None):
+    """Pack (int, float, etc.) values into a string of bytes, then write."""
    if seek:
        file.seek(seek)
    packed_bytes = struct.pack(order + format_string, *value_list)