Fix tri-Ace codec joint stereo output

Codec should now be accurate (thanks Nisto for figuring out all this)
2024-11-15 11:07:40 +01:00 · 2021-03-04 20:04:45 +01:00 · 2021-03-04 20:04:45 +01:00 · f064086d5e
commit f064086d5e
parent cbb8211022
3 changed files with 47 additions and 28 deletions
--- a/src/coding/tac_decoder_lib.c
+++ b/src/coding/tac_decoder_lib.c
@ -25,14 +25,14 @@
 * Codec returns float samples then converted to PCM16. Output samples are +-1 vs Nisto's/PCSX2's
 * results, due to various quirks:
 * - simplified PS2 float handling (PS2 VU floats don't map 1:1 to PC IEEE floats), can be re-enabled (slow)
- * - various heisenbugs (PC 80b register floats <> 32b memory floats conversions, see transform).
+ * - various heisenbugs (PC 80b register floats <> 32b memory floats conversions, see transform()).
 *
 * Files are divided into blocks (size 0x4E000). At file start is a simple header and huffman codebook
 * then N VBR frames (of size around 0x200~300) containing huffman codes of spectral data. A frame has
- * codes for 2 channels, decoded separatedly (first all L then all R). Spectrum coefs are processeed,
+ * codes for 2 channels, decoded separately (first all L then all R), then joint stereo is handled.
- * then MDCT(?) + window overlap to get final samples. When a "block end frame" is found, handler must
+ * Channel spectrum coefs are processed, then MDCT(?) + window overlap to get final samples. When a
- * get next block and resume decoding (blocks may be pre/post padded, for looping porposes). Game reads
+ * "block end frame" is found, handler must get next block and resume decoding (blocks may be pre/post
- * a couple of blocks at once though.
+ * padded, for looping purposes). Game reads a couple of blocks at once though.
 */
 /**********************************************************************************/
@ -884,17 +884,6 @@ static void process(REG_VF* wave, REG_VF* hist) {
 }
 /* Fix joint stereo files that only encode diffs in R (assumed, double check) */
 static void parse_joint_stereo(REG_VF* resultL, REG_VF* resultR) {
    int i;
    /* Combine OG L sample + R diff. For pseudo-mono files R is all 0s
     * (R only saves 28 huffman codes, signalling no coefs per 1+27 bands) */
    for (i = 0; i < TAC_TOTAL_POINTS * 8; i++) {
        ADD  (_xyzw, &resultR[i], &resultL[i], &resultR[i]);
    }
 }
 ///////////////////////////////////////////////////////////////////////////////
 /* main decoding in the VU1 coprocessor */
@ -910,11 +899,35 @@ static void decode_vu1(tac_handle_t* h) {
    }
    /* Decoded data is originally stored in VUMem1 as clamped ints, though final step
-     * seems may be done done externally (StFlushWriteBuffer/StMakeFinalOut?) */
+     * seems it may be done externally (StMakeFinalOut/StFlushWriteBuffer) */
 }
 /* Create final output samples */
 // StMakeFinalOut
 static void finalize_output(tac_handle_t* h) {
    int i;
    /* original code copies + clamps to PCM buffer here instead of modifying wave,
     * but we do it later to potentially allow float output. It also sets total output:
     * - type 1 (at loop frame): start_sample = loop_discard, frame_samples = 1024 - loop_discard
     * - type 2 (at last frame): start_sample = 0, frame_samples = frame_last + 1
     * - other: start_sample = 0, frame_samples = 1024
     * (only copies or does joint stereo from start_sample) */
    /* this step may be outside VU1 code */
    if (h->header.joint_stereo) {
-        parse_joint_stereo(h->wave[0], h->wave[1]);
+        REG_VF* wave_l = h->wave[0];
        REG_VF* wave_r = h->wave[1];
        /* Combine joint stereo channels that encode diffs in L/R. In pseudo-mono files R has */
        /* all samples as 0 (R only saves 28 huffman codes, signalling no coefs per 1+27 bands) */
        for (i = 0; i < TAC_TOTAL_POINTS * 8; i++) {
            REG_VF samples_l, samples_r;
            ADD  (_xyzw, &samples_l, &wave_l[i], &wave_r[i]); /* L = L + R */
            SUB  (_xyzw, &samples_r, &wave_l[i], &wave_r[i]); /* R = L - R */
            MOVE (_xyzw, &wave_l[i], &samples_l);
            MOVE (_xyzw, &wave_r[i], &samples_r);
        }
    }
 }
@ -1041,7 +1054,7 @@ static int init_header(tac_header_t* header, const uint8_t* buf) {
    header->loop_frame      = get_u16le(buf+0x08);
    header->loop_discard    = get_u16le(buf+0x0A);
    header->frame_count     = get_u16le(buf+0x0C);
-    header->frame_discard   = get_u16le(buf+0x0E);
+    header->frame_last      = get_u16le(buf+0x0E);
    header->loop_offset     = get_u32le(buf+0x10);
    header->file_size       = get_u32le(buf+0x14);
    header->joint_stereo    = get_u32le(buf+0x18);
@ -1053,8 +1066,8 @@ static int init_header(tac_header_t* header, const uint8_t* buf) {
    /* header size ia block-aligned (but actual size can be smaller, ex. VP 00000715) */
    if (header->file_size % TAC_BLOCK_SIZE != 0)
        return TAC_PROCESS_HEADER_ERROR;
-    /* loop_discard over max makes game crash, while frame_discard seems to ignore it */
+    /* loop_discard over max makes game crash, while frame_last seems to ignore it */
-    if (header->loop_discard > TAC_FRAME_SAMPLES || header->frame_discard > TAC_FRAME_SAMPLES)
+    if (header->loop_discard > TAC_FRAME_SAMPLES || header->frame_last + 1 > TAC_FRAME_SAMPLES)
        return TAC_PROCESS_HEADER_ERROR;
    /* looping makes sense */
    if (header->loop_frame > header->frame_count || header->loop_offset > header->file_size)
@ -1213,7 +1226,7 @@ int tac_decode_frame(tac_handle_t* handle, const uint8_t* block) {
        if (pos + 0x08 + frame_size > TAC_BLOCK_SIZE)
            return TAC_PROCESS_ERROR_SIZE;
-        /* from tests seems CRC errors cause current frame to be skipped */
+        /* tests suggest CRC errors cause the current frame to be skipped, so advance values before validating */
        handle->frame_number++;
        handle->frame_offset += 0x08 + frame_size;
@ -1228,6 +1241,9 @@ int tac_decode_frame(tac_handle_t* handle, const uint8_t* block) {
        /* main decode */
        decode_vu1(handle);
        /* post process */
        finalize_output(handle);
    }
    /* current frame decoded and samples can be requested */
--- a/src/coding/tac_decoder_lib.h
+++ b/src/coding/tac_decoder_lib.h
@ -28,7 +28,7 @@ typedef struct {
    uint16_t loop_frame;        /* aligned to block start */
    uint16_t loop_discard;      /* discarded start samples in loop frame (lower = outputs more) */
    uint16_t frame_count;       /* number of valid frames ("block end" frames not included) */
-    uint16_t frame_discard;     /* discarded end samples in final frame (lower = outputs less), even for non-looped files */
+    uint16_t frame_last;        /* valid samples in final frame - 1 (lower = outputs less, 0 = outputs 1), even for non-looped files */
    uint32_t loop_offset;       /* points to a block; file size if not looped */
    uint32_t file_size;         /* block aligned; actual file size can be a bit smaller if last block is truncated */
    uint32_t joint_stereo;      /* usually 0 and rarely 1 */
--- a/src/meta/tac.c
+++ b/src/meta/tac.c
@ -5,7 +5,7 @@
 VGMSTREAM* init_vgmstream_tac(STREAMFILE* sf) {
    VGMSTREAM* vgmstream = NULL;
    int loop_flag, channel_count;
-    uint16_t loop_frame, frame_count, loop_discard, frame_discard;
+    uint16_t loop_frame, frame_count, loop_discard, frame_last;
    uint32_t info_offset, loop_offset, stream_size, file_size;
    off_t start_offset;
@ -16,14 +16,14 @@ VGMSTREAM* init_vgmstream_tac(STREAMFILE* sf) {
     * .pk3/.20: extremely ugly fake extensions randomly given by an old extractor, *DON'T* */
    if (!check_extensions(sf, ",aac,laac"))
        goto fail;
-    /* file is validated on decoder init, early catch of simple errors (see decoder for full header) */
+    /* file is validated on decoder init, early catch of simple errors (see tac_decoder_lib.h for full header) */
    info_offset = read_u32le(0x00,sf);
    if (info_offset > 0x4E000 || info_offset < 0x20) /* offset points to value inside first "block" */
        goto fail;
    loop_frame      = read_u16le(0x08,sf);
    loop_discard    = read_u16le(0x0a,sf);
    frame_count     = read_u16le(0x0c,sf);
-    frame_discard   = read_u16le(0x0e,sf);
+    frame_last      = read_u16le(0x0e,sf);
    loop_offset     = read_u32le(0x10,sf);
    stream_size     = read_u32le(0x14,sf);
    if (stream_size % 0x4E000 != 0) /* multiple of blocks */
@ -45,7 +45,10 @@ VGMSTREAM* init_vgmstream_tac(STREAMFILE* sf) {
    vgmstream->meta_type = meta_TAC;
    vgmstream->sample_rate = 48000;
-    vgmstream->num_samples = frame_count * 1024 - (1024 - frame_discard);
+
    /* The frames at frame_count and loop_frame output fewer than the full 1024 samples, so totals
     * are (count - 1) or (loop - 1) full frames plus the extra samples.
     * A few files may pop when looping, but this seems to match game/emulator. */
    vgmstream->num_samples = (frame_count - 1) * 1024 + (frame_last + 1);
    vgmstream->loop_start_sample = (loop_frame - 1) * 1024 + loop_discard;
    vgmstream->loop_end_sample = vgmstream->num_samples;