Handle XA ADPCM in a more standard way

Previously the decoder handled one 28-sample subframe at a time and the blocked layout handled frames, doing some voodoo to skip CD sectors. Now the decoder handles one 28*8-sample frame at a time and the blocked layout handles CD sectors. Decoding is byte-exact vs before.
2024-11-14 18:47:39 +01:00 · 2018-07-22 23:08:09 +02:00 · 2018-07-22 23:08:09 +02:00 · 0dc2c81ab9
commit 0dc2c81ab9
parent 8e16eb108c
6 changed files with 154 additions and 119 deletions
--- a/src/coding/coding.h
+++ b/src/coding/coding.h
@ -82,7 +82,7 @@ size_t ps_cfg_bytes_to_samples(size_t bytes, size_t frame_size, int channels);
 void decode_hevag(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do);

 /* xa_decoder */
-void decode_xa(VGMSTREAM * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel);
+void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel);
 size_t xa_bytes_to_samples(size_t bytes, int channels, int is_blocked);

 /* ea_xa_decoder */
--- a/src/coding/xa_decoder.c
+++ b/src/coding/xa_decoder.c
@ -8,8 +8,9 @@
 /* XA ADPCM gain values */
 static const double K0[4] = { 0.0, 0.9375, 1.796875,  1.53125 };
 static const double K1[4] = { 0.0,    0.0,  -0.8125,-0.859375};
-static int IK0(int fid) { return ((int)((-K0[fid]) * (1 << 10))); } /* K0/1 floats to int, K*2^10 = K*(1<<10) = K*1024 */
-static int IK1(int fid) { return ((int)((-K1[fid]) * (1 << 10))); }
+/* K0/1 floats to int, K*2^10 = K*(1<<10) = K*1024 */
+static int get_IK0(int fid) { return ((int)((-K0[fid]) * (1 << 10))); }
+static int get_IK1(int fid) { return ((int)((-K1[fid]) * (1 << 10))); }

 /* Sony XA ADPCM, defined for CD-DA/CD-i in the "Red Book" (private) or "Green Book" (public) specs.
 * The algorithm basically is BRR (Bit Rate Reduction) from the SNES SPC700, while the data layout is new.
@ -29,8 +30,8 @@ static int IK1(int fid) { return ((int)((-K1[fid]) * (1 << 10))); }
 *   (rounding differences should be inaudible, so public implementations may be approximations)
 *
 * Various XA descendants (PS-ADPCM, EA-XA, NGC DTK, FADPCM, etc) do filters/rounding slightly
- * differently, using one of the above methods in software/CPU, but in XA's case may be done like
- * the SNES/SPC700 BRR, with specific per-filter ops.
+ * differently, maybe using one of the above methods in software/CPU, but in XA's case may be done
+ * like the SNES/SPC700 BRR, with specific per-filter ops.
 * int coef tables commonly use N = 6 or 8, so K0 0.9375*64 = 60 or 0.9375*256 = 240
 * PS1 XA is apparently upsampled and interpolated to 44100, vgmstream doesn't simulate this.
 *
@ -38,59 +39,105 @@ static int IK1(int fid) { return ((int)((-K1[fid]) * (1 << 10))); }
 * BRR info (no$sns): http://problemkaputt.de/fullsnes.htm#snesapudspbrrsamples
 *           (bsnes): https://gitlab.com/higan/higan/blob/master/higan/sfc/dsp/brr.cpp
 */
-void decode_xa(VGMSTREAM * vgmstream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel) {
-    static int head_table[8] = {0,2,8,10};
-    VGMSTREAMCHANNEL * stream = &(vgmstream->ch[channel]);
-    off_t sp_offset;
-    int i;
-    int frames_in, sample_count = 0;
-    int32_t coef1, coef2, coef_index, shift_factor;
+
+void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel) {
+    off_t frame_offset, sp_offset;
+    int i,j, frames_in, samples_done = 0, sample_count = 0;
+    size_t bytes_per_frame, samples_per_frame;
    int32_t hist1 = stream->adpcm_history1_32;
    int32_t hist2 = stream->adpcm_history2_32;

    /* external interleave (fixed size), mono/stereo */
-    frames_in = first_sample / (28*2 / channelspacing);
-    first_sample = first_sample % 28;
+    bytes_per_frame = 0x80;
+    samples_per_frame = 28*8 / channelspacing;
+    frames_in = first_sample / samples_per_frame;
+    first_sample = first_sample % samples_per_frame;

-    /* hack for mono/stereo handling */
-    vgmstream->xa_get_high_nibble = !vgmstream->xa_get_high_nibble;
-    if (first_sample && channelspacing==1)
-        vgmstream->xa_get_high_nibble = !vgmstream->xa_get_high_nibble;
+    /* data layout (mono):
+     * - CD-XA audio is divided into sectors ("audio blocks"), each with 18 size 0x80 frames
+     *   (handled externally, this decoder only gets frames)
+     * - a frame ("sound group") is divided into 8 subframes ("sound unit"), with
+     *   subframe headers ("sound parameters") first then subframe nibbles ("sound data")
+     * - headers: 0..3 + repeat 0..3 + 4..7 + repeat 4..7 (where N = subframe N header)
+     *   (repeats may be for error correction, though probably unused)
+     * - nibbles: 32b with nibble0 for subframes 0..7, 32b with nibble1 for subframes 0..7, etc
+     *   (low first: 32b = sf1-n0 sf0-n0  sf3-n0 sf2-n0  sf5-n0 sf4-n0  sf7-n0 sf6-n0, etc)
+     *
+     * stereo layout is the same but alternates channels: subframe 0/2/4/6=L, subframe 1/3/5/7=R
+     *
+     * example:
+     *   subframe 0: header @ 0x00 or 0x04, 28 nibbles (low)  @ 0x10,14,18,1c,20 ... 7c
+     *   subframe 1: header @ 0x01 or 0x05, 28 nibbles (high) @ 0x10,14,18,1c,20 ... 7c
+     *   subframe 2: header @ 0x02 or 0x06, 28 nibbles (low)  @ 0x11,15,19,1d,21 ... 7d
+     *   ...
+     *   subframe 7: header @ 0x0b or 0x0f, 28 nibbles (high) @ 0x13,17,1b,1f,23 ... 7f
+     */
+    frame_offset = stream->offset + bytes_per_frame*frames_in;

-    /* parse current sound unit (subframe) sound parameters */
-    sp_offset = stream->offset+head_table[frames_in]+vgmstream->xa_get_high_nibble;
-    coef_index   = (read_8bit(sp_offset,stream->streamfile) >> 4) & 0xf;
-    shift_factor = (read_8bit(sp_offset,stream->streamfile)     ) & 0xf;
-
-    VGM_ASSERT(coef_index > 4 || shift_factor > 12, "XA: incorrect coefs/shift at %lx\n", sp_offset);
-    if (coef_index > 4)
-        coef_index = 0; /* only 4 filters are used, rest is apparently 0 */
-    if (shift_factor > 12)
-        shift_factor = 9; /* supposedly, from Nocash PSX docs */
-
-    coef1 = IK0(coef_index);
-    coef2 = IK1(coef_index);
+    if (read_32bitBE(frame_offset+0x00,stream->streamfile) != read_32bitBE(frame_offset+0x04,stream->streamfile) ||
+        read_32bitBE(frame_offset+0x08,stream->streamfile) != read_32bitBE(frame_offset+0x0c,stream->streamfile)) {
+        VGM_LOG("bad frames at %lx\n", frame_offset);
+    }


-    /* decode nibbles */
-    for (i = first_sample; i < first_sample + samples_to_do; i++) {
-        int32_t new_sample;
-        uint8_t nibbles = (uint8_t)read_8bit(stream->offset+0x10+frames_in+(i*0x04),stream->streamfile);
+    /* decode subframes */
+    for (i = 0; i < 8 / channelspacing; i++) {
+        int32_t coef1, coef2;
+        uint8_t coef_index, shift_factor;

-        new_sample = vgmstream->xa_get_high_nibble ?
-                (nibbles >> 4) & 0x0f :
-                (nibbles     ) & 0x0f;
-        new_sample = (int16_t)((new_sample << 12) & 0xf000) >> shift_factor; /* 16b sign extend + scale */
-        new_sample = new_sample << 4;
-        new_sample = new_sample - ((coef1*hist1 + coef2*hist2) >> 10);
+        /* parse current subframe (sound unit)'s header (sound parameters) */
+        sp_offset = frame_offset + 0x04 + i*channelspacing + channel;
+        coef_index   = ((uint8_t)read_8bit(sp_offset,stream->streamfile) >> 4) & 0xf;
+        shift_factor = ((uint8_t)read_8bit(sp_offset,stream->streamfile) >> 0) & 0xf;

-        hist2 = hist1;
-        hist1 = new_sample; /* must go before clamp, somehow */
-        new_sample = new_sample >> 4;
-        new_sample = clamp16(new_sample);
+        VGM_ASSERT(coef_index > 4 || shift_factor > 12, "XA: incorrect coefs/shift at %lx\n", sp_offset);
+        if (coef_index > 4)
+            coef_index = 0; /* only 4 filters are used, rest is apparently 0 */
+        if (shift_factor > 12)
+            shift_factor = 9; /* supposedly, from Nocash PSX docs */

-        outbuf[sample_count] = new_sample;
-        sample_count += channelspacing;
+        coef1 = get_IK0(coef_index);
+        coef2 = get_IK1(coef_index);
+
+
+        /* decode subframe nibbles */
+        for(j = 0; j < 28; j++) {
+            uint8_t nibbles;
+            int32_t new_sample;
+
+            off_t su_offset = (channelspacing==1) ?
+                    frame_offset + 0x10 + j*0x04 + (i/2) : /* mono */
+                    frame_offset + 0x10 + j*0x04 + i;      /* stereo */
+            int get_high_nibble = (channelspacing==1) ?
+                    (i&1) :         /* mono (even subframes = low, odd subframes = high) */
+                    (channel == 1); /* stereo (L channel / even subframes = low, R channel / odd subframes = high) */
+
+            /* skip half decodes to make sure hist isn't touched (kinda hack-ish) */
+            if (!(sample_count >= first_sample && samples_done < samples_to_do)) {
+                sample_count++;
+                continue;
+            }
+
+            nibbles = (uint8_t)read_8bit(su_offset,stream->streamfile);
+
+            new_sample = get_high_nibble ?
+                    (nibbles >> 4) & 0x0f :
+                    (nibbles     ) & 0x0f;
+
+            new_sample = (int16_t)((new_sample << 12) & 0xf000) >> shift_factor; /* 16b sign extend + scale */
+            new_sample = new_sample << 4;
+            new_sample = new_sample - ((coef1*hist1 + coef2*hist2) >> 10);
+
+            hist2 = hist1;
+            hist1 = new_sample; /* must go before clamp, somehow */
+            new_sample = new_sample >> 4;
+            new_sample = clamp16(new_sample);
+
+            outbuf[samples_done * channelspacing] = new_sample;
+            samples_done++;
+
+            sample_count++;
+        }
    }

    stream->adpcm_history1_32 = hist1;
--- a/src/layout/blocked_xa.c
+++ b/src/layout/blocked_xa.c
@ -2,30 +2,32 @@
 #include "../coding/coding.h"
 #include "../vgmstream.h"

-/* set up for the block at the given offset */
+/* parse a CD-XA raw mode2/form2 sector */
 void block_update_xa(off_t block_offset, VGMSTREAM * vgmstream) {
+    STREAMFILE* streamFile = vgmstream->ch[0].streamfile;
    int i;
-    int8_t currentChannel=0;
-    int8_t subAudio=0;
+    size_t block_samples;
+    uint8_t xa_submode;

-    vgmstream->xa_get_high_nibble = 1; /* reset nibble order */
-
-    /* don't change this variable in the init process */
-    if (vgmstream->samples_into_block != 0)
-        vgmstream->xa_sector_length += 0x80;

    /* XA mode2/form2 sector, size 0x930
     * 0x00: sync word
     * 0x0c: header = minute, second, sector, mode (always 0x02)
-     * 0x10: subheader = file, channel (marker), submode flags, xa header
-     * 0x14: subheader again
+     * 0x10: subheader = file, channel (substream marker), submode flags, xa header
+     * 0x14: subheader again (for error correction)
     * 0x18: data
     * 0x918: unused
     * 0x92c: EDC/checksum or null
     * 0x930: end
-     * (in non-blocked ISO 2048 mode1/data chunks are 0x800)
     */

+    /* channel markers supposedly could be used to interleave streams, ex. audio languages within video
+     * (extractors may split .XA using channels?) */
+    VGM_ASSERT(block_offset + 0x930 < get_streamfile_size(streamFile) &&
+            (uint8_t)read_8bit(block_offset + 0x000 + 0x11,streamFile) !=
+            (uint8_t)read_8bit(block_offset + 0x930 + 0x11,streamFile),
+            "XA block: subchannel change at %lx\n", block_offset);
+
    /* submode flag bits (typical audio value = 0x64)
     * - 7: end of file
     * - 6: real time mode
@ -36,38 +38,22 @@ void block_update_xa(off_t block_offset, VGMSTREAM * vgmstream) {
     * - 1: video sector
     * - 0: end of audio
     */
+    xa_submode = (uint8_t)read_8bit(block_offset + 0x12,streamFile);

-    // We get to the end of a sector ?
-    if (vgmstream->xa_sector_length == (18*0x80)) {
-        vgmstream->xa_sector_length = 0;
-
-        // 0x30 of unused bytes/sector :(
-        if (!vgmstream->xa_headerless) {
-            block_offset += 0x30;
-begin:
-            // Search for selected channel & valid audio
-            currentChannel = read_8bit(block_offset-0x07,vgmstream->ch[0].streamfile);
-            subAudio = read_8bit(block_offset-0x06,vgmstream->ch[0].streamfile);
-
-            // audio is coded as 0x64
-            if (!((subAudio==0x64) && (currentChannel==vgmstream->xa_channel))) {
-                // go to next sector
-                block_offset += 0x930;
-                if (currentChannel!=-1) goto begin;
-            }
-        }
+    /* audio sector must set/not set certain flags, as per spec */
+    if ((xa_submode & 0x20) && !(xa_submode & 0x08) && (xa_submode & 0x04) && !(xa_submode & 0x02) ) {
+        block_samples = (28*8 / vgmstream->channels) * 18; /* size 0x900, 18 frames of size 0x80 with 8 subframes of 28 samples */
+    }
+    else {
+        block_samples = 0; /* not an audio sector */
+        ;VGM_LOG("XA block: non audio block found at %lx\n", block_offset);
    }

    vgmstream->current_block_offset = block_offset;
+    vgmstream->current_block_samples = block_samples;
+    vgmstream->next_block_offset = block_offset + 0x930;

-    // Quid : how to stop the current channel ???
-    // i set up 0 to current_block_size to make vgmstream not playing bad samples
-    // another way to do it ???
-    // (as the number of samples can be false in cd-xa due to multi-channels)
-    vgmstream->current_block_size = (currentChannel==-1 ? 0 : 0x70);
-
-    vgmstream->next_block_offset = vgmstream->current_block_offset + 0x80;
-    for (i=0;i<vgmstream->channels;i++) {
-        vgmstream->ch[i].offset = vgmstream->current_block_offset;
+    for (i = 0; i < vgmstream->channels; i++) {
+        vgmstream->ch[i].offset = block_offset + 0x18;
    }
 }
--- a/src/meta/psx_cdxa.c
+++ b/src/meta/psx_cdxa.c
@ -7,11 +7,11 @@ VGMSTREAM * init_vgmstream_cdxa(STREAMFILE *streamFile) {
    VGMSTREAM * vgmstream = NULL;
    off_t start_offset;
    int loop_flag = 0, channel_count, sample_rate;
-    int xa_channel=0;
    int is_blocked;
    size_t file_size = get_streamfile_size(streamFile);

-    /* check extension (.xa: common, .str: sometimes used) */
+    /* checks
+     * .xa: common, .str: sometimes (mainly videos) */
    if ( !check_extensions(streamFile,"xa,str") )
        goto fail;

@ -33,7 +33,7 @@ VGMSTREAM * init_vgmstream_cdxa(STREAMFILE *streamFile) {
            is_blocked = 1;
            start_offset = 0x00;
        }
-        else { /* headerless */
+        else { /* headerless and incorrectly ripped */
            is_blocked = 0;
            start_offset = 0x00;
        }
@ -50,9 +50,9 @@ VGMSTREAM * init_vgmstream_cdxa(STREAMFILE *streamFile) {
            test_offset += (is_blocked ? 0x18 : 0x00); /* header */

            for (i = 0; i < (sector_size/block_size); i++) {
-                /* first 0x10 ADPCM predictors should be 0..3 index */
+                /* first 0x10 ADPCM filter index should be 0..3 */
                for (j = 0; j < 16; j++) {
-                    uint8_t header = read_8bit(test_offset + i, streamFile);
+                    uint8_t header = (uint8_t)read_8bit(test_offset + i, streamFile);
                    if (((header >> 4) & 0xF) > 3)
                        goto fail;
                }
@ -66,13 +66,10 @@ VGMSTREAM * init_vgmstream_cdxa(STREAMFILE *streamFile) {

    /* data is ok: parse header */
    if (is_blocked) {
-        uint8_t xa_header;
-
        /* parse 0x18 sector header (also see xa_blocked.c)  */
-        xa_channel = read_8bit(start_offset + 0x11,streamFile);
-        xa_header  = read_8bit(start_offset + 0x13,streamFile);
+        uint8_t xa_header = (uint8_t)read_8bit(start_offset + 0x13,streamFile);

-        switch((xa_header >> 0) & 3) { /* 0..1: stereo */
+        switch((xa_header >> 0) & 3) { /* 0..1: mono/stereo */
            case 0: channel_count = 1; break;
            case 1: channel_count = 2; break;
            default: goto fail;
@ -82,16 +79,29 @@ VGMSTREAM * init_vgmstream_cdxa(STREAMFILE *streamFile) {
            case 1: sample_rate = 18900; break;
            default: goto fail;
        }
-        VGM_ASSERT(((xa_header >> 4) & 3) == 1, /* 4..5: bits per sample (0=4, 1=8) */
-                "XA: 8 bits per sample mode found\n"); /* spec only? */
-        /* 6: emphasis (applies a filter but apparently not used by games)
-         *   XA is also filtered when resampled to 44100 during output, differently from PS-ADPCM */
-        /* 7: reserved */
+        switch((xa_header >> 4) & 3) { /* 4..5: bits per sample (0=4, 1=8) */
+            case 0: break;
+            default: /* PS1 games only do 4b */
+                VGM_LOG("XA: unknown bits per sample found\n");
+                goto fail;
+        }
+        switch((xa_header >> 6) & 1) { /* 6: emphasis (applies a filter) */
+            case 0: break;
+            default: /*  shouldn't be used by games */
+                VGM_LOG("XA: unknown emphasis found\n");
+                 break;
+        }
+        switch((xa_header >> 7) & 1) { /* 7: reserved */
+            case 0: break;
+            default:
+                VGM_LOG("XA: unknown reserved bit found\n");
+                 break;
+        }
    }
    else {
-        /* headerless, probably will go wrong */
+        /* headerless, probably will sound wrong */
        channel_count = 2;
-        sample_rate = 44100; /* not 37800? */
+        sample_rate = 37800;
    }


@ -100,23 +110,19 @@ VGMSTREAM * init_vgmstream_cdxa(STREAMFILE *streamFile) {
    if (!vgmstream) goto fail;

    vgmstream->sample_rate = sample_rate;
+    //todo do block_updates to find num_samples? (to skip non-audio blocks)
    vgmstream->num_samples = xa_bytes_to_samples(file_size - start_offset, channel_count, is_blocked);
-    vgmstream->xa_headerless = !is_blocked;
-    vgmstream->xa_channel = xa_channel;

-    vgmstream->coding_type = coding_XA;
-    vgmstream->layout_type = layout_blocked_xa;
    vgmstream->meta_type = meta_PSX_XA;
-
-    if (is_blocked)
-        start_offset += 0x18; /* move to first frame (hack for xa_blocked.c) */
+    vgmstream->coding_type = coding_XA;
+    vgmstream->layout_type = is_blocked ? layout_blocked_xa : layout_none;

    /* open the file for reading */
    if ( !vgmstream_open_stream(vgmstream, streamFile, start_offset) )
        goto fail;

-    block_update_xa(start_offset,vgmstream);
-
+    if (vgmstream->layout_type == layout_blocked_xa)
+        block_update_xa(start_offset,vgmstream);
    return vgmstream;

 fail:
--- a/src/vgmstream.c
+++ b/src/vgmstream.c
@ -1077,6 +1077,7 @@ int get_vgmstream_samples_per_frame(VGMSTREAM * vgmstream) {
            return 32;

        case coding_XA:
+            return 28*8 / vgmstream->channels; /* 8 subframes per frame, mono/stereo */
        case coding_PSX:
        case coding_PSX_badflags:
        case coding_HEVAG:
@ -1241,7 +1242,7 @@ int get_vgmstream_frame_size(VGMSTREAM * vgmstream) {
            return 0x22;

        case coding_XA:
-            return 0x0e*vgmstream->channels;
+            return 0x80;
        case coding_PSX:
        case coding_PSX_badflags:
        case coding_HEVAG:
@ -1581,7 +1582,7 @@ void decode_vgmstream(VGMSTREAM * vgmstream, int samples_written, int samples_to
            break;
        case coding_XA:
            for (chan=0;chan<vgmstream->channels;chan++) {
-                decode_xa(vgmstream,buffer+samples_written*vgmstream->channels+chan,
+                decode_xa(&vgmstream->ch[chan],buffer+samples_written*vgmstream->channels+chan,
                        vgmstream->channels,vgmstream->samples_into_block,
                        samples_to_do,chan);
            }
--- a/src/vgmstream.h
+++ b/src/vgmstream.h
@ -796,11 +796,6 @@ typedef struct {
    int codec_endian;               /* little/big endian marker; name is left vague but usually means big endian */
    int codec_version;              /* flag for codecs with minor variations */

-    uint8_t xa_channel;             /* XA ADPCM: selected channel */
-    int32_t xa_sector_length;       /* XA ADPCM: XA block */
-    uint8_t xa_headerless;          /* XA ADPCM: headerless XA */
-    int8_t xa_get_high_nibble;      /* XA ADPCM: mono/stereo nibble selection (XA state could be simplified) */
-
    int32_t ws_output_size;         /* WS ADPCM: output bytes for this block */

    void * start_vgmstream;         /* a copy of the VGMSTREAM as it was at the beginning of the stream (for AAX/AIX/SCD) */