From 35f5da2ac3581fa803e4addf122225ac1badab12 Mon Sep 17 00:00:00 2001
From: bnnm <bananaman255@gmail.com>
Date: Sat, 5 Oct 2019 15:10:40 +0200
Subject: [PATCH] Improve performance of ADX/XA/PSX/HEVAG/DSP decoders

---
 src/coding/adx_decoder.c     | 191 +++++++++++------------------------
 src/coding/coding.h          |   9 +-
 src/coding/ngc_dsp_decoder.c | 129 ++++++++++++++---------
 src/coding/psv_decoder.c     |  81 ++++++++-------
 src/coding/psx_decoder.c     |  45 ++++++---
 src/coding/xa_decoder.c      |  63 ++++++------
 src/vgmstream.c              |  45 ++++-----
 7 files changed, 267 insertions(+), 296 deletions(-)

diff --git a/src/coding/adx_decoder.c b/src/coding/adx_decoder.c
index f27b26d3..f906ad0f 100644
--- a/src/coding/adx_decoder.c
+++ b/src/coding/adx_decoder.c
@@ -1,155 +1,84 @@
 #include "coding.h"
 #include "../util.h"
 
-void decode_adx(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes) {
-    int i;
-    int32_t sample_count;
-    int32_t frame_samples = (frame_bytes - 2) * 2;
-
-    int framesin = first_sample/frame_samples;
-
-    int32_t scale = read_16bitBE(stream->offset+framesin*frame_bytes,stream->streamfile) + 1;
+void decode_adx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_size, coding_t coding_type) {
+    uint8_t frame[0x12] = {0};
+    off_t frame_offset;
+    int i, frames_in, sample_count = 0;
+    size_t bytes_per_frame, samples_per_frame;
+    int scale, coef1, coef2;
     int32_t hist1 = stream->adpcm_history1_32;
     int32_t hist2 = stream->adpcm_history2_32;
-    int coef1 = stream->adpcm_coef[0];
-    int coef2 = stream->adpcm_coef[1];
 
-    first_sample = first_sample%frame_samples;
 
-    for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) {
-        int sample_byte = read_8bit(stream->offset+framesin*frame_bytes +2+i/2,stream->streamfile);
+    if (frame_size < 0x03 || frame_size > (int)sizeof(frame)) return; /* external interleave (fixed size), mono; frame must fit the local buffer and hold at least one data byte */
+    bytes_per_frame = frame_size;
+    samples_per_frame = (bytes_per_frame - 0x02) * 2; /* always 32 */
+    frames_in = first_sample / samples_per_frame;
+    first_sample = first_sample % samples_per_frame;
 
-        outbuf[sample_count] = clamp16(
-                (i&1?
-                 get_low_nibble_signed(sample_byte):
-                 get_high_nibble_signed(sample_byte)
-                ) * scale +
-                (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12)
-                );
+    /* parse frame header */
+    frame_offset = stream->offset + bytes_per_frame * frames_in;
+    read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */
 
-        hist2 = hist1;
-        hist1 = outbuf[sample_count];
+    scale = get_16bitBE(frame+0x00);
+    switch(coding_type) {
+        case coding_CRI_ADX:
+            scale = scale + 1;
+            coef1 = stream->adpcm_coef[0];
+            coef2 = stream->adpcm_coef[1];
+            break;
+        case coding_CRI_ADX_exp:
+            scale = 1 << (12 - scale);
+            coef1 = stream->adpcm_coef[0];
+            coef2 = stream->adpcm_coef[1];
+            break;
+        case coding_CRI_ADX_fixed:
+            scale = (scale & 0x1fff) + 1;
+            coef1 = stream->adpcm_coef[(frame[0] >> 5)*2 + 0];
+            coef2 = stream->adpcm_coef[(frame[0] >> 5)*2 + 1];
+            break;
+        case coding_CRI_ADX_enc_8:
+        case coding_CRI_ADX_enc_9:
+            scale = ((scale ^ stream->adx_xor) & 0x1fff) + 1;
+            coef1 = stream->adpcm_coef[0];
+            coef2 = stream->adpcm_coef[1];
+            break;
+        default:
+            scale = scale + 1;
+            coef1 = stream->adpcm_coef[0];
+            coef2 = stream->adpcm_coef[1];
+            break;
     }
 
-    stream->adpcm_history1_32 = hist1;
-    stream->adpcm_history2_32 = hist2;
-}
+    /* decode nibbles */
+    for (i = first_sample; i < first_sample + samples_to_do; i++) {
+        int32_t sample = 0;
+        uint8_t nibbles = frame[0x02 + i/2];
 
-void decode_adx_exp(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes) {
-    int i;
-    int32_t sample_count;
-    int32_t frame_samples = (frame_bytes - 2) * 2;
+        sample = i&1 ? /* high nibble first */
+                get_low_nibble_signed(nibbles):
+                get_high_nibble_signed(nibbles);
+        sample = sample * scale + (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12);
+        sample = clamp16(sample);
 
-    int framesin = first_sample/frame_samples;
-
-    int32_t scale = read_16bitBE(stream->offset+framesin*frame_bytes,stream->streamfile);
-    int32_t hist1, hist2;
-    int coef1, coef2;
-    scale = 1 << (12 - scale);
-    hist1 = stream->adpcm_history1_32;
-    hist2 = stream->adpcm_history2_32;
-    coef1 = stream->adpcm_coef[0];
-    coef2 = stream->adpcm_coef[1];
-
-    first_sample = first_sample%frame_samples;
-
-    for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) {
-        int sample_byte = read_8bit(stream->offset+framesin*frame_bytes +2+i/2,stream->streamfile);
-
-        outbuf[sample_count] = clamp16(
-                (i&1?
-                 get_low_nibble_signed(sample_byte):
-                 get_high_nibble_signed(sample_byte)
-                ) * scale +
-                (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12)
-                );
+        outbuf[sample_count] = sample;
+        sample_count += channelspacing;
 
         hist2 = hist1;
-        hist1 = outbuf[sample_count];
-    }
-
-    stream->adpcm_history1_32 = hist1;
-    stream->adpcm_history2_32 = hist2;
-}
-
-void decode_adx_fixed(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes) {
-    int i;
-    int32_t sample_count;
-    int32_t frame_samples = (frame_bytes - 2) * 2;
-
-    int framesin = first_sample/frame_samples;
-
-    int32_t scale = (read_16bitBE(stream->offset + framesin*frame_bytes, stream->streamfile) & 0x1FFF) + 1;
-    int32_t predictor = read_8bit(stream->offset + framesin*frame_bytes, stream->streamfile) >> 5;
-    int32_t hist1 = stream->adpcm_history1_32;
-    int32_t hist2 = stream->adpcm_history2_32;
-    int coef1 = stream->adpcm_coef[predictor * 2];
-    int coef2 = stream->adpcm_coef[predictor * 2 + 1];
-
-    first_sample = first_sample%frame_samples;
-
-    for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) {
-        int sample_byte = read_8bit(stream->offset+framesin*frame_bytes +2+i/2,stream->streamfile);
-
-        outbuf[sample_count] = clamp16(
-                (i&1?
-                 get_low_nibble_signed(sample_byte):
-                 get_high_nibble_signed(sample_byte)
-                ) * scale +
-                (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12)
-                );
-
-        hist2 = hist1;
-        hist1 = outbuf[sample_count];
-    }
-
-    stream->adpcm_history1_32 = hist1;
-    stream->adpcm_history2_32 = hist2;
-}
-
-void adx_next_key(VGMSTREAMCHANNEL * stream)
-{
-    stream->adx_xor = ( stream->adx_xor * stream->adx_mult + stream->adx_add ) & 0x7fff;
-}
-
-void decode_adx_enc(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes) {
-    int i;
-    int32_t sample_count;
-    int32_t frame_samples = (frame_bytes - 2) * 2;
-
-    int framesin = first_sample/frame_samples;
-
-    int32_t scale = ((read_16bitBE(stream->offset+framesin*frame_bytes,stream->streamfile) ^ stream->adx_xor)&0x1fff) + 1;
-    int32_t hist1 = stream->adpcm_history1_32;
-    int32_t hist2 = stream->adpcm_history2_32;
-    int coef1 = stream->adpcm_coef[0];
-    int coef2 = stream->adpcm_coef[1];
-
-    first_sample = first_sample%frame_samples;
-
-    for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) {
-        int sample_byte = read_8bit(stream->offset+framesin*frame_bytes +2+i/2,stream->streamfile);
-
-        outbuf[sample_count] = clamp16(
-                (i&1?
-                 get_low_nibble_signed(sample_byte):
-                 get_high_nibble_signed(sample_byte)
-                ) * scale +
-                (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12)
-                );
-
-        hist2 = hist1;
-        hist1 = outbuf[sample_count];
+        hist1 = sample;
     }
 
     stream->adpcm_history1_32 = hist1;
     stream->adpcm_history2_32 = hist2;
 
-    if (!(i % 32)) {
-        for (i=0;i<stream->adx_channels;i++)
-        {
+    if ((coding_type == coding_CRI_ADX_enc_8 || coding_type == coding_CRI_ADX_enc_9) && !(i % 32)) {
+        for (i =0; i < stream->adx_channels; i++) {
             adx_next_key(stream);
         }
     }
-
+}
+
+void adx_next_key(VGMSTREAMCHANNEL * stream) {
+    stream->adx_xor = (stream->adx_xor * stream->adx_mult + stream->adx_add) & 0x7fff;
 }
diff --git a/src/coding/coding.h b/src/coding/coding.h
index 9b1f378d..6277f244 100644
--- a/src/coding/coding.h
+++ b/src/coding/coding.h
@@ -4,10 +4,7 @@
 #include "../vgmstream.h"
 
 /* adx_decoder */
-void decode_adx(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes);
-void decode_adx_exp(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes);
-void decode_adx_fixed(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes);
-void decode_adx_enc(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes);
+void decode_adx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes, coding_t coding_type);
 void adx_next_key(VGMSTREAMCHANNEL * stream);
 
 /* g721_decoder */
@@ -92,10 +89,10 @@ size_t ps_cfg_bytes_to_samples(size_t bytes, size_t frame_size, int channels);
 int ps_check_format(STREAMFILE *streamFile, off_t offset, size_t max);
 
 /* psv_decoder */
-void decode_hevag(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do);
+void decode_hevag(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do);
 
 /* xa_decoder */
-void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel);
+void decode_xa(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel);
 size_t xa_bytes_to_samples(size_t bytes, int channels, int is_blocked);
 
 /* ea_xa_decoder */
diff --git a/src/coding/ngc_dsp_decoder.c b/src/coding/ngc_dsp_decoder.c
index 773e9612..6f7fe22c 100644
--- a/src/coding/ngc_dsp_decoder.c
+++ b/src/coding/ngc_dsp_decoder.c
@@ -1,69 +1,103 @@
 #include "coding.h"
 #include "../util.h"
 
+
 void decode_ngc_dsp(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do) {
-    int i=first_sample;
-    int32_t sample_count;
-
-    int framesin = first_sample/14;
-
-    int8_t header = read_8bit(framesin*8+stream->offset,stream->streamfile);
-    int32_t scale = 1 << (header & 0xf);
-    int coef_index = (header >> 4) & 0xf;
+    uint8_t frame[0x08] = {0};
+    off_t frame_offset;
+    int i, frames_in, sample_count = 0;
+    size_t bytes_per_frame, samples_per_frame;
+    int coef_index, scale, coef1, coef2;
     int32_t hist1 = stream->adpcm_history1_16;
     int32_t hist2 = stream->adpcm_history2_16;
-    int coef1 = stream->adpcm_coef[coef_index*2];
-    int coef2 = stream->adpcm_coef[coef_index*2+1];
 
-    first_sample = first_sample%14;
 
-    for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) {
-        int sample_byte = read_8bit(framesin*8+stream->offset+1+i/2,stream->streamfile);
+    /* external interleave (fixed size), mono */
+    bytes_per_frame = 0x08;
+    samples_per_frame = (bytes_per_frame - 0x01) * 2; /* always 14 */
+    frames_in = first_sample / samples_per_frame;
+    first_sample = first_sample % samples_per_frame;
 
-        outbuf[sample_count] = clamp16((
-                 (((i&1?
-                    get_low_nibble_signed(sample_byte):
-                    get_high_nibble_signed(sample_byte)
-                   ) * scale)<<11) + 1024 +
-                 (coef1 * hist1 + coef2 * hist2))>>11
-                );
+    /* parse frame header */
+    frame_offset = stream->offset + bytes_per_frame * frames_in;
+    read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */
+    scale = 1 << ((frame[0] >> 0) & 0xf);
+    coef_index  = (frame[0] >> 4) & 0xf;
+
+    VGM_ASSERT_ONCE(coef_index > 8, "DSP: incorrect coefs at %x\n", (uint32_t)frame_offset);
+    //if (coef_index > 8) //todo not correctly clamped in original decoder?
+    //    coef_index = 8;
+
+    coef1 = stream->adpcm_coef[coef_index*2 + 0];
+    coef2 = stream->adpcm_coef[coef_index*2 + 1];
+
+
+    /* decode nibbles */
+    for (i = first_sample; i < first_sample + samples_to_do; i++) {
+        int32_t sample = 0;
+        uint8_t nibbles = frame[0x01 + i/2];
+
+        sample = i&1 ? /* high nibble first */
+                get_low_nibble_signed(nibbles) :
+                get_high_nibble_signed(nibbles);
+        sample = ((sample * scale) << 11);
+        sample = (sample + 1024 + coef1*hist1 + coef2*hist2) >> 11;
+        sample = clamp16(sample);
+
+        outbuf[sample_count] = sample;
+        sample_count += channelspacing;
 
         hist2 = hist1;
-        hist1 = outbuf[sample_count];
+        hist1 = sample;
     }
 
     stream->adpcm_history1_16 = hist1;
     stream->adpcm_history2_16 = hist2;
 }
 
-/* read from memory rather than a file */
-static void decode_ngc_dsp_subint_internal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, uint8_t * mem) {
-    int i=first_sample;
-    int32_t sample_count;
 
-    int8_t header = mem[0];
-    int32_t scale = 1 << (header & 0xf);
-    int coef_index = (header >> 4) & 0xf;
+/* read from memory rather than a file */
+static void decode_ngc_dsp_subint_internal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, uint8_t * frame) {
+    int i, sample_count = 0;
+    size_t bytes_per_frame, samples_per_frame;
+    int coef_index, scale, coef1, coef2;
     int32_t hist1 = stream->adpcm_history1_16;
     int32_t hist2 = stream->adpcm_history2_16;
-    int coef1 = stream->adpcm_coef[coef_index*2];
-    int coef2 = stream->adpcm_coef[coef_index*2+1];
 
-    first_sample = first_sample%14;
 
-    for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) {
-        int sample_byte = mem[1 + i/2];
+    /* external interleave (fixed size), mono */
+    bytes_per_frame = 0x08;
+    samples_per_frame = (bytes_per_frame - 0x01) * 2; /* always 14 */
+    first_sample = first_sample % samples_per_frame;
+    VGM_ASSERT_ONCE(samples_to_do > samples_per_frame, "DSP: layout error, too many samples\n");
 
-        outbuf[sample_count] = clamp16((
-                 (((i&1?
-                    get_low_nibble_signed(sample_byte):
-                    get_high_nibble_signed(sample_byte)
-                   ) * scale)<<11) + 1024 +
-                 (coef1 * hist1 + coef2 * hist2))>>11
-                );
+    /* parse frame header */
+    scale = 1 << ((frame[0] >> 0) & 0xf);
+    coef_index  = (frame[0] >> 4) & 0xf;
+
+    VGM_ASSERT_ONCE(coef_index > 8, "DSP: incorrect coefs\n");
+    //if (coef_index > 8) //todo not correctly clamped in original decoder?
+    //    coef_index = 8;
+
+    coef1 = stream->adpcm_coef[coef_index*2 + 0];
+    coef2 = stream->adpcm_coef[coef_index*2 + 1];
+
+    for (i = first_sample; i < first_sample + samples_to_do; i++) {
+        int32_t sample = 0;
+        uint8_t nibbles = frame[0x01 + i/2];
+
+        sample = i&1 ?
+                get_low_nibble_signed(nibbles) :
+                get_high_nibble_signed(nibbles);
+        sample = ((sample * scale) << 11);
+        sample = (sample + 1024 + coef1*hist1 + coef2*hist2) >> 11;
+        sample = clamp16(sample);
+
+        outbuf[sample_count] = sample;
+        sample_count += channelspacing;
 
         hist2 = hist1;
-        hist1 = outbuf[sample_count];
+        hist1 = sample;
     }
 
     stream->adpcm_history1_16 = hist1;
@@ -72,22 +106,21 @@ static void decode_ngc_dsp_subint_internal(VGMSTREAMCHANNEL * stream, sample_t *
 
 /* decode DSP with byte-interleaved frames (ex. 0x08: 1122112211221122) */
 void decode_ngc_dsp_subint(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel, int interleave) {
-    uint8_t sample_data[0x08];
+    uint8_t frame[0x08];
     int i;
+    int frames_in = first_sample / 14;
 
-    int framesin = first_sample/14;
-
-    for (i=0; i < 0x08; i++) {
+    for (i = 0; i < 0x08; i++) {
         /* base + current frame + subint section + subint byte + channel adjust */
-        sample_data[i] = read_8bit(
+        frame[i] = read_8bit(
                 stream->offset
-                + framesin*(0x08*channelspacing)
+                + frames_in*(0x08*channelspacing)
                 + i/interleave * interleave * channelspacing
                 + i%interleave
                 + interleave * channel, stream->streamfile);
     }
 
-    decode_ngc_dsp_subint_internal(stream, outbuf, channelspacing, first_sample, samples_to_do, sample_data);
+    decode_ngc_dsp_subint_internal(stream, outbuf, channelspacing, first_sample, samples_to_do, frame);
 }
 
 
diff --git a/src/coding/psv_decoder.c b/src/coding/psv_decoder.c
index 7136c837..a6d1b941 100644
--- a/src/coding/psv_decoder.c
+++ b/src/coding/psv_decoder.c
@@ -3,7 +3,7 @@
 #include "../util.h"
 
 /* PSVita ADPCM table */
-static const int16_t HEVAG_coefs[128][4] = {
+static const int16_t hevag_coefs[128][4] = {
         {      0,     0,     0,     0 },
         {   7680,     0,     0,     0 },
         {  14720, -6656,     0,     0 },
@@ -141,59 +141,58 @@ static const int16_t HEVAG_coefs[128][4] = {
  *
  * Original research and algorithm by id-daemon / daemon1.
  */
-void decode_hevag(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do) {
-
-    uint8_t predict_nr, shift, flag, byte;
-    int32_t scale = 0;
-
-    int32_t sample;
+void decode_hevag(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do) {
+    uint8_t frame[0x10] = {0};
+    off_t frame_offset;
+    int i, frames_in, sample_count = 0;
+    size_t bytes_per_frame, samples_per_frame;
+    int coef_index, shift_factor, flag;
     int32_t hist1 = stream->adpcm_history1_32;
     int32_t hist2 = stream->adpcm_history2_32;
     int32_t hist3 = stream->adpcm_history3_32;
     int32_t hist4 = stream->adpcm_history4_32;
 
-    int i, sample_count;
 
+    /* external interleave (fixed size), mono */
+    bytes_per_frame = 0x10;
+    samples_per_frame = (bytes_per_frame - 0x02) * 2; /* always 28 */
+    frames_in = first_sample / samples_per_frame;
+    first_sample = first_sample % samples_per_frame;
 
-    int framesin = first_sample / 28;
+    /* parse frame header */
+    frame_offset = stream->offset + bytes_per_frame * frames_in;
+    read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */
+    coef_index   = (frame[0] >> 4) & 0xf;
+    shift_factor = (frame[0] >> 0) & 0xf;
+    coef_index  = ((frame[1] >> 0) & 0xf0) | coef_index;
+    flag = (frame[1] >> 0) & 0xf; /* same flags */
 
-    /* 4 byte header: predictor = 3rd and 1st, shift = 2nd, flag = 4th */
-    byte = (uint8_t)read_8bit(stream->offset+framesin*16+0,stream->streamfile);
-    predict_nr   = byte >> 4;
-    shift = byte & 0x0f;
-    byte = (uint8_t)read_8bit(stream->offset+framesin*16+1,stream->streamfile);
-    predict_nr = (byte & 0xF0) | predict_nr;
-    flag = byte & 0x0f; /* no change in flags */
+    VGM_ASSERT_ONCE(coef_index > 127 || shift_factor > 12, "HEVAG: incorrect coefs/shift at %x\n", (uint32_t)frame_offset);
+    if (coef_index > 127)
+        coef_index = 127; /* ? */
+    if (shift_factor > 12)
+        shift_factor = 9; /* ? */
 
-    first_sample = first_sample % 28;
+    /* decode nibbles */
+    for (i = first_sample; i < first_sample + samples_to_do; i++) {
+        int32_t sample = 0, scale = 0;
 
-    if (first_sample & 1) { /* if first sample is odd, read byte first */
-        byte = read_8bit(stream->offset+(framesin*16)+2+first_sample/2,stream->streamfile);
-    }
+        if (flag < 0x07) { /* with flag 0x07 decoded sample must be 0 */
+            uint8_t nibbles = frame[0x02 + i/2];
 
-    for (i = first_sample, sample_count = 0; i < first_sample + samples_to_do; i++, sample_count += channelspacing) {
-        sample = 0;
-
-        if (flag < 7 && predict_nr < 128) {
-
-            if (i & 1) {/* odd/even nibble */
-                scale = byte >> 4;
-            } else {
-                byte = read_8bit(stream->offset+(framesin*16)+2+i/2,stream->streamfile);
-                scale = byte & 0x0f;
-            }
-            if (scale > 7) { /* sign extend */
-                scale = scale - 16;
-            }
-
-            sample = (hist1 * HEVAG_coefs[predict_nr][0] +
-                      hist2 * HEVAG_coefs[predict_nr][1] +
-                      hist3 * HEVAG_coefs[predict_nr][2] +
-                      hist4 * HEVAG_coefs[predict_nr][3] ) / 32;
-            sample = (sample + (scale << (20 - shift)) + 128) >> 8;
+            scale = i&1 ? /* low nibble first */
+                    get_high_nibble_signed(nibbles):
+                    get_low_nibble_signed(nibbles);
+            sample = (hist1 * hevag_coefs[coef_index][0] +
+                      hist2 * hevag_coefs[coef_index][1] +
+                      hist3 * hevag_coefs[coef_index][2] +
+                      hist4 * hevag_coefs[coef_index][3] ) / 32;
+            sample = (sample + (scale << (20 - shift_factor)) + 128) >> 8;
         }
 
-        outbuf[sample_count] = clamp16(sample);
+        outbuf[sample_count] = clamp16(sample); /* saturate on store: the 32-bit result can exceed the 16-bit output range */
+        sample_count += channelspacing;
+
         hist4 = hist3;
         hist3 = hist2;
         hist2 = hist1;
diff --git a/src/coding/psx_decoder.c b/src/coding/psx_decoder.c
index f5e7ebd4..1107d097 100644
--- a/src/coding/psx_decoder.c
+++ b/src/coding/psx_decoder.c
@@ -2,7 +2,7 @@
 
 
 /* PS-ADPCM table, defined as rational numbers (as in the spec) */
-static const double ps_adpcm_coefs_f[5][2] = {
+static const float ps_adpcm_coefs_f[5][2] = {
         { 0.0      ,  0.0      }, //{   0.0        ,   0.0        },
         { 0.9375   ,  0.0      }, //{  60.0 / 64.0 ,   0.0        },
         { 1.796875 , -0.8125   }, //{ 115.0 / 64.0 , -52.0 / 64.0 },
@@ -44,6 +44,7 @@ static const int ps_adpcm_coefs_i[5][2] = {
 
 /* standard PS-ADPCM (float math version) */
 void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int is_badflags) {
+    uint8_t frame[0x10] = {0};
     off_t frame_offset;
     int i, frames_in, sample_count = 0;
     size_t bytes_per_frame, samples_per_frame;
@@ -51,6 +52,7 @@ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing
     int32_t hist1 = stream->adpcm_history1_32;
     int32_t hist2 = stream->adpcm_history2_32;
 
+
     /* external interleave (fixed size), mono */
     bytes_per_frame = 0x10;
     samples_per_frame = (bytes_per_frame - 0x02) * 2; /* always 28 */
@@ -58,10 +60,11 @@ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing
     first_sample = first_sample % samples_per_frame;
 
     /* parse frame header */
-    frame_offset = stream->offset + bytes_per_frame*frames_in;
-    coef_index   = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 4) & 0xf;
-    shift_factor = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 0) & 0xf;
-    flag = (uint8_t)read_8bit(frame_offset+0x01,stream->streamfile); /* only lower nibble needed */
+    frame_offset = stream->offset + bytes_per_frame * frames_in;
+    read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */
+    coef_index   = (frame[0] >> 4) & 0xf;
+    shift_factor = (frame[0] >> 0) & 0xf;
+    flag = frame[1]; /* only lower nibble needed */
 
     VGM_ASSERT_ONCE(coef_index > 5 || shift_factor > 12, "PS-ADPCM: incorrect coefs/shift at %x\n", (uint32_t)frame_offset);
     if (coef_index > 5) /* needed by inFamous (PS3) (maybe it's supposed to use more filters?) */
@@ -73,18 +76,19 @@ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing
         flag = 0;
     VGM_ASSERT_ONCE(flag > 7,"PS-ADPCM: unknown flag at %x\n", (uint32_t)frame_offset); /* meta should use PSX-badflags */
 
+
     /* decode nibbles */
     for (i = first_sample; i < first_sample + samples_to_do; i++) {
         int32_t sample = 0;
 
         if (flag < 0x07) { /* with flag 0x07 decoded sample must be 0 */
-            uint8_t nibbles = (uint8_t)read_8bit(frame_offset+0x02+i/2,stream->streamfile);
+            uint8_t nibbles = frame[0x02 + i/2];
 
             sample = i&1 ? /* low nibble first */
                     (nibbles >> 4) & 0x0f :
                     (nibbles >> 0) & 0x0f;
             sample = (int16_t)((sample << 12) & 0xf000) >> shift_factor; /* 16b sign extend + scale */
-            sample = (int)(sample + ps_adpcm_coefs_f[coef_index][0]*hist1 + ps_adpcm_coefs_f[coef_index][1]*hist2);
+            sample = (int32_t)(sample + ps_adpcm_coefs_f[coef_index][0]*hist1 + ps_adpcm_coefs_f[coef_index][1]*hist2);
             sample = clamp16(sample);
         }
 
@@ -105,6 +109,7 @@ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing
  *
  * Uses int math to decode, which seems more likely (based on FF XI PC's code in Moogle Toolbox). */
 void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int frame_size) {
+    uint8_t frame[0x50] = {0};
     off_t frame_offset;
     int i, frames_in, sample_count = 0;
     size_t bytes_per_frame, samples_per_frame;
@@ -112,6 +117,7 @@ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int c
     int32_t hist1 = stream->adpcm_history1_32;
     int32_t hist2 = stream->adpcm_history2_32;
 
+    if (frame_size < 0x02 || frame_size > (int)sizeof(frame)) return; /* frame must fit the local buffer and hold at least one data byte */
     /* external interleave (variable size), mono */
     bytes_per_frame = frame_size;
     samples_per_frame = (bytes_per_frame - 0x01) * 2;
@@ -119,9 +125,10 @@ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int c
     first_sample = first_sample % samples_per_frame;
 
     /* parse frame header */
-    frame_offset = stream->offset + bytes_per_frame*frames_in;
-    coef_index   = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 4) & 0xf;
-    shift_factor = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 0) & 0xf;
+    frame_offset = stream->offset + bytes_per_frame * frames_in;
+    read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */
+    coef_index   = (frame[0] >> 4) & 0xf;
+    shift_factor = (frame[0] >> 0) & 0xf;
 
     VGM_ASSERT_ONCE(coef_index > 5 || shift_factor > 12, "PS-ADPCM: incorrect coefs/shift at %x\n", (uint32_t)frame_offset);
     if (coef_index > 5) /* needed by Afrika (PS3) (maybe it's supposed to use more filters?) */
@@ -129,10 +136,11 @@ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int c
     if (shift_factor > 12)
         shift_factor = 9; /* supposedly, from Nocash PSX docs */
 
+
     /* decode nibbles */
     for (i = first_sample; i < first_sample + samples_to_do; i++) {
         int32_t sample = 0;
-        uint8_t nibbles = (uint8_t)read_8bit(frame_offset+0x01+i/2,stream->streamfile);
+        uint8_t nibbles = frame[0x01 + i/2];
 
         sample = i&1 ? /* low nibble first */
                 (nibbles >> 4) & 0x0f :
@@ -154,6 +162,7 @@ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int c
 
 /* PS-ADPCM from Pivotal games, exactly like psx_cfg but with float math (reverse engineered from the exe) */
 void decode_psx_pivotal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int frame_size) {
+    uint8_t frame[0x50] = {0};
     off_t frame_offset;
     int i, frames_in, sample_count = 0;
     size_t bytes_per_frame, samples_per_frame;
@@ -162,6 +171,7 @@ void decode_psx_pivotal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channe
     int32_t hist2 = stream->adpcm_history2_32;
     float scale;
 
+    if (frame_size < 0x02 || frame_size > (int)sizeof(frame)) return; /* frame must fit the local buffer and hold at least one data byte */
     /* external interleave (variable size), mono */
     bytes_per_frame = frame_size;
     samples_per_frame = (bytes_per_frame - 0x01) * 2;
@@ -169,21 +179,24 @@ void decode_psx_pivotal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channe
     first_sample = first_sample % samples_per_frame;
 
     /* parse frame header */
-    frame_offset = stream->offset + bytes_per_frame*frames_in;
-    coef_index   = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 4) & 0xf;
-    shift_factor = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 0) & 0xf;
+    frame_offset = stream->offset + bytes_per_frame * frames_in;
+    read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */
+    coef_index   = (frame[0] >> 4) & 0xf;
+    shift_factor = (frame[0] >> 0) & 0xf;
 
-    VGM_ASSERT_ONCE(coef_index > 5 || shift_factor > 12, "PS-ADPCM: incorrect coefs/shift at %x\n", (uint32_t)frame_offset);
+    VGM_ASSERT_ONCE(coef_index > 5 || shift_factor > 12, "PS-ADPCM-piv: incorrect coefs/shift\n");
     if (coef_index > 5) /* just in case */
         coef_index = 5;
     if (shift_factor > 12) /* same */
         shift_factor = 12;
+
     scale = (float)(1.0 / (double)(1 << shift_factor));
 
+
     /* decode nibbles */
     for (i = first_sample; i < first_sample + samples_to_do; i++) {
         int32_t sample = 0;
-        uint8_t nibbles = (uint8_t)read_8bit(frame_offset+0x01+i/2,stream->streamfile);
+        uint8_t nibbles = frame[0x01 + i/2];
 
         sample = !(i&1) ? /* low nibble first */
                 (nibbles >> 0) & 0x0f :
diff --git a/src/coding/xa_decoder.c b/src/coding/xa_decoder.c
index 58c3c21a..6577707d 100644
--- a/src/coding/xa_decoder.c
+++ b/src/coding/xa_decoder.c
@@ -6,11 +6,13 @@
 // May be implemented like the SNES/SPC700 BRR.
 
 /* XA ADPCM gain values */
-static const double K0[4] = { 0.0, 0.9375, 1.796875,  1.53125 };
-static const double K1[4] = { 0.0,    0.0,  -0.8125,-0.859375};
-/* K0/1 floats to int, K*2^10 = K*(1<<10) = K*1024 */
-static int get_IK0(int fid) { return ((int)((-K0[fid]) * (1 << 10))); }
-static int get_IK1(int fid) { return ((int)((-K1[fid]) * (1 << 10))); }
+#if 0
+static const float K0[4] = { 0.0, 0.9375, 1.796875, 1.53125 };
+static const float K1[4] = { 0.0,    0.0,  -0.8125, -0.859375 };
+#endif
+/* K0/1 floats to int, -K*2^10 = -K*(1<<10) = -K*1024 */
+static const int IK0[4] = {  0, -960, -1840, -1568 };
+static const int IK1[4] = {  0,    0,   832,   880 };
 
 /* Sony XA ADPCM, defined for CD-DA/CD-i in the "Red Book" (private) or "Green Book" (public) specs.
  * The algorithm basically is BRR (Bit Rate Reduction) from the SNES SPC700, while the data layout is new.
@@ -40,18 +42,14 @@ static int get_IK1(int fid) { return ((int)((-K1[fid]) * (1 << 10))); }
  *           (bsnes): https://gitlab.com/higan/higan/blob/master/higan/sfc/dsp/brr.cpp
  */
 
-void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel) {
-    off_t frame_offset, sp_offset;
-    int i,j, frames_in, samples_done = 0, sample_count = 0;
+void decode_xa(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel) {
+    uint8_t frame[0x80] = {0};
+    off_t frame_offset;
+    int i,j, sp_pos, frames_in, samples_done = 0, sample_count = 0;
     size_t bytes_per_frame, samples_per_frame;
     int32_t hist1 = stream->adpcm_history1_32;
     int32_t hist2 = stream->adpcm_history2_32;
 
-    /* external interleave (fixed size), mono/stereo */
-    bytes_per_frame = 0x80;
-    samples_per_frame = 28*8 / channelspacing;
-    frames_in = first_sample / samples_per_frame;
-    first_sample = first_sample % samples_per_frame;
 
     /* data layout (mono):
      * - CD-XA audio is divided into sectors ("audio blocks"), each with 18 size 0x80 frames
@@ -72,12 +70,19 @@ void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, i
      *   ...
      *   subframe 7: header @ 0x0b or 0x0f, 28 nibbles (high) @ 0x13,17,1b,1f,23 ... 7f
      */
-    frame_offset = stream->offset + bytes_per_frame*frames_in;
 
-    if (read_32bitBE(frame_offset+0x00,stream->streamfile) != read_32bitBE(frame_offset+0x04,stream->streamfile) ||
-        read_32bitBE(frame_offset+0x08,stream->streamfile) != read_32bitBE(frame_offset+0x0c,stream->streamfile)) {
-        VGM_LOG("bad frames at %x\n", (uint32_t)frame_offset);
-    }
+    /* external interleave (fixed size), mono/stereo */
+    bytes_per_frame = 0x80;
+    samples_per_frame = 28*8 / channelspacing;
+    frames_in = first_sample / samples_per_frame;
+    first_sample = first_sample % samples_per_frame;
+
+    /* parse frame header */
+    frame_offset = stream->offset + bytes_per_frame * frames_in;
+    read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */
+
+    VGM_ASSERT(get_32bitBE(frame+0x0) != get_32bitBE(frame+0x4) || get_32bitBE(frame+0x8) != get_32bitBE(frame+0xC),
+               "bad frames at %x\n", (uint32_t)frame_offset);
 
 
     /* decode subframes */
@@ -86,18 +91,18 @@ void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, i
         uint8_t coef_index, shift_factor;
 
         /* parse current subframe (sound unit)'s header (sound parameters) */
-        sp_offset = frame_offset + 0x04 + i*channelspacing + channel;
-        coef_index   = ((uint8_t)read_8bit(sp_offset,stream->streamfile) >> 4) & 0xf;
-        shift_factor = ((uint8_t)read_8bit(sp_offset,stream->streamfile) >> 0) & 0xf;
+        sp_pos = 0x04 + i*channelspacing + channel;
+        coef_index   = (frame[sp_pos] >> 4) & 0xf;
+        shift_factor = (frame[sp_pos] >> 0) & 0xf;
 
-        VGM_ASSERT(coef_index > 4 || shift_factor > 12, "XA: incorrect coefs/shift at %x\n", (uint32_t)sp_offset);
-        if (coef_index > 4)
+        VGM_ASSERT(coef_index > 3 || shift_factor > 12, "XA: incorrect coefs/shift at %x\n", (uint32_t)frame_offset + sp_pos);
+        if (coef_index > 3) /* IK0/IK1 hold 4 entries (0..3); index 4 would read past the tables */
             coef_index = 0; /* only 4 filters are used, rest is apparently 0 */
         if (shift_factor > 12)
             shift_factor = 9; /* supposedly, from Nocash PSX docs */
 
-        coef1 = get_IK0(coef_index);
-        coef2 = get_IK1(coef_index);
+        coef1 = IK0[coef_index];
+        coef2 = IK1[coef_index];
 
 
         /* decode subframe nibbles */
@@ -105,9 +110,9 @@ void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, i
             uint8_t nibbles;
             int32_t new_sample;
 
-            off_t su_offset = (channelspacing==1) ?
-                    frame_offset + 0x10 + j*0x04 + (i/2) : /* mono */
-                    frame_offset + 0x10 + j*0x04 + i;      /* stereo */
+            int su_pos = (channelspacing==1) ?
+                    0x10 + j*0x04 + (i/2) : /* mono */
+                    0x10 + j*0x04 + i;      /* stereo */
             int get_high_nibble = (channelspacing==1) ?
-                    (i&1) :         /* mono (even subframes = low, off subframes = high) */
+                    (i&1) :         /* mono (even subframes = low, odd subframes = high) */
                     (channel == 1); /* stereo (L channel / even subframes = low, R channel / odd subframes = high) */
@@ -118,11 +123,11 @@ void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, i
                 continue;
             }
 
-            nibbles = (uint8_t)read_8bit(su_offset,stream->streamfile);
+            nibbles = frame[su_pos];
 
             new_sample = get_high_nibble ?
                     (nibbles >> 4) & 0x0f :
-                    (nibbles     ) & 0x0f;
+                    (nibbles >> 0) & 0x0f;
 
             new_sample = (int16_t)((new_sample << 12) & 0xf000) >> shift_factor; /* 16b sign extend + scale */
             new_sample = new_sample << 4;
diff --git a/src/vgmstream.c b/src/vgmstream.c
index b7223522..fc24f919 100644
--- a/src/vgmstream.c
+++ b/src/vgmstream.c
@@ -1495,37 +1495,15 @@ void decode_vgmstream(VGMSTREAM * vgmstream, int samples_written, int samples_to
 
     switch (vgmstream->coding_type) {
         case coding_CRI_ADX:
-            for (ch = 0; ch < vgmstream->channels; ch++) {
-                decode_adx(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch,
-                        vgmstream->channels,vgmstream->samples_into_block,samples_to_do,
-                        vgmstream->interleave_block_size);
-            }
-
-            break;
         case coding_CRI_ADX_exp:
-            for (ch = 0; ch < vgmstream->channels; ch++) {
-                decode_adx_exp(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch,
-                        vgmstream->channels,vgmstream->samples_into_block,samples_to_do,
-                        vgmstream->interleave_block_size);
-            }
-
-            break;
         case coding_CRI_ADX_fixed:
-            for (ch = 0; ch < vgmstream->channels; ch++) {
-                decode_adx_fixed(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch,
-                        vgmstream->channels,vgmstream->samples_into_block,samples_to_do,
-                        vgmstream->interleave_block_size);
-            }
-
-            break;
         case coding_CRI_ADX_enc_8:
         case coding_CRI_ADX_enc_9:
             for (ch = 0; ch < vgmstream->channels; ch++) {
-                decode_adx_enc(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch,
+                decode_adx(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch,
                         vgmstream->channels,vgmstream->samples_into_block,samples_to_do,
-                        vgmstream->interleave_block_size);
+                        vgmstream->interleave_block_size, vgmstream->coding_type);
             }
-
             break;
         case coding_NGC_DSP:
             for (ch = 0; ch < vgmstream->channels; ch++) {
@@ -2417,7 +2395,7 @@ void describe_vgmstream(VGMSTREAM * vgmstream, char * desc, int length) {
     }
 
     /* codecs with configurable frame size */
-    if (vgmstream->layout_type == layout_none && vgmstream->interleave_block_size > 0) {
+    if (vgmstream->interleave_block_size > 0) {
         switch (vgmstream->coding_type) {
             case coding_MSADPCM:
             case coding_MSADPCM_int:
@@ -2813,6 +2791,23 @@ int vgmstream_open_stream(VGMSTREAM * vgmstream, STREAMFILE *streamFile, off_t s
         return 1;
 #endif
 
+    if ((vgmstream->coding_type == coding_PSX_cfg ||
+            vgmstream->coding_type == coding_PSX_pivotal) &&
+            (vgmstream->interleave_block_size == 0 || vgmstream->interleave_block_size > 0x50)) {
+        VGM_LOG("VGMSTREAM: PSX-cfg decoder with wrong frame size %x\n", vgmstream->interleave_block_size);
+        return 0;
+    }
+
+    if ((vgmstream->coding_type == coding_CRI_ADX ||
+            vgmstream->coding_type == coding_CRI_ADX_enc_8 ||
+            vgmstream->coding_type == coding_CRI_ADX_enc_9 ||
+            vgmstream->coding_type == coding_CRI_ADX_exp ||
+            vgmstream->coding_type == coding_CRI_ADX_fixed) &&
+            (vgmstream->interleave_block_size == 0 || vgmstream->interleave_block_size > 0x12)) {
+        VGM_LOG("VGMSTREAM: ADX decoder with wrong frame size %x\n", vgmstream->interleave_block_size);
+        return 0;
+    }
+
     /* if interleave is big enough keep a buffer per channel */
     if (vgmstream->interleave_block_size * vgmstream->channels >= STREAMFILE_DEFAULT_BUFFER_SIZE) {
         use_streamfile_per_channel = 1;