From d49aacbf525db0e41aae821bd1297607ef6aaa30 Mon Sep 17 00:00:00 2001
From: bnnm <bananaman255@gmail.com>
Date: Sun, 20 Jun 2021 18:10:34 +0200
Subject: [PATCH] Improve MSADPCM accuracy

---
 src/coding/coding.h          |  2 +-
 src/coding/msadpcm_decoder.c | 23 ++++++++++-------------
 src/decode.c                 |  3 ++-
 src/meta/riff.c              |  1 +
 4 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/coding/coding.h b/src/coding/coding.h
index 3a67caf2..ad9de151 100644
--- a/src/coding/coding.h
+++ b/src/coding/coding.h
@@ -161,7 +161,7 @@ STREAMFILE* nwa_get_streamfile(nwa_codec_data* data);
 #define MSADPCM_MAX_BLOCK_SIZE  0x800 /* known max and RIFF spec seems to concur, while MS's encoders may be lower (typical stereo: 0x8c, 0x2C, 0x48, 0x400) */
 
 void decode_msadpcm_stereo(VGMSTREAM* vgmstream, sample_t* outbuf, int32_t first_sample, int32_t samples_to_do);
-void decode_msadpcm_mono(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel);
+void decode_msadpcm_mono(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel, int config);
 void decode_msadpcm_ck(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel);
 long msadpcm_bytes_to_samples(long bytes, int block_size, int channels);
 int msadpcm_check_coefs(STREAMFILE* sf, off_t offset);
diff --git a/src/coding/msadpcm_decoder.c b/src/coding/msadpcm_decoder.c
index af999893..67107bd6 100644
--- a/src/coding/msadpcm_decoder.c
+++ b/src/coding/msadpcm_decoder.c
@@ -24,9 +24,9 @@ static const int16_t msadpcm_coefs[7][2] = {
 
 /* Decodes MSADPCM as explained in the spec (RIFFNEW doc + msadpcm.c).
  * Though RIFFNEW writes "predictor / 256" (DIV), msadpcm.c uses "predictor >> 8" (SHR). They may seem the
- * same but on negative values SHR gets different results (-128 / 256 = 0; -128 >> 8 = 1) = some output diffs.
+ * same but on negative values they differ, as DIV truncates toward zero while arithmetic SHR rounds down (-128 / 256 = 0; -128 >> 8 = -1), causing some output diffs.
  * SHR is true in Windows msadp32.acm decoders (up to Win10), while some non-Windows implementations or
- * engines (like UE4) use DIV though (more accurate).
+ * engines (like UE4) may use DIV.
  *
  * On invalid coef index, msadpcm.c returns 0 decoded samples but here we clamp and keep on trucking.
  * In theory blocks may be 0-padded and should use samples_per_frame from header, in practice seems to
@@ -131,25 +131,20 @@ void decode_msadpcm_stereo(VGMSTREAM* vgmstream, sample_t* outbuf, int32_t first
 
     /* decode nibbles */
     for (i = first_sample; i < first_sample + samples_to_do; i++) {
-        int ch;
+        uint8_t byte = get_u8(frame+0x07*2+(i-2));
 
-        for (ch = 0; ch < 2; ch++) {
-            VGMSTREAMCHANNEL* stream = &vgmstream->ch[ch];
-            uint8_t byte = get_u8(frame+0x07*2+(i-2));
-            int shift = (ch == 0); /* L = high nibble first (iErrorDelta) */
-
-            outbuf[0] = msadpcm_adpcm_expand_nibble_div(stream, byte, shift);
-            outbuf++;
-        }
+        *outbuf++ = msadpcm_adpcm_expand_nibble_shr(&vgmstream->ch[0], byte, 1); /* L */
+        *outbuf++ = msadpcm_adpcm_expand_nibble_shr(&vgmstream->ch[1], byte, 0); /* R */
     }
 }
 
-void decode_msadpcm_mono(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel) {
+void decode_msadpcm_mono(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel, int config) {
     VGMSTREAMCHANNEL* stream = &vgmstream->ch[channel];
     uint8_t frame[MSADPCM_MAX_BLOCK_SIZE] = {0};
     int i, frames_in;
     size_t bytes_per_frame, samples_per_frame;
     off_t frame_offset;
+    int is_shr = (config == 0);
 
     /* external interleave (variable size), mono */
     bytes_per_frame = vgmstream->frame_size;
@@ -188,7 +183,9 @@ void decode_msadpcm_mono(VGMSTREAM* vgmstream, sample_t* outbuf, int channelspac
         uint8_t byte = get_u8(frame+0x07+(i-2)/2);
         int shift = !(i & 1); /* high nibble first */
 
-        outbuf[0] = msadpcm_adpcm_expand_nibble_div(stream, byte, shift);
+        outbuf[0] = is_shr ?
+                msadpcm_adpcm_expand_nibble_shr(stream, byte, shift) :
+                msadpcm_adpcm_expand_nibble_div(stream, byte, shift);
         outbuf += channelspacing;
     }
 }
diff --git a/src/decode.c b/src/decode.c
index a023a112..db00ccea 100644
--- a/src/decode.c
+++ b/src/decode.c
@@ -1290,7 +1290,8 @@ void decode_vgmstream(VGMSTREAM* vgmstream, int samples_written, int samples_to_
             if (vgmstream->channels == 1 || vgmstream->coding_type == coding_MSADPCM_int) {
                 for (ch = 0; ch < vgmstream->channels; ch++) {
                     decode_msadpcm_mono(vgmstream,buffer+ch,
-                            vgmstream->channels,vgmstream->samples_into_block, samples_to_do, ch);
+                            vgmstream->channels,vgmstream->samples_into_block, samples_to_do, ch,
+                            vgmstream->codec_config);
                 }
             }
             else if (vgmstream->channels == 2) {
diff --git a/src/meta/riff.c b/src/meta/riff.c
index 6efa2fa0..a91dbeb1 100644
--- a/src/meta/riff.c
+++ b/src/meta/riff.c
@@ -794,6 +794,7 @@ VGMSTREAM* init_vgmstream_riff(STREAMFILE* sf) {
     /* UE4 uses interleaved mono MSADPCM, try to autodetect without breaking normal MSADPCM */
     if (fmt.coding_type == coding_MSADPCM && is_ue4_msadpcm(vgmstream, sf, &fmt, fact_sample_count, start_offset)) {
         vgmstream->coding_type = coding_MSADPCM_int;
+        vgmstream->codec_config = 1; /* mark as UE4 MSADPCM */
         vgmstream->frame_size = fmt.block_size;
         vgmstream->layout_type = layout_interleave;
         vgmstream->interleave_block_size = get_ue4_msadpcm_interleave(sf, &fmt, start_offset, data_size);