From ef64889a28b3300308139205f426598263fc7321 Mon Sep 17 00:00:00 2001
From: bnnm <bananaman255@gmail.com>
Date: Sun, 18 Nov 2018 17:00:48 +0100
Subject: [PATCH] Add XMA utils to fix encoder delay stuff

---
 src/coding/coding.h       |   4 +-
 src/coding/coding_utils.c | 391 +++++++++++++++++++++++---------------
 2 files changed, 237 insertions(+), 158 deletions(-)

diff --git a/src/coding/coding.h b/src/coding/coding.h
index ae1a5338..9d22b024 100644
--- a/src/coding/coding.h
+++ b/src/coding/coding.h
@@ -323,7 +323,6 @@ typedef struct {
 
     /* output */
     int32_t num_samples;
-    int32_t skip_samples;
     int32_t loop_start_sample;
     int32_t loop_end_sample;
 } ms_sample_data;
@@ -334,6 +333,9 @@ void wma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_ali
 void xma1_parse_fmt_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * channels, int * sample_rate, int * loop_flag, int32_t * loop_start_b, int32_t * loop_end_b, int32_t * loop_subframe, int be);
 void xma2_parse_fmt_chunk_extra(STREAMFILE *streamFile, off_t chunk_offset, int * loop_flag, int32_t * out_num_samples, int32_t * out_loop_start_sample, int32_t * out_loop_end_sample, int be);
 void xma2_parse_xma2_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * channels, int * sample_rate, int * loop_flag, int32_t * num_samples, int32_t * loop_start_sample, int32_t * loop_end_sample);
+
+void xma_fix_raw_samples(VGMSTREAM *vgmstream, STREAMFILE*streamFile, off_t stream_offset, size_t stream_size, off_t chunk_offset, int fix_num_samples, int fix_loop_samples);
+
 int riff_get_fact_skip_samples(STREAMFILE * streamFile, off_t start_offset);
 
 size_t atrac3_bytes_to_samples(size_t bytes, int full_block_align);
diff --git a/src/coding/coding_utils.c b/src/coding/coding_utils.c
index 158c5e4e..537b97cd 100644
--- a/src/coding/coding_utils.c
+++ b/src/coding/coding_utils.c
@@ -370,7 +370,7 @@ int ffmpeg_make_riff_xwma(uint8_t * buf, size_t buf_size, int codec, size_t data
     put_16bitLE(buf+0x20, block_align); /* block align */
     put_16bitLE(buf+0x22, 16); /* bits per sample */
     put_16bitLE(buf+0x24, 0); /* extra size */
-    /* here goes the "dpds" seek table, but it's optional and not needed by FFmpeg */
+    /* here goes the "dpds" seek table, but it's optional and not needed by FFmpeg (and also buggy) */
 
     memcpy(buf+0x26, "data", 4);
     put_32bitLE(buf+0x2a, data_size); /* data size */
@@ -439,78 +439,86 @@ fail:
 /* XMA PARSING                                  */
 /* ******************************************** */
 
+static void ms_audio_parse_header(STREAMFILE *streamFile, int xma_version, off_t offset_b, int bits_frame_size, size_t *first_frame_b, size_t *packet_skip_count, size_t *header_size_b) {
+    if (xma_version == 1) { /* XMA1 */
+        //packet_sequence  = read_bitsBE_b(offset_b+0,  4,  streamFile); /* numbered from 0 to N */
+        //unknown          = read_bitsBE_b(offset_b+4,  2,  streamFile); /* packet_metadata? (always 2) */
+        *first_frame_b     = read_bitsBE_b(offset_b+6,  bits_frame_size, streamFile); /* offset in bits inside the packet */
+        *packet_skip_count = read_bitsBE_b(offset_b+21, 11, streamFile); /* packets to skip for next packet of this stream */
+        *header_size_b     = 32;
+    } else if (xma_version == 2) { /* XMA2 */
+        //frame_count      = read_bitsBE_b(offset_b+0,  6,  streamFile); /* frames that begin in this packet */
+        *first_frame_b     = read_bitsBE_b(offset_b+6,  bits_frame_size, streamFile); /* offset in bits inside this packet */
+        //packet_metadata = read_bitsBE_b(offset_b+21, 3,  streamFile); /* packet_metadata (always 1) */
+        *packet_skip_count = read_bitsBE_b(offset_b+24, 8,  streamFile); /* packets to skip for next packet of this stream */
+        *header_size_b     = 32;
+    } else { /* WMAPRO(v3) */
+        //packet_sequence  = read_bitsBE_b(offset_b+0,  4,  streamFile); /* numbered from 0 to N */
+        //unknown          = read_bitsBE_b(offset_b+4,  2,  streamFile); /* packet_metadata? (always 2) */
+        *first_frame_b     = read_bitsBE_b(offset_b+6,  bits_frame_size, streamFile);  /* offset in bits inside the packet */
+        *packet_skip_count = 0; /* xwma has no need to skip packets since it uses real multichannel audio */
+        *header_size_b     = 4+2+bits_frame_size; /* variable-sized header */
+    }
+
+
+    /* XMA2 packets with XMA1 RIFF (transmogrified), remove the packet metadata flag */
+    if (xma_version == 1 && (*packet_skip_count & 0x700) == 0x100) {
+        //VGM_LOG("MS_SAMPLES: XMA1 transmogrified packet header at 0x%lx\n", (off_t)offset_b/8);
+        *packet_skip_count &= ~0x100;
+    }
+
+    /* full packet skip, no new frames start in this packet (prev frames can end here)
+     * standardized to some value */
+    if (*packet_skip_count == 0x7FF) { /* XMA1, 11b */
+        VGM_LOG("MS_SAMPLES: XMA1 full packet_skip at 0x%"PRIx64"\n", (off64_t)offset_b/8);
+        *packet_skip_count = 0x800;
+    }
+    else if (*packet_skip_count == 0xFF) { /* XMA2, 8b*/
+        VGM_LOG("MS_SAMPLES: XMA2 full packet_skip at 0x%"PRIx64"\n", (off64_t)offset_b/8);
+        *packet_skip_count = 0x800;
+    }
+
+    /* unusual but not impossible, as the encoder can interleave packets in any way */
+    VGM_ASSERT((*packet_skip_count > 10 && *packet_skip_count < 0x800),
+            "MS_SAMPLES: found big packet skip %i at 0x%"PRIx64"\n", *packet_skip_count, (off64_t)offset_b/8);
+}
+
 /**
  * Find total and loop samples of Microsoft audio formats (WMAPRO/XMA1/XMA2) by reading frame headers.
  *
  * The stream is made of packets, each containing N small frames of X samples. Frames are further divided into subframes.
  * XMA1/XMA2 can divided into streams for multichannel (1/2ch ... 1/2ch). From the file start, packet 1..N is owned by
  * stream 1..N. Then must follow "packet_skip" value to find the stream next packet, as they are arbitrarily interleaved.
+ * We only need to follow the first stream, as all must contain the same number of samples.
+ *
  * XMA1/XMA2/WMAPRO data only differs in the packet headers.
  */
-static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int start_packet, int channels_per_packet, int bytes_per_packet, int samples_per_frame, int samples_per_subframe, int bits_frame_size) {
-    int frames = 0, samples = 0, loop_start_frame = 0, loop_end_frame = 0, start_skip = 0, end_skip = 0;
+static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int channels_per_packet, int bytes_per_packet, int samples_per_frame, int samples_per_subframe, int bits_frame_size) {
+    int frames = 0, samples = 0, loop_start_frame = 0, loop_end_frame = 0;
 
-    size_t first_frame_b, packet_skip_count = 0, frame_size_b, packet_size_b, header_size_b;
+    size_t first_frame_b, packet_skip_count, header_size_b, frame_size_b;
     off_t offset_b, packet_offset_b, frame_offset_b;
 
     size_t packet_size = bytes_per_packet;
+    size_t packet_size_b = packet_size * 8;
     off_t offset = msd->data_offset;
     off_t max_offset = msd->data_offset + msd->data_size;
     off_t stream_offset_b = msd->data_offset * 8;
 
-    offset += start_packet * packet_size;
-    packet_size_b = packet_size * 8;
-
     /* read packets */
     while (offset < max_offset) {
         offset_b = offset * 8; /* global offset in bits */
         offset += packet_size; /* global offset in bytes */
 
         /* packet header */
-        if (msd->xma_version == 1) { /* XMA1 */
-            //packet_sequence = read_bitsBE_b(offset_b+0,  4,  streamFile); /* numbered from 0 to N */
-            //unknown         = read_bitsBE_b(offset_b+4,  2,  streamFile); /* packet_metadata? (always 2) */
-            first_frame_b     = read_bitsBE_b(offset_b+6,  bits_frame_size, streamFile); /* offset in bits inside the packet */
-            packet_skip_count = read_bitsBE_b(offset_b+21, 11, streamFile); /* packets to skip for next packet of this stream */
-            header_size_b     = 32;
-        } else if (msd->xma_version == 2) { /* XMA2 */
-            //frame_count     = read_bitsBE_b(offset_b+0,  6,  streamFile); /* frames that begin in this packet */
-            first_frame_b     = read_bitsBE_b(offset_b+6,  bits_frame_size, streamFile); /* offset in bits inside this packet */
-            //packet_metadata = read_bitsBE_b(offset_b+21, 3,  streamFile); /* packet_metadata (always 1) */
-            packet_skip_count = read_bitsBE_b(offset_b+24, 8,  streamFile); /* packets to skip for next packet of this stream */
-            header_size_b     = 32;
-        } else { /* WMAPRO(v3) */
-            //packet_sequence = read_bitsBE_b(offset_b+0,  4,  streamFile); /* numbered from 0 to N */
-            //unknown         = read_bitsBE_b(offset_b+4,  2,  streamFile); /* packet_metadata? (always 2) */
-            first_frame_b     = read_bitsBE_b(offset_b+6,  bits_frame_size, streamFile);  /* offset in bits inside the packet */
-            packet_skip_count = 0; /* xwma has no need to skip packets since it uses real multichannel audio */
-            header_size_b     = 4+2+bits_frame_size; /* variable-sized header */
+        ms_audio_parse_header(streamFile, msd->xma_version, offset_b, bits_frame_size, &first_frame_b, &packet_skip_count, &header_size_b);
+        if (packet_skip_count > 0x7FF) {
+            continue; /* full skip */
         }
 
-
-        /* XMA2 packets with XMA1 RIFF (transmogrified), remove the packet metadata flag */
-        if (msd->xma_version == 1 && (packet_skip_count & 0x700) == 0x100) {
-            //VGM_LOG("MS_SAMPLES: XMA1 transmogrified packet header at 0x%lx\n", (off_t)offset_b/8);
-            packet_skip_count &= ~0x100;
-        }
-
-        /* full packet skip, no new frames start in this packet (prev frames can end here) */
-        if (packet_skip_count == 0x7FF) { /* XMA1, 11b */
-            VGM_LOG("MS_SAMPLES: XMA1 full packet_skip %i at 0x%"PRIx64"\n", packet_skip_count, (off64_t)offset_b/8);
-            packet_skip_count = 0;
-            continue;
-        }
-        else if (packet_skip_count == 0xFF) { /* XMA2, 8b*/
-            VGM_LOG("MS_SAMPLES: XMA2 full packet_skip %i at 0x%"PRIx64"\n", packet_skip_count, (off64_t)offset_b/8);
-            packet_skip_count = 0;
-            continue;
-        }
-        offset += packet_size * (packet_skip_count); /* skip packets not owned by the first stream, since we only need samples from it */
-
-        /* unusual but not impossible, as the encoder can interleave packets in any way */
-        VGM_ASSERT(packet_skip_count > 10, "MS_SAMPLES: found big packet skip %i at 0x%"PRIx64"\n", packet_skip_count, (off64_t)offset_b/8);
-
         packet_offset_b = header_size_b + first_frame_b;
+        /* skip packets not owned by the first stream for next time */
+        offset += packet_size * (packet_skip_count);
 
 
         /* read packet frames */
@@ -526,7 +534,6 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
             /* frame header */
             frame_size_b = read_bitsBE_b(frame_offset_b, bits_frame_size, streamFile);
             frame_offset_b += bits_frame_size;
-            //;VGM_LOG("MS_SAMPLES: frame_offset=0x%lx (0b%lx), frame_size=0x%x (0b%x)\n", (off_t)frame_offset_b/8,(off_t)frame_offset_b, frame_size_b/8, frame_size_b);
 
             /* stop when packet padding starts (0x00 for XMA1 or 0xFF in XMA2) */
             if (frame_size_b == 0 || frame_size_b == (0xffffffff >> (32 - bits_frame_size)))  {
@@ -535,12 +542,77 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
             packet_offset_b += frame_size_b; /* including header */
 
 
-            /* find skips (info from FFmpeg) */
-            if (channels_per_packet && (msd->xma_version == 1 || msd->xma_version == 2)) {
-                int flag;
-                int tilehdr_size = 15; //todo incorrect but usable for XMA, fix for WMAPro (complex calcs, see ffmpeg decode_tilehdr)
+            samples += samples_per_frame;
+            frames++;
 
-                frame_offset_b += tilehdr_size;
+            /* last bit in frame = more frames flag, end packet to avoid reading garbage in some cases
+             * (last frame spilling to other packets also has this flag, though it's ignored here) */
+            if (packet_offset_b < packet_size_b && !read_bitsBE_b(offset_b + packet_offset_b - 1, 1, streamFile)) {
+                break;
+            }
+        }
+    }
+
+    /* result */
+    msd->num_samples = samples;
+    if (msd->loop_flag && loop_end_frame > loop_start_frame) {
+        msd->loop_start_sample = loop_start_frame * samples_per_frame + msd->loop_start_subframe * samples_per_subframe;
+        msd->loop_end_sample = loop_end_frame * samples_per_frame + (msd->loop_end_subframe) * samples_per_subframe;
+    }
+
+    /* the above can't properly read skips for WMAPro ATM, but should fixed to 1 frame anyway */
+    if (msd->xma_version == 0) {
+        msd->num_samples -= samples_per_frame; /* FFmpeg does skip this */
+#if 0
+        msd->num_samples += (samples_per_frame / 2); /* but doesn't add extra samples */
+#endif
+    }
+}
+
+/* simlar to the above but only gets skips */
+static void ms_audio_get_skips(STREAMFILE *streamFile, int xma_version, off_t data_offset, int channels_per_packet, int bytes_per_packet, int samples_per_frame, int bits_frame_size, int *out_start_skip, int *out_end_skip) {
+    int start_skip = 0, end_skip = 0;
+
+    size_t first_frame_b, packet_skip_count, header_size_b, frame_size_b;
+    off_t offset_b, packet_offset_b, frame_offset_b;
+
+    size_t packet_size = bytes_per_packet;
+    size_t packet_size_b = packet_size * 8;
+    off_t offset = data_offset;
+
+    /* read packet */
+    {
+        offset_b = offset * 8; /* global offset in bits */
+        offset += packet_size; /* global offset in bytes */
+
+        /* packet header */
+        ms_audio_parse_header(streamFile, 2, offset_b, bits_frame_size, &first_frame_b, &packet_skip_count, &header_size_b);
+        if (packet_skip_count > 0x7FF) {
+            return; /* full skip */
+        }
+
+        packet_offset_b = header_size_b + first_frame_b;
+
+        /* read packet frames */
+        while (packet_offset_b < packet_size_b) {
+            frame_offset_b = offset_b + packet_offset_b; /* in bits for aligment stuff */
+
+            /* frame header */
+            frame_size_b = read_bitsBE_b(frame_offset_b, bits_frame_size, streamFile);
+            frame_offset_b += bits_frame_size;
+
+            /* stop when packet padding starts (0x00 for XMA1 or 0xFF in XMA2) */
+            if (frame_size_b == 0 || frame_size_b == (0xffffffff >> (32 - bits_frame_size)))  {
+                break;
+            }
+            packet_offset_b += frame_size_b; /* including header */
+
+            /* find skips (info from FFmpeg) */
+            if (channels_per_packet && (xma_version == 1 || xma_version == 2)) {
+                int flag;
+                int len_tilehdr_size = 15; //todo incorrect but usable for XMA, fix for WMAPro (complex, see ffmpeg decode_tilehdr)
+
+                frame_offset_b += len_tilehdr_size;
 
                 /* ignore "postproc transform" */
                 if (channels_per_packet > 1) {
@@ -555,7 +627,7 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
                     }
                 }
 
-                /* get start/end skips to get the proper number of samples */
+                /* get start/end skips to get the proper number of samples (both can be 0) */
                 flag = read_bitsBE_b(frame_offset_b, 1, streamFile);
                 frame_offset_b += 1;
                 if (flag) {
@@ -564,8 +636,7 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
                     frame_offset_b += 1;
                     if (flag) {
                         int new_skip = read_bitsBE_b(frame_offset_b, 10, streamFile);
-                        VGM_LOG("MS_SAMPLES: start_skip %i at 0x%"PRIx64" (bit 0x%"PRIx64")\n", new_skip, (off64_t)frame_offset_b/8, (off64_t)frame_offset_b);
-                        VGM_ASSERT(start_skip, "MS_SAMPLES: more than one start_skip (%i)\n", new_skip); //ignore, happens due to incorrect tilehdr_size
+                        //;VGM_LOG("MS_SAMPLES: start_skip %i at 0x%"PRIx64" (bit 0x%"PRIx64")\n", new_skip, (off64_t)frame_offset_b/8, (off64_t)frame_offset_b);
                         frame_offset_b += 10;
 
                         if (new_skip > samples_per_frame) /* from xmaencode */
@@ -580,8 +651,7 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
                     frame_offset_b += 1;
                     if (flag) {
                         int new_skip = read_bitsBE_b(frame_offset_b, 10, streamFile);
-                        VGM_LOG("MS_SAMPLES: end_skip %i at 0x%"PRIx64" (bit 0x%"PRIx64")\n", new_skip, (off64_t)frame_offset_b/8, (off64_t)frame_offset_b);
-                        VGM_ASSERT(end_skip, "MS_SAMPLES: more than one end_skip (%i)\n", new_skip);//ignore, happens due to incorrect tilehdr_size
+                        //;VGM_LOG("MS_SAMPLES: end_skip %i at 0x%"PRIx64" (bit 0x%"PRIx64")\n", new_skip, (off64_t)frame_offset_b/8, (off64_t)frame_offset_b);
                         frame_offset_b += 10;
 
                         if (new_skip > samples_per_frame) /* from xmaencode  */
@@ -591,56 +661,15 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
                     }
                 }
             }
-
-
-            samples += samples_per_frame;
-            frames++;
-
-            /* last bit in frame = more frames flag, end packet to avoid reading garbage in some cases
-             * (last frame spilling to other packets also has this flag, though it's ignored here) */
-            if (packet_offset_b < packet_size_b && !read_bitsBE_b(offset_b + packet_offset_b - 1, 1, streamFile)) {
-                break;
-            }
         }
     }
 
-
-    msd->num_samples = samples;
-    msd->skip_samples = start_skip; //todo remove once FFmpeg decodes correctly
-    if (msd->loop_flag && loop_end_frame > loop_start_frame) {
-        msd->loop_start_sample = loop_start_frame * samples_per_frame + msd->loop_start_subframe * samples_per_subframe;
-        msd->loop_end_sample = loop_end_frame * samples_per_frame + (msd->loop_end_subframe) * samples_per_subframe;
-    }
-
-    //todo apply once FFmpeg decode is ok
-    // for XMA must internal skip 64 samples + apply skips + output extra 128 IMDCT samples) and remove skip_samples output
-#if 0
-    if (msd->xma_version == 1 || msd->xma_version == 2) {
-        msd->num_samples += 128; /* final extra IMDCT samples */
-        msd->num_samples -= start_skip; /* can be less but fixed to 512 in practice */
-        msd->num_samples -= end_skip;
-
-        /* from xma2encode tests this looks correct (probably XMA loops considering IMDCT delay)
-         * a full loop wav > xma makes start=384, then xma > wav writes "smpl" with start=0 */
-        if (msd->loop_flag) {
-            msd->loop_start_sample += 128;
-            msd->loop_start_sample -= start_skip;
-
-            msd->loop_end_sample += 128;
-            msd->loop_end_sample -= start_skip;
-        }
-    }
-#endif
-
-    /* the above can't properly read skips for WMAPro ATM, but should fixed to 1 frame anyway */
-    if (msd->xma_version == 0) {
-        msd->num_samples -= samples_per_frame; /* FFmpeg does skip this */
-#if 0
-        msd->num_samples += (samples_per_frame / 2); /* but doesn't add extra samples */
-#endif
-    }
+    /* output results */
+    if (out_start_skip) *out_start_skip = start_skip;
+    if (out_end_skip) *out_end_skip = end_skip;
 }
 
+
 static int wma_get_samples_per_frame(int version, int sample_rate, uint32_t decode_flags) {
     int frame_len_bits;
 
@@ -668,33 +697,40 @@ static int wma_get_samples_per_frame(int version, int sample_rate, uint32_t deco
     return 1 << frame_len_bits;
 }
 
-void xma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile) {
-    const int bytes_per_packet = 2048;
-    const int samples_per_frame = 512;
-    const int samples_per_subframe = 128;
-
+static int xma_get_channels_per_stream(STREAMFILE* streamFile, off_t chunk_offset, int channels) {
     int start_stream = 0;
     int channels_per_stream = 0;
 
     /* get from stream config (needed to find skips) */
-    if (msd->chunk_offset) {
-        int format = read_16bitLE(msd->chunk_offset,streamFile);
+    if (chunk_offset) {
+        int format = read_16bitLE(chunk_offset,streamFile);
         if (format == 0x0165 || format == 0x6501) { /* XMA1 */
-            channels_per_stream = read_8bit(msd->chunk_offset + 0x0C + 0x14*start_stream + 0x11,streamFile);
+            channels_per_stream = read_8bit(chunk_offset + 0x0C + 0x14*start_stream + 0x11,streamFile);
         } else if (format == 0x0166 || format == 0x6601) { /* new XMA2 */
-            channels_per_stream = msd->channels > 1 ? 2 : 1;
+            channels_per_stream = channels > 1 ? 2 : 1;
         } else { /* old XMA2 */
-            int version = read_8bit(msd->chunk_offset,streamFile);
-            channels_per_stream = read_8bit(msd->chunk_offset + 0x20 + (version==3 ? 0x00 : 0x08) + 0x4*start_stream + 0x00,streamFile);
+            int version = read_8bit(chunk_offset,streamFile);
+            channels_per_stream = read_8bit(chunk_offset + 0x20 + (version==3 ? 0x00 : 0x08) + 0x4*start_stream + 0x00,streamFile);
         }
     }
-    else if (msd->channels) {
-        channels_per_stream = msd->channels > 1 ? 2 : 1;
+    else if (channels) {
+        channels_per_stream = channels == 1 ? 1 : 2; /* default for XMA without RIFF chunks, most common */
     }
+
     if (channels_per_stream > 2)
         channels_per_stream = 0;
 
-    ms_audio_get_samples(msd, streamFile, start_stream, channels_per_stream, bytes_per_packet, samples_per_frame, samples_per_subframe, 15);
+    return channels_per_stream;
+}
+
+void xma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile) {
+    const int bytes_per_packet = 2048;
+    const int samples_per_frame = 512;
+    const int samples_per_subframe = 128;
+    const int bits_frame_size = 15;
+    int channels_per_stream = xma_get_channels_per_stream(streamFile, msd->chunk_offset, msd->channels);
+
+    ms_audio_get_samples(msd, streamFile, channels_per_stream, bytes_per_packet, samples_per_frame, samples_per_subframe, bits_frame_size);
 }
 
 void wmapro_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_align, int sample_rate, uint32_t decode_flags) {
@@ -703,7 +739,6 @@ void wmapro_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_
     int samples_per_frame = 0;
     int samples_per_subframe = 0;
     int bits_frame_size = 0;
-    int start_packet = 0;
     int channels_per_stream = msd->channels;
 
     if (!(decode_flags & 0x40)) {
@@ -716,7 +751,7 @@ void wmapro_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_
     samples_per_subframe = 0; /* not needed as WMAPro can't use loop subframes (complex subframe lengths) */
     msd->xma_version = 0; /* signal it's not XMA */
 
-    ms_audio_get_samples(msd, streamFile, start_packet, channels_per_stream, bytes_per_packet, samples_per_frame, samples_per_subframe, bits_frame_size);
+    ms_audio_get_samples(msd, streamFile, channels_per_stream, bytes_per_packet, samples_per_frame, samples_per_subframe, bits_frame_size);
 }
 
 void wma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_align, int sample_rate, uint32_t decode_flags) {
@@ -761,6 +796,81 @@ void wma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_ali
 }
 
 
+/* XMA hell for precise looping and gapless support, fixes raw sample values from headers
+ * that don't count XMA's final subframe/encoder delay/encoder padding, and FFmpeg stuff.
+ * Configurable since different headers vary for maximum annoyance. */
+void xma_fix_raw_samples(VGMSTREAM *vgmstream, STREAMFILE*streamFile, off_t stream_offset, size_t stream_size, off_t chunk_offset, int fix_num_samples, int fix_loop_samples) {
+    const int bytes_per_packet = 2048;
+    const int samples_per_frame = 512;
+    const int samples_per_subframe = 128;
+    const int bits_frame_size = 15;
+
+    int xma_version = 2; /* works ok even for XMA1 */
+    int channels_per_stream = xma_get_channels_per_stream(streamFile, chunk_offset, vgmstream->channels);
+    off_t first_packet = stream_offset;
+    off_t last_packet = stream_offset + stream_size - bytes_per_packet;
+    int32_t start_skip = 0, end_skip = 0;
+
+    if (stream_offset + stream_size > get_streamfile_size(streamFile)) {
+        VGM_LOG("XMA SKIPS: ignoring bad stream offset+size vs real size\n");
+        return;
+    }
+
+    /* find delay/padding values in the bitstream (should be safe even w/ multistreams
+     * as every stream repeats them). Theoretically every packet could contain skips,
+     * doesn't happen in practice though. */
+    ms_audio_get_skips(streamFile, xma_version, first_packet, channels_per_stream, bytes_per_packet, samples_per_frame, bits_frame_size, &start_skip, NULL);
+    ms_audio_get_skips(streamFile, xma_version,  last_packet, channels_per_stream, bytes_per_packet, samples_per_frame, bits_frame_size, NULL, &end_skip);
+
+    //;VGM_LOG("XMA SKIPS: apply start=%i, end=%i\n", start_skip, end_skip);
+    VGM_ASSERT(start_skip < samples_per_frame, "XMA SKIPS: small start skip\n");
+
+    if (end_skip == 512) { /* most likely a read bug */
+        VGM_LOG("XMA SKIPS: ignoring big end_skip\n");
+        end_skip = 0;
+    }
+
+
+    /* apply XMA extra samples */
+    if (fix_num_samples) {
+        vgmstream->num_samples += samples_per_subframe; /* final extra IMDCT samples */
+        vgmstream->num_samples -= start_skip; /* first samples skipped at the beginning */
+        vgmstream->num_samples -= end_skip; /* last samples discarded at the end */
+    }
+
+    /* from xma2encode tests this is correct (probably encodes/decodes loops considering all skips), ex.-
+     * full loop wav to xma makes start=384 (0 + ~512 delay - 128 padding), then xma to wav has "smpl" start=0 */
+    if (fix_loop_samples && vgmstream->loop_flag) {
+        vgmstream->loop_start_sample += samples_per_subframe;
+        vgmstream->loop_start_sample -= start_skip;
+        vgmstream->loop_end_sample += samples_per_subframe;
+        vgmstream->loop_end_sample -= start_skip;
+        /* since loops are adjusted this shouldn't happen (often loop_end == num_samples after applying all) */
+        if (vgmstream->loop_end_sample > vgmstream->num_samples &&
+                vgmstream->loop_end_sample - end_skip <= vgmstream->loop_end_sample) {
+            VGM_LOG("XMA SAMPLES: adjusted loop end\n");
+            vgmstream->loop_end_sample -= end_skip;
+        }
+    }
+
+#ifdef VGM_USE_FFMPEG
+    /* also fix FFmpeg, since we now know exact skips */
+    {
+        ffmpeg_codec_data* data = vgmstream->codec_data;
+
+        /* FFmpeg doesn't XMA apply encoder delay ATM so here we fix it manually.
+         * XMA delay is part if the bitstream and while theoretically it could be any
+         *  value (and is honored by xmaencoder), basically it's always 512.
+         *
+         * Somehow also needs to skip 64 extra samples (looks like another FFmpeg bug
+         * where XMA outputs half a subframe samples late, WMAPRO isn't affected),
+         * which sometimes makes FFmpeg complain (=reads after end) but doesn't seem audible. */
+        if (data->skipSamples == 0) {
+            ffmpeg_set_skip_samples(data, start_skip+64);
+        }
+    }
+#endif
+}
 
 /* ******************************************** */
 /* HEADER PARSING                               */
@@ -808,6 +918,7 @@ void xma2_parse_fmt_chunk_extra(STREAMFILE *streamFile, off_t chunk_offset, int
     loop_start_sample = read_32bit(chunk_offset+0x28,streamFile);
     loop_end_sample   = loop_start_sample + read_32bit(chunk_offset+0x2C,streamFile);
     loop_flag         = (uint8_t)read_8bit(chunk_offset+0x30,streamFile) != 0;
+    /* may need loop end +1, though some header doesn't need it (ex.- Sonic and Sega All Stars Racing  .str) */
 
     /* flag rarely set, use loop_end as marker */
     if (!loop_flag) {
@@ -820,24 +931,7 @@ void xma2_parse_fmt_chunk_extra(STREAMFILE *streamFile, off_t chunk_offset, int
         }
     }
 
-#if 0 //todo apply once FFmpeg decode is ok
-    /* apply extra output + skips (see ms_audio_get_samples, approximate as find out with first and last frames) */
-    {
-        int start_skip = 512;
-        int end_skip = 0;
-
-        num_samples += 128;
-        num_samples -= start_skip;
-        num_samples -= end_skip;
-        if (loop_flag) {
-            loop_start_sample += 128;
-            loop_start_sample -= start_skip;
-            loop_end_sample += 128;
-            loop_end_sample -= start_skip;
-        }
-    }
-#endif
-
+    /* samples are "raw" values, must be fixed externally (see xma_fix_raw_samples) */
     if(out_num_samples)        *out_num_samples       = num_samples;
     if(out_loop_start_sample)  *out_loop_start_sample = loop_start_sample;
     if(out_loop_end_sample)    *out_loop_end_sample   = loop_end_sample;
@@ -862,6 +956,7 @@ void xma2_parse_xma2_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * out
     loop_end_sample   = read_32bit(chunk_offset+0x08,streamFile);
     loop_flag = (uint8_t)read_8bit(chunk_offset+0x03,streamFile) > 0 || loop_end_sample; /* rarely not set, encoder default */
     sample_rate       = read_32bit(chunk_offset+0x0c,streamFile);
+    /* may need loop end +1 */
 
     offset = xma2_chunk_version == 3 ? 0x14 : 0x1C;
     num_samples = read_32bit(chunk_offset+offset+0x00,streamFile);
@@ -874,25 +969,7 @@ void xma2_parse_xma2_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * out
         channels += read_8bit(chunk_offset+offset+i*0x04,streamFile);
     }
 
-#if 0 //todo apply once FFmpeg decode is ok
-    /* apply extra output + skips (see ms_audio_get_samples, approximate as find out with first and last frames) */
-    {
-        int start_skip = 512;
-        int end_skip = 0;
-
-        num_samples += 128;
-        num_samples -= start_skip;
-        num_samples -= end_skip;
-        if (loop_flag) {
-            loop_start_sample += 128;
-            loop_start_sample -= start_skip;
-
-            loop_end_sample += 128;
-            loop_end_sample -= start_skip;
-        }
-    }
-#endif
-
+    /* samples are "raw" values, must be fixed externally (see xma_fix_raw_samples) */
     if(out_channels)           *out_channels          = channels;
     if(out_sample_rate)        *out_sample_rate       = sample_rate;
     if(out_num_samples)        *out_num_samples       = num_samples;