From 8e2196a5140d2e956af400445cfd9210ef829019 Mon Sep 17 00:00:00 2001
From: bnnm <bananaman255@gmail.com>
Date: Sat, 19 Oct 2019 11:13:02 +0200
Subject: [PATCH] Clean RIFF Ogg code

---
 src/libvgmstream.vcproj          |   4 +
 src/libvgmstream.vcxproj         |   1 +
 src/libvgmstream.vcxproj.filters |   3 +
 src/meta/riff.c                  | 128 ++++++------------------------
 src/meta/riff_ogg_streamfile.h   | 130 +++++++++++++++++++++++++++++++
 5 files changed, 160 insertions(+), 106 deletions(-)
 create mode 100644 src/meta/riff_ogg_streamfile.h

diff --git a/src/libvgmstream.vcproj b/src/libvgmstream.vcproj
index 4d56f191..5c1b6696 100644
--- a/src/libvgmstream.vcproj
+++ b/src/libvgmstream.vcproj
@@ -308,6 +308,10 @@
                     RelativePath=".\meta\xavs_streamfile.h"
                     >
                 </File>
+                <File
+                    RelativePath=".\meta\riff_ogg_streamfile.h"
+                    >
+                </File>
                 <File
                     RelativePath=".\meta\sfh_streamfile.h"
                     >
diff --git a/src/libvgmstream.vcxproj b/src/libvgmstream.vcxproj
index c27152a3..3a7c959c 100644
--- a/src/libvgmstream.vcxproj
+++ b/src/libvgmstream.vcxproj
@@ -119,6 +119,7 @@
     <ClInclude Include="meta\mzrt_streamfile.h" />
     <ClInclude Include="meta\ogg_vorbis_streamfile.h" />
     <ClInclude Include="meta\opus_interleave_streamfile.h" />
+    <ClInclude Include="meta\riff_ogg_streamfile.h" />
     <ClInclude Include="meta\sfh_streamfile.h" />
     <ClInclude Include="meta\sqex_scd_streamfile.h" />
     <ClInclude Include="meta\sqex_sead_streamfile.h" />
diff --git a/src/libvgmstream.vcxproj.filters b/src/libvgmstream.vcxproj.filters
index 489be626..3bcd3738 100644
--- a/src/libvgmstream.vcxproj.filters
+++ b/src/libvgmstream.vcxproj.filters
@@ -128,6 +128,9 @@
     <ClInclude Include="meta\xavs_streamfile.h">
       <Filter>meta\Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="meta\riff_ogg_streamfile.h">
+      <Filter>meta\Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="meta\sfh_streamfile.h">
       <Filter>meta\Header Files</Filter>
     </ClInclude>
diff --git a/src/meta/riff.c b/src/meta/riff.c
index aad822b8..7264b3d2 100644
--- a/src/meta/riff.c
+++ b/src/meta/riff.c
@@ -1,15 +1,12 @@
+#include <string.h>
 #include "meta.h"
 #include "../coding/coding.h"
 #include "../layout/layout.h"
 #include "../util.h"
-#include <string.h>
+#include "riff_ogg_streamfile.h"
 
 /* RIFF - Resource Interchange File Format, standard container used in many games */
 
-#ifdef VGM_USE_VORBIS
-static VGMSTREAM *parse_riff_ogg(STREAMFILE * streamFile, off_t start_offset, size_t data_size);
-#endif
-
 
 /* return milliseconds */
 static long parse_adtl_marker(unsigned char * marker) {
@@ -448,7 +445,7 @@ VGMSTREAM * init_vgmstream_riff(STREAMFILE *streamFile) {
                     break;
 
                 case 0x66616374:    /* "fact" */
-                    if (chunk_size == 0x04) { /* standard, usually found with ADPCM */
+                    if (chunk_size == 0x04) { /* standard (usually for ADPCM, MS recommends to set for non-PCM codecs) */
                         fact_sample_count = read_32bitLE(current_chunk+0x08, streamFile);
                     }
                     else if (chunk_size == 0x10 && read_32bitBE(current_chunk+0x08+0x04, streamFile) == 0x4C794E20) { /* "LyN " */
@@ -537,13 +534,6 @@ VGMSTREAM * init_vgmstream_riff(STREAMFILE *streamFile) {
         goto fail;
     }
 
-#ifdef VGM_USE_VORBIS
-    /* special case using init_vgmstream_ogg_vorbis */
-    if (fmt.coding_type == coding_OGG_VORBIS) {
-        return parse_riff_ogg(streamFile, start_offset, data_size);
-    }
-#endif
-
 
     /* build the VGMSTREAM */
     vgmstream = allocate_vgmstream(fmt.channel_count,loop_flag);
@@ -568,6 +558,9 @@ VGMSTREAM * init_vgmstream_riff(STREAMFILE *streamFile) {
 #endif
 #ifdef VGM_USE_ATRAC9
         case coding_ATRAC9:
+#endif
+#ifdef VGM_USE_VORBIS
+        case coding_OGG_VORBIS:
 #endif
             vgmstream->layout_type = layout_none;
             vgmstream->interleave_block_size = fmt.block_size;
@@ -694,6 +687,21 @@ VGMSTREAM * init_vgmstream_riff(STREAMFILE *streamFile) {
             break;
         }
 #endif
+#ifdef VGM_USE_VORBIS
+        case coding_OGG_VORBIS: {
+            /* special handling of Liar-soft's buggy RIFF+Ogg made with Soundforge [Shikkoku no Sharnoth (PC)] */
+            STREAMFILE *temp_sf = setup_riff_ogg_streamfile(streamFile, start_offset, data_size);
+            if (!temp_sf) goto fail;
+
+            vgmstream->codec_data = init_ogg_vorbis(temp_sf, 0x00, get_streamfile_size(temp_sf), NULL);
+            if (!vgmstream->codec_data) goto fail;
+
+            /* Soundforge includes fact_samples and should be equal to Ogg samples */
+            vgmstream->num_samples = fact_sample_count;
+            break;
+        }
+#endif
+
         default:
             goto fail;
     }
@@ -742,7 +750,7 @@ VGMSTREAM * init_vgmstream_riff(STREAMFILE *streamFile) {
         vgmstream->meta_type = meta_RIFF_WAVE_MWV;
     }
 
-    if ( !vgmstream_open_stream(vgmstream,streamFile,start_offset) )
+    if (!vgmstream_open_stream(vgmstream, streamFile, start_offset))
         goto fail;
 
     /* UE4 uses half-interleave mono MSADPCM, try to autodetect without breaking normal MSADPCM */
@@ -940,95 +948,3 @@ fail:
     close_vgmstream(vgmstream);
     return NULL;
 }
-
-
-#ifdef VGM_USE_VORBIS
-typedef struct {
-    off_t patch_offset;
-} riff_ogg_io_data;
-
-static size_t riff_ogg_io_read(STREAMFILE *streamfile, uint8_t *dest, off_t offset, size_t length, riff_ogg_io_data* data) {
-    size_t bytes_read = streamfile->read(streamfile, dest, offset, length);
-
-    /* has garbage init Oggs pages, patch bad flag */
-    if (data->patch_offset && data->patch_offset >= offset && data->patch_offset < offset + bytes_read) {
-        VGM_ASSERT(dest[data->patch_offset - offset] != 0x02, "RIFF Ogg: bad patch offset\n");
-        dest[data->patch_offset - offset] = 0x00;
-    }
-
-    return bytes_read;
-}
-
-/* special handling of Liar-soft's buggy RIFF+Ogg made with Soundforge [Shikkoku no Sharnoth (PC)] */
-static VGMSTREAM *parse_riff_ogg(STREAMFILE * streamFile, off_t start_offset, size_t data_size) {
-    off_t patch_offset = 0;
-    size_t real_size = data_size;
-
-    /* initial page flag is repeated and causes glitches in decoders, find bad offset */
-    {
-        off_t offset = start_offset + 0x04+0x02;
-        off_t offset_limit = start_offset + data_size; /* usually in the first 0x3000 but can be +0x100000 */
-
-        while (offset < offset_limit) {
-            if (read_32bitBE(offset+0x00, streamFile) == 0x4f676753 &&  /* "OggS" */
-                read_16bitBE(offset+0x04, streamFile) == 0x0002) {      /* start page flag */
-
-                //todo callback should patch on-the-fly by analyzing all "OggS", but is problematic due to arbitrary offsets
-                if (patch_offset) {
-                    VGM_LOG("RIFF Ogg: found multiple repeated start pages\n");
-                    return NULL;
-                }
-
-                patch_offset = offset /*- start_offset*/ + 0x04+0x01;
-            }
-            offset++; //todo could be optimized to do OggS page sizes
-        }
-    }
-
-    /* last pages don't have the proper flag and confuse decoders, find actual end */
-    {
-        size_t max_size = data_size;
-        off_t offset_limit = start_offset + data_size - 0x1000; /* not worth checking more, let decoder try */
-        off_t offset = start_offset + data_size - 0x1a;
-
-        while (offset > offset_limit) {
-            if (read_32bitBE(offset+0x00, streamFile) == 0x4f676753) { /* "OggS" */
-                if (read_16bitBE(offset+0x04, streamFile) == 0x0004) { /* last page flag */
-                    real_size = max_size;
-                    break;
-                } else {
-                    max_size = offset - start_offset; /* ignore bad pages */
-                }
-            }
-            offset--;
-        }
-    }
-
-    /* Soundforge includes fact_samples but should be equal to Ogg samples */
-
-    /* actual Ogg init with custom callback to patch weirdness */
-    {
-        VGMSTREAM *vgmstream = NULL;
-        STREAMFILE *custom_streamFile = NULL;
-        ogg_vorbis_meta_info_t ovmi = {0};
-        riff_ogg_io_data io_data = {0};
-        size_t io_data_size = sizeof(riff_ogg_io_data);
-
-
-        ovmi.meta_type = meta_RIFF_WAVE;
-        ovmi.stream_size = real_size;
-        //inf.loop_flag = 0; /* not observed */
-
-        io_data.patch_offset = patch_offset;
-
-        custom_streamFile = open_io_streamfile(open_wrap_streamfile(streamFile), &io_data,io_data_size, riff_ogg_io_read,NULL);
-        if (!custom_streamFile) return NULL;
-
-        vgmstream = init_vgmstream_ogg_vorbis_callbacks(custom_streamFile, NULL, start_offset, &ovmi);
-
-        close_streamfile(custom_streamFile);
-
-        return vgmstream;
-    }
-}
-#endif
diff --git a/src/meta/riff_ogg_streamfile.h b/src/meta/riff_ogg_streamfile.h
new file mode 100644
index 00000000..6c7c599d
--- /dev/null
+++ b/src/meta/riff_ogg_streamfile.h
@@ -0,0 +1,130 @@
+#ifndef _RIFF_OGG_STREAMFILE_H_
+#define _RIFF_OGG_STREAMFILE_H_
+#include "../streamfile.h"
+
+#ifdef VGM_USE_VORBIS
+typedef struct { /* state passed along with riff_ogg_io_read when the patching IO streamfile is created */
+    off_t patch_offset; /* position of the page flag byte that reads overwrite with 0x00; 0 = nothing to patch */
+} riff_ogg_io_data;
+
+/* Read callback: calls the given streamfile's read, then zeroes the byte at data->patch_offset if this read covered it. */
+static size_t riff_ogg_io_read(STREAMFILE *streamfile, uint8_t *dest, off_t offset, size_t length, riff_ogg_io_data* data) {
+    size_t bytes_read = streamfile->read(streamfile, dest, offset, length);
+    /* has garbage init Oggs pages, patch bad flag when it falls inside this read (patch_offset 0 = nothing to patch) */
+    if (data->patch_offset && data->patch_offset >= offset && data->patch_offset < offset + bytes_read) {
+        /* off_t has no matching printf length modifier: cast so the argument agrees with %lx */
+        VGM_ASSERT(dest[data->patch_offset - offset] != 0x02, "RIFF Ogg: bad patch offset at %lx\n", (unsigned long)data->patch_offset);
+        dest[data->patch_offset - offset] = 0x00;
+    }
+    return bytes_read; /* byte count reported by the read above, unaffected by patching */
+}
+
+/* Loads the page header found at offset into buf and returns the whole page size, or 0 if it can't be read or doesn't fit (no "OggS" check here). */
+static size_t ogg_get_page(uint8_t *buf, size_t bufsize, off_t offset, STREAMFILE *sf) {
+    enum { BASE_SIZE = 0x1b }; /* fixed header part, ending with the segment count byte at 0x1a */
+    size_t got, table_size, total_size, seg;
+
+    /* fixed part must fit in buf and be fully readable */
+    if (BASE_SIZE > bufsize) return 0;
+    got = read_streamfile(buf, offset, BASE_SIZE, sf);
+    if (got != BASE_SIZE) return 0;
+
+    /* segment table (one size byte per segment) is read right after the fixed part */
+    table_size = get_u8(buf + 0x1a);
+    if (BASE_SIZE + table_size > bufsize) return 0;
+    got = read_streamfile(buf + BASE_SIZE, offset + BASE_SIZE, table_size, sf);
+    if (got != table_size) return 0;
+
+    /* page size = fixed part + segment table + the sum of all segment sizes */
+    total_size = BASE_SIZE + table_size;
+    for (seg = 0; seg < table_size; seg++) {
+        total_size += get_u8(buf + BASE_SIZE + seg);
+    }
+    return total_size;
+}
+
+/* Patches Oggs with weirdness: wraps sf in clamp (start, real_size) + IO streamfiles that zero a repeated start-page flag
+ * and cut flagless pages at the end. Returns NULL if no repeated start page is found or the needed reads/opens fail. */
+static STREAMFILE* setup_riff_ogg_streamfile(STREAMFILE *sf, off_t start, size_t size) {
+    off_t patch_offset = 0;
+    size_t real_size = size;
+    uint8_t buf[0x1000];
+
+    /* initial page flag is repeated and causes glitches in decoders, find bad offset */
+    //todo callback could patch on-the-fly by analyzing all "OggS", but is problematic due to arbitrary offsets
+    {
+        off_t offset = start;
+        size_t page_size;
+        off_t offset_limit = start + size; /* usually in the first 0x3000 but can be +0x100000 */
+        //todo this doesn't seem to help much
+        STREAMFILE *temp_sf = reopen_streamfile(sf, 0x100); /* use small-ish sf to avoid reading the whole thing */
+        if (!temp_sf) return NULL; /* presumably NULL when the reopen fails: don't read pages through it */
+
+        /* first page is ok */
+        page_size = ogg_get_page(buf, sizeof(buf), offset, temp_sf);
+        offset += page_size;
+
+        while (offset < offset_limit) {
+            page_size = ogg_get_page(buf, sizeof(buf), offset, temp_sf);
+            if (page_size == 0) break;
+
+            if (get_u32be(buf + 0x00) != 0x4f676753) /* "OggS" */
+                break;
+
+            if (get_u16be(buf + 0x04) == 0x0002) { /* start page flag (only the first repeat found is patched) */
+                patch_offset = (offset - start) + 0x04 + 0x01; /* clamp'ed */
+                break;
+            }
+
+            offset += page_size;
+        }
+
+        close_streamfile(temp_sf);
+
+        if (patch_offset == 0)
+            return NULL;
+    }
+
+    /* has a bunch of padding(?) pages at the end with no data nor flag that confuse decoders, find actual end */
+    {
+        size_t chunk_size = sizeof(buf); /* not worth testing more */
+        size_t max_size = size;
+        size_t pos;
+        off_t read_offset = start + size - chunk_size;
+
+        if (read_offset < 0) return NULL; /* validate the tail position before reading from it */
+        pos = read_streamfile(buf, read_offset, chunk_size, sf);
+        if (pos <= 0x1a) return NULL;
+
+        pos -= 0x1a; /* at least one OggS page */
+        while (pos > 0) {
+            if (get_u32be(buf + pos + 0x00) == 0x4f676753) { /* "OggS" */
+                if (get_u16be(buf + pos + 0x04) == 0x0004) { /* last page flag is ok */
+                    real_size = max_size;
+                    break;
+                }
+                else { /* last page flag is wrong */
+                    max_size = size - (chunk_size - pos); /* update size up to this page */
+                }
+            }
+            pos--;
+        }
+    }
+
+    /* actual custom streamfile init */
+    {
+        STREAMFILE *new_sf = NULL;
+        riff_ogg_io_data io_data = {0};
+
+        io_data.patch_offset = patch_offset;
+
+        new_sf = open_wrap_streamfile(sf);
+        new_sf = open_clamp_streamfile_f(new_sf, start, real_size);
+        new_sf = open_io_streamfile_f(new_sf, &io_data, sizeof(riff_ogg_io_data), riff_ogg_io_read, NULL);
+        return new_sf;
+    }
+}
+
+#endif /* VGM_USE_VORBIS */
+
+#endif /* _RIFF_OGG_STREAMFILE_H_ */