diff --git a/src/coding/coding.h b/src/coding/coding.h
index 14ba7afa..1b3d52b1 100644
--- a/src/coding/coding.h
+++ b/src/coding/coding.h
@@ -270,7 +270,8 @@ void decode_ubi_adpcm(VGMSTREAM* vgmstream, sample_t* outbuf, int32_t samples_to
void reset_ubi_adpcm(ubi_adpcm_codec_data* data);
void seek_ubi_adpcm(ubi_adpcm_codec_data* data, int32_t num_sample);
void free_ubi_adpcm(ubi_adpcm_codec_data* data);
-int ubi_adpcm_get_samples(ubi_adpcm_codec_data* data);
+int32_t ubi_adpcm_get_samples(ubi_adpcm_codec_data* data);
+int32_t ubi_adpcm_bytes_to_samples(ubi_adpcm_codec_data* data, uint32_t size);
/* imuse_decoder */
diff --git a/src/coding/mta2_decoder.c b/src/coding/mta2_decoder.c
index 7afc7ea8..661e4efa 100644
--- a/src/coding/mta2_decoder.c
+++ b/src/coding/mta2_decoder.c
@@ -4,30 +4,37 @@
/* MTA2 decoder based on:
* - MGS Developer Wiki: https://www.mgsdevwiki.com/wiki/index.php/MTA2_(Codec) [codec by daemon1]
* - Solid4 tools: https://github.com/GHzGangster/Drebin
- * (PS3 probably uses floats, so this may not be 100% accurate)
+ * - Partially reverse engineered to fix tables
+ * - Internal codec name may be "vax2", with Mta2 being the file format.
*
* MTA2 layout:
* - data is divided into N tracks of 0x10 header + 0x90 frame per track channel, forming N streams
* ex: 8ch: track0 4ch + track1 4ch + track0 4ch + track1 4ch ...; or 2ch = 1ch track0 + 1ch track1
* * up to 16 possible tracks, but max seen is 3 (ex. track0=sneaking, track1=action, track2=ambience)
* - each ch frame is divided into 4 headers + 4 vertical groups with nibbles (0x4*4 + 0x20*4)
- * ex. group1 is 0x04(4) + 0x14(4) + 0x24(4) + 0x34(4) ... (vertically maybe for paralelism?)
+ * ex. group1 is 0x04(4) + 0x14(4) + 0x24(4) + 0x34(4) ... (seemingly for vector parallelism)
*
* Due to this vertical layout and multiple hist/indexes, it decodes everything in a block between calls
* but discards unwanted data, instead of trying to skip to the target nibble. Meaning no need to save hist, and
* expects samples_to_do to be block_samples at most (could be simplified, I guess).
*/
-/* tweaked XA/PSX coefs << 8 */
+/* tblSsw2Vax2K0 / K1 (extended from classic XA's K0/K1), indexed by the 3-bit 'coefs' value of each group header */
+static const float VAX2_K0[8] = { 0.0, 0.9375, 1.796875, 1.53125, 1.90625, 1.796875, 1.796875, 0.9375 };
+static const float VAX2_K1[8] = { -0.0, -0.0, -0.8125, -0.859375, -0.9375, -0.9375, -0.859375, -0.40625 };
+/* tblSsw2Vax2Rng: multiplier applied to each nibble, indexed by the 5-bit 'scale' value of each group header */
+static const float VAX2_RANGES[32] = {
+ 1.0, 1.3125, 1.6875, 2.25, 2.9375, 3.8125, 5.0, 6.5625,
+ 8.5625, 11.1875, 14.625, 19.125, 25.0, 32.75, 42.8125, 55.9375,
+ 73.1875, 95.6875, 125.1875, 163.6875, 214.0625, 279.9375, 366.125, 478.8125,
+ 626.125, 818.8125, 1070.8125, 1400.375, 1831.375, 2395.0, 3132.0625, 4096.0
+};
+
+/* somewhat equivalent tables K*2^8 (as found elsewhere): */
+# if 0
static const int16_t mta2_coefs[8][2] = {
- { 0, 0 },
- { 240, 0 },
- { 460, -208 },
- { 392, -220 },
- { 488, -240 },
- { 460, -240 },
- { 460, -220 },
- { 240, -104 }
+ { 0, 0 }, { 240, 0 }, { 460, -208 }, { 392, -220 },
+ { 488, -240 }, { 460, -240 }, { 460, -220 }, { 240, -104 }
};
static const int mta2_scales[32] = {
@@ -36,6 +43,7 @@ static const int mta2_scales[32] = {
18736, 24503, 32043, 41905, 54802, 71668, 93724, 122568,
160290, 209620, 274133, 358500, 468831, 613119, 801811, 1048576
};
+#endif
/* decodes a block for a channel */
void decode_mta2(VGMSTREAMCHANNEL *stream, sample_t *outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel, int config) {
@@ -120,8 +128,8 @@ void decode_mta2(VGMSTREAMCHANNEL *stream, sample_t *outbuf, int channelspacing,
uint32_t group_header = get_u32be(frame + track_channel*0x90 + group*0x4);
hist2 = (short) ((group_header >> 16) & 0xfff0); /* upper 16b discarding 4b */
hist1 = (short) ((group_header >> 4) & 0xfff0); /* lower 16b discarding 4b */
- coefs = (group_header >> 5) & 0x7; /* mid 3b */
- scale = group_header & 0x1f; /* lower 5b */
+ coefs = (group_header >> 5) & 0x07; /* mid 3b */
+ scale = (group_header >> 0) & 0x1f; /* lower 5b */
/* write header samples (skips the last 2 group nibbles), like Drebin's decoder
* last 2 nibbles and next 2 header hist should match though */
@@ -146,8 +154,11 @@ void decode_mta2(VGMSTREAMCHANNEL *stream, sample_t *outbuf, int channelspacing,
sample = col&1 ? /* high nibble first */
get_low_nibble_signed(nibbles) :
get_high_nibble_signed(nibbles);
+#if 0
sample = sample * mta2_scales[scale];
sample = (sample + hist1 * mta2_coefs[coefs][0] + hist2 * mta2_coefs[coefs][1] + 128) >> 8;
+#endif
+ sample = (sample * VAX2_RANGES[scale] + hist1 * VAX2_K0[coefs] + hist2 * VAX2_K1[coefs]); /* f32 sample to int */
sample = clamp16(sample);
/* ignore last 2 nibbles (uses first 2 header samples) */
diff --git a/src/coding/ubi_adpcm_decoder.c b/src/coding/ubi_adpcm_decoder.c
index 0d7be9ef..948f039d 100644
--- a/src/coding/ubi_adpcm_decoder.c
+++ b/src/coding/ubi_adpcm_decoder.c
@@ -29,7 +29,7 @@ typedef struct {
uint32_t codes_per_subframe_last;
uint32_t codes_per_subframe;
uint32_t subframes_per_frame;
- uint32_t sample_rate;
+ uint32_t unknown18;
uint32_t unknown1c;
uint32_t unknown20;
uint32_t bits_per_sample;
@@ -179,18 +179,18 @@ void free_ubi_adpcm(ubi_adpcm_codec_data *data) {
/* ************************************************************************ */
static void read_header_state(uint8_t* data, ubi_adpcm_header_data* header) {
- header->signature = get_32bitLE(data + 0x00);
- header->sample_count = get_32bitLE(data + 0x04);
- header->subframe_count = get_32bitLE(data + 0x08);
- header->codes_per_subframe_last= get_32bitLE(data + 0x0c);
- header->codes_per_subframe = get_32bitLE(data + 0x10);
- header->subframes_per_frame = get_32bitLE(data + 0x14);
- header->sample_rate = get_32bitLE(data + 0x18); /* optional? */
- header->unknown1c = get_32bitLE(data + 0x1c); /* variable */
- header->unknown20 = get_32bitLE(data + 0x20); /* null? */
- header->bits_per_sample = get_32bitLE(data + 0x24);
- header->unknown28 = get_32bitLE(data + 0x28); /* 1~3? */
- header->channels = get_32bitLE(data + 0x2c);
+ header->signature = get_u32le(data + 0x00);
+ header->sample_count = get_u32le(data + 0x04);
+ header->subframe_count = get_u32le(data + 0x08);
+ header->codes_per_subframe_last= get_u32le(data + 0x0c);
+ header->codes_per_subframe = get_u32le(data + 0x10);
+ header->subframes_per_frame = get_u32le(data + 0x14);
+ header->unknown18 = get_u32le(data + 0x18); /* sometimes sample rate but also other values (garbage?) */
+ header->unknown1c = get_u32le(data + 0x1c); /* variable */
+ header->unknown20 = get_u32le(data + 0x20); /* null? */
+ header->bits_per_sample = get_u32le(data + 0x24);
+ header->unknown28 = get_u32le(data + 0x28); /* 1~3? */
+ header->channels = get_u32le(data + 0x2c);
}
static int parse_header(STREAMFILE* sf, ubi_adpcm_codec_data* data, off_t offset) {
@@ -463,7 +463,7 @@ static void unpack_codes(uint8_t* data, uint8_t* codes, int code_count, int bps)
for (i = 0; i < code_count; i++) {
if (bits < bps) {
- uint32_t source32le = (uint32_t)get_32bitLE(data + pos);
+ uint32_t source32le = get_u32le(data + pos);
pos += 0x04;
input = (input << 32) | (uint64_t)source32le;
@@ -480,31 +480,31 @@ static void read_channel_state(uint8_t* data, ubi_adpcm_channel_data* ch) {
* probably exist for padding (original code uses MMX to operate in multiple 16b at the same time)
* or reserved for other bit modes */
- ch->signature = get_32bitLE(data + 0x00);
- ch->step1 = get_32bitLE(data + 0x04);
- ch->next1 = get_32bitLE(data + 0x08);
- ch->next2 = get_32bitLE(data + 0x0c);
+ ch->signature = get_u32le(data + 0x00);
+ ch->step1 = get_s32le(data + 0x04);
+ ch->next1 = get_s32le(data + 0x08);
+ ch->next2 = get_s32le(data + 0x0c);
- ch->coef1 = get_16bitLE(data + 0x10);
- ch->coef2 = get_16bitLE(data + 0x12);
- ch->unused1 = get_16bitLE(data + 0x14);
- ch->unused2 = get_16bitLE(data + 0x16);
- ch->mod1 = get_16bitLE(data + 0x18);
- ch->mod2 = get_16bitLE(data + 0x1a);
- ch->mod3 = get_16bitLE(data + 0x1c);
- ch->mod4 = get_16bitLE(data + 0x1e);
+ ch->coef1 = get_s16le(data + 0x10);
+ ch->coef2 = get_s16le(data + 0x12);
+ ch->unused1 = get_s16le(data + 0x14);
+ ch->unused2 = get_s16le(data + 0x16);
+ ch->mod1 = get_s16le(data + 0x18);
+ ch->mod2 = get_s16le(data + 0x1a);
+ ch->mod3 = get_s16le(data + 0x1c);
+ ch->mod4 = get_s16le(data + 0x1e);
- ch->hist1 = get_16bitLE(data + 0x20);
- ch->hist2 = get_16bitLE(data + 0x22);
- ch->unused3 = get_16bitLE(data + 0x24);
- ch->unused4 = get_16bitLE(data + 0x26);
- ch->delta1 = get_16bitLE(data + 0x28);
- ch->delta2 = get_16bitLE(data + 0x2a);
- ch->delta3 = get_16bitLE(data + 0x2c);
- ch->delta4 = get_16bitLE(data + 0x2e);
+ ch->hist1 = get_s16le(data + 0x20);
+ ch->hist2 = get_s16le(data + 0x22);
+ ch->unused3 = get_s16le(data + 0x24);
+ ch->unused4 = get_s16le(data + 0x26);
+ ch->delta1 = get_s16le(data + 0x28);
+ ch->delta2 = get_s16le(data + 0x2a);
+ ch->delta3 = get_s16le(data + 0x2c);
+ ch->delta4 = get_s16le(data + 0x2e);
- ch->delta5 = get_16bitLE(data + 0x30);
- ch->unused5 = get_16bitLE(data + 0x32);
+ ch->delta5 = get_s16le(data + 0x30);
+ ch->unused5 = get_s16le(data + 0x32);
VGM_ASSERT(ch->signature != 0x02, "UBI ADPCM: incorrect channel header\n");
VGM_ASSERT(ch->unused3 != 0x00, "UBI ADPCM: found unused3 used\n");
@@ -580,9 +580,28 @@ static void decode_frame(STREAMFILE* sf, ubi_adpcm_codec_data* data) {
}
-int ubi_adpcm_get_samples(ubi_adpcm_codec_data* data) {
+int32_t ubi_adpcm_get_samples(ubi_adpcm_codec_data* data) {
if (!data)
return 0;
return data->header.sample_count / data->header.channels;
}
+
+int32_t ubi_adpcm_bytes_to_samples(ubi_adpcm_codec_data* data, uint32_t size) {
+    uint32_t frame_size;
+    /* estimates samples from 'size' bytes of data (0x30 header + N frames) using frame sizes, as subframe count isn't trusted */
+    if (!data || !data->header.channels || !data->header.subframes_per_frame)
+        return 0;
+    /* header only (or truncated): no frames, and unsigned 'size - 0x30 - 0x01' below would wrap around to a huge value */
+    if (size <= 0x30)
+        return 0;
+    size -= 0x30; /* header */
+
+    frame_size = 0x34 * data->header.channels; /* setup per channel */
+    frame_size += (data->header.codes_per_subframe * data->header.bits_per_sample /*+ 8*/) * data->header.subframes_per_frame / 8;
+    frame_size += data->header.subframes_per_frame * 0x01; /* padding byte */
+
+    return ((size - 0x01) / frame_size) * /* force smaller size so last frame isn't used */
+            data->header.codes_per_subframe * data->header.subframes_per_frame +
+            data->header.codes_per_subframe_last * data->header.subframes_per_frame;
+}
diff --git a/src/libvgmstream.vcxproj b/src/libvgmstream.vcxproj
index b57dda2b..f2d84dd6 100644
--- a/src/libvgmstream.vcxproj
+++ b/src/libvgmstream.vcxproj
@@ -175,6 +175,7 @@
+
@@ -727,6 +728,7 @@
+
diff --git a/src/libvgmstream.vcxproj.filters b/src/libvgmstream.vcxproj.filters
index 09f7c57d..a3c74ab1 100644
--- a/src/libvgmstream.vcxproj.filters
+++ b/src/libvgmstream.vcxproj.filters
@@ -323,6 +323,9 @@
Header Files
+
+ Header Files
+
@@ -1969,6 +1972,9 @@
Source Files
+
+ Source Files
+
meta\Source Files
diff --git a/src/meta/bkhd.c b/src/meta/bkhd.c
index 37f7dde7..2b255bfd 100644
--- a/src/meta/bkhd.c
+++ b/src/meta/bkhd.c
@@ -126,25 +126,14 @@ VGMSTREAM* init_vgmstream_bkhd(STREAMFILE* sf) {
/* detect format */
if (subfile_offset <= 0 || subfile_size <= 0) {
- /* some indexes don't have data */
is_dummy = 1;
- }
- else if (read_f32(subfile_offset + 0x02, sf) >= 30.0 &&
- read_f32(subfile_offset + 0x02, sf) <= 250.0) {
- /* ignore Wwise's custom .wmid (similar to a regular midi but with simplified
- * chunks and custom fields: 0x00=MThd's division, 0x02: bpm (new), etc) */
- is_wmid = 1;
- }
- /* default is riff/sfx */
-
-
- if (is_dummy || is_wmid) {
- /* for now leave a dummy song for easier .bnk index-to-subsong mapping */
+ /* rarely some indexes don't have data (early bnk)
+ * for now leave a dummy song for easier .bnk index-to-subsong mapping */
vgmstream = init_vgmstream_silence(0, 0, 0);
if (!vgmstream) goto fail;
}
else {
- /* could pass .wem but few files need memory .wem detection */
+ /* could pass .wem extension but few files need memory .wem detection */
temp_sf = setup_subfile_streamfile(sf, subfile_offset, subfile_size, NULL);
if (!temp_sf) goto fail;
@@ -153,13 +142,21 @@ VGMSTREAM* init_vgmstream_bkhd(STREAMFILE* sf) {
vgmstream = init_vgmstream_wwise_bnk(temp_sf, &prefetch);
if (!vgmstream) goto fail;
}
+ else if (read_f32(subfile_offset + 0x02, temp_sf) >= 30.0 &&
+ read_f32(subfile_offset + 0x02, temp_sf) <= 250.0) {
+ is_wmid = 1;
+ /* ignore Wwise's custom .wmid (similar to a regular midi but with simplified
+ * chunks and custom fields: 0x00=MThd's division, 0x02: bpm (new), etc) */
+ vgmstream = init_vgmstream_silence(0, 0, 0);
+ if (!vgmstream) goto fail;
+ }
else {
+ /* may fail if not an actual wfx */
vgmstream = init_vgmstream_bkhd_fx(temp_sf);
if (!vgmstream) goto fail;
}
}
-
vgmstream->num_streams = total_subsongs;
{
diff --git a/src/meta/sqex_sead.c b/src/meta/sqex_sead.c
index 651b7cf3..f1e30be7 100644
--- a/src/meta/sqex_sead.c
+++ b/src/meta/sqex_sead.c
@@ -79,9 +79,14 @@ VGMSTREAM* init_vgmstream_sqex_sead(STREAMFILE* sf) {
goto fail;
}
- /* SEAD handles both sab/mab in the same lib, and other similar files (config, engine, etc).
+ /* SEAD handles both sab/mab in the same lib (libsead), and other similar files (config, engine, etc).
* Has some chunks pointing to sections, and each section entry (usually starting with section
- * version/reserved/size) is always padded to 0x10. Most values are unsigned. */
+ * version/reserved/size) is always padded to 0x10. Most values are unsigned.
+ *
+ * "SEAD Engine" (Square Enix Application on Demand Engine) is/was SQEX's internal middleware (~2006),
+ * so it's possible SEAD refers to the whole thing rather than audio, but since .sab/mab audio lib typically goes
+ * with other engines it's hard to say if "libsead" is the whole engine but trimmed with only audio functions,
+ * or is a separate audio lib derived from this "SEAD Engine". */
sead.big_endian = guess_endianness16bit(0x06, sf); /* no flag, use size */
diff --git a/src/meta/sspf.c b/src/meta/sspf.c
index 649abf83..2929fa4e 100644
--- a/src/meta/sspf.c
+++ b/src/meta/sspf.c
@@ -1,6 +1,8 @@
#include "meta.h"
+static int freq_to_rate(int freq);
+
/* SSPF - Konami/KCET banks [Metal Gear Solid 4 (PS3)] */
VGMSTREAM* init_vgmstream_sspf(STREAMFILE* sf) {
VGMSTREAM* vgmstream = NULL;
@@ -8,7 +10,7 @@ VGMSTREAM* init_vgmstream_sspf(STREAMFILE* sf) {
int loop_flag, channels, sample_rate;
int32_t num_samples, loop_start;
int total_subsongs, target_subsong = sf->stream_index;
- uint32_t file_size, pad_size, offset, bwav_offset, iwav_offset, ssw2_offset, stream_size;
+ uint32_t file_size, pad_size, offset, bwav_offset, iwav_offset, wave_offset, stream_size;
uint32_t codec;
@@ -45,42 +47,43 @@ VGMSTREAM* init_vgmstream_sspf(STREAMFILE* sf) {
offset = iwav_offset + 0x10 + (target_subsong - 1) * 0x20;
/* IWAV entry supposedly contains more info but seems only offset and some ID at 0x14, rest is 0 */
- ssw2_offset = read_u32be(offset + 0x00,sf) + bwav_offset;
- if (is_id32be(ssw2_offset,sf, "SSWF")) {
- /*
- 04 kType (always 0x01)
- 05 nChannels
- 06 freq
- 08 lpStart
- 0C nSamples
- */
+ wave_offset = read_u32be(offset + 0x00,sf) + bwav_offset;
+ if (is_id32be(wave_offset,sf, "SSWF")) {
+ codec = read_u8(wave_offset + 0x04,sf); /* kType (always 0x01) */
+ if (read_u8(wave_offset + 0x05,sf) != 0x01) /* nChannels? */
+ goto fail;
+ sample_rate = read_u16be(wave_offset + 0x06,sf); /* not freq (ex. 48000 is used) */
+ loop_start = read_s32be(wave_offset + 0x08,sf);
+ num_samples = read_s32be(wave_offset + 0x0c,sf);
- /* data is some unknown codec that seems to be ADPCM header + byte (simplified MTA2 with only 1 group?) */
- vgm_logi("SSPF: unsupported SSWF variant at %x\n", ssw2_offset);
- goto fail;
+ channels = 1;
+ loop_flag = loop_start != 0x7FFFFFFF;
+ start_offset = wave_offset + 0x10;
+
+ stream_size = 0x10 + (num_samples * channels * 0x02); /* implicit */
}
- else if (is_id32be(ssw2_offset,sf, "SSW2")) {
- stream_size = read_u32be(ssw2_offset + 0x04,sf);
+ else if (is_id32be(wave_offset,sf, "SSW2")) {
+ stream_size = read_u32be(wave_offset + 0x04,sf);
/* 08 version? (always 0) */
- num_samples = read_s32be(ssw2_offset + 0x0c,sf);
- codec = read_u32be(ssw2_offset + 0x10,sf); /* kType (always 0x21) */
- if (read_u32be(ssw2_offset + 0x10,sf) != 0x21)
+ num_samples = read_s32be(wave_offset + 0x0c,sf);
+ codec = read_u32be(wave_offset + 0x10,sf); /* kType (always 0x21) */
+ if (read_u32be(wave_offset + 0x10,sf) != 0x21)
goto fail;
- if (read_u8(ssw2_offset + 0x14,sf) != 0x08) /* nBlocks? */
+ if (read_u8(wave_offset + 0x14,sf) != 0x08) /* nBlocks? */
goto fail;
- if (read_u8(ssw2_offset + 0x15,sf) != 0x01) /* nChannels? */
+ if (read_u8(wave_offset + 0x15,sf) != 0x01) /* nChannels? */
goto fail;
channels = 1;
- sample_rate = read_u16be(ssw2_offset + 0x16,sf);
- loop_start = read_s32be(ssw2_offset + 0x18,sf);
+ sample_rate = freq_to_rate(read_u16be(wave_offset + 0x16,sf)); /* freq value */
+ loop_start = read_s32be(wave_offset + 0x18,sf);
/* 0x1c: lpStartAddr (0xFFFFFFFF is none) */
loop_flag = loop_start != 0x7FFFFFFF;
- start_offset = ssw2_offset + 0x20;
+ start_offset = wave_offset + 0x20;
}
else {
- vgm_logi("SSPF: unknown variant at %x\n", ssw2_offset);
+ vgm_logi("SSPF: unknown variant at %x\n", wave_offset);
goto fail;
}
@@ -99,6 +102,12 @@ VGMSTREAM* init_vgmstream_sspf(STREAMFILE* sf) {
vgmstream->stream_size = stream_size;
switch (codec) {
+ case 0x01:
+ vgmstream->coding_type = coding_PCM16BE;
+ vgmstream->layout_type = layout_interleave;
+ vgmstream->interleave_block_size = 0x02;
+ break;
+
case 0x21:
vgmstream->coding_type = coding_MTA2;
vgmstream->codec_config = 1;
@@ -118,3 +127,33 @@ fail:
close_vgmstream(vgmstream);
return NULL;
}
+
+/* transforms internal freq to sample rate (only known values are mapped; others are logged and returned unchanged) */
+static int freq_to_rate(int freq) {
+ /* from PowerPC code seems like it's trying something like this, but not quite (PPC is complex):
+ if ((freq & 0xFF) != 0)
+ return powf(10.0, 0.0117647 * (freq & 0xFF)) * 20.0;
+ return powf(10.0, 0.0117647 * 2048) * 20.0; //???
+ */
+
+ //TODO improve, for now fake it
+ switch(freq) {
+ case 0x9000: return 24000; /* most voices, sounds right */
+ case 0xA200: return 48000; /* most sfx */
+ /* rest is rarely used for some sfx, so it's hard to guess actual frequency and this is just approximate */
+ case 0x9fcd: return 44100;
+ case 0x9c9c: return 39000;
+ case 0x9b79: return 38000;
+ case 0x9b13: return 37000;
+ case 0x9a88: return 36000;
+ case 0x9778: return 32000;
+ case 0x9401: return 28000;
+ case 0x8578: return 16000;
+ case 0x7e00: return 11050;
+ default:
+ VGM_LOG("SSPF: unknown freq %x\n", freq);
+ break;
+ }
+
+ return freq; /* unknown freq: pass the raw value through */
+}
diff --git a/src/meta/txth.c b/src/meta/txth.c
index 8f746f00..e2354841 100644
--- a/src/meta/txth.c
+++ b/src/meta/txth.c
@@ -2,8 +2,11 @@
#include "../coding/coding.h"
#include "../layout/layout.h"
#include "txth_streamfile.h"
+#include "../util/text_reader.h"
-#define TXT_LINE_MAX 0x2000
+#define TXT_LINE_MAX 2048 /* probably ~1000 would be ok */
+#define TXT_LINE_KEY_MAX 128
+#define TXT_LINE_VAL_MAX (TXT_LINE_MAX - TXT_LINE_KEY_MAX)
/* known TXTH types */
typedef enum {
@@ -862,7 +865,7 @@ static int get_padding_size(txth_header* txth, int discard_empty);
/* Simple text parser of "key = value" lines.
* The code is meh and error handling not exactly the best. */
static int parse_txth(txth_header* txth) {
- off_t txt_offset, file_size;
+ uint32_t txt_offset;
/* setup txth defaults */
if (txth->sf_body)
@@ -872,23 +875,28 @@ static int parse_txth(txth_header* txth) {
txt_offset = read_bom(txth->sf_text);
- file_size = get_streamfile_size(txth->sf_text);
/* read lines */
{
- char line[TXT_LINE_MAX];
- char key[TXT_LINE_MAX];
- char val[TXT_LINE_MAX];
- /* at least as big as a line to avoid overflows (I hope) */
+ text_reader_t tr;
+ uint8_t buf[TXT_LINE_MAX + 1];
+ char key[TXT_LINE_KEY_MAX];
+ char val[TXT_LINE_VAL_MAX];
+ int ok, line_len;
+ char* line;
- while (txt_offset < file_size) {
- int ok, bytes_read, line_ok;
+ if (!text_reader_init(&tr, buf, sizeof(buf), txth->sf_text, txt_offset, 0))
+ goto fail;
- bytes_read = read_line(line, sizeof(line), txt_offset, txth->sf_text, &line_ok);
- if (!line_ok) goto fail;
- //;VGM_LOG("TXTH: line=%s\n",line);
+ do {
+ line_len = text_reader_get_line(&tr, &line);
+ if (line_len < 0) goto fail; /* too big for buf (maybe not text)) */
- txt_offset += bytes_read;
+ if (line == NULL) /* EOF */
+ break;
+
+ if (line_len == 0) /* empty */
+ continue;
/* get key/val (ignores lead spaces, stops at space/comment/separator) */
ok = sscanf(line, " %[^ \t#=] = %[^\t#\r\n] ", key,val);
@@ -897,7 +905,8 @@ static int parse_txth(txth_header* txth) {
if (!parse_keyval(txth->sf, txth, key, val)) /* read key/val */
goto fail;
- }
+
+ } while (line_len >= 0);
}
if (!txth->loop_flag_set)
diff --git a/src/meta/txtp.c b/src/meta/txtp.c
index ad51936d..72df65d7 100644
--- a/src/meta/txtp.c
+++ b/src/meta/txtp.c
@@ -3,11 +3,14 @@
#include "../layout/layout.h"
#include "../mixing.h"
#include "../plugins.h"
+#include "../util/text_reader.h"
#include
-#define TXTP_LINE_MAX 1024
+#define TXT_LINE_MAX 2048 /* some wwise .txtp get wordy */
+#define TXT_LINE_KEY_MAX 128
+#define TXT_LINE_VAL_MAX (TXT_LINE_MAX - TXT_LINE_KEY_MAX)
#define TXTP_MIXING_MAX 512
#define TXTP_GROUP_MODE_SEGMENTED 'S'
#define TXTP_GROUP_MODE_LAYERED 'L'
@@ -68,7 +71,7 @@ typedef struct {
typedef struct {
/* main entry */
- char filename[TXTP_LINE_MAX];
+ char filename[TXT_LINE_MAX];
int silent;
/* TXTP settings (applied at the end) */
@@ -1271,7 +1274,7 @@ static inline int is_match(const char* str1, const char* str2) {
static void parse_params(txtp_entry* entry, char* params) {
/* parse params: #(commands) */
int n, nc, nm, mc;
- char command[TXTP_LINE_MAX];
+ char command[TXT_LINE_MAX];
play_config_t* tcfg = &entry->config;
entry->range_start = 0;
@@ -1802,7 +1805,7 @@ fail:
static int is_substring(const char* val, const char* cmp) {
int n;
- char subval[TXTP_LINE_MAX];
+ char subval[TXT_LINE_MAX];
/* read string without trailing spaces or comments/commands */
if (sscanf(val, " %s%n[^ #\t\r\n]%n", subval, &n, &n) != 1)
@@ -1862,12 +1865,12 @@ static int parse_keyval(txtp_header* txtp, const char* key, const char* val) {
}
}
else if (0==strcmp(key,"commands")) {
- char val2[TXTP_LINE_MAX];
+ char val2[TXT_LINE_MAX];
strcpy(val2, val); /* copy since val is modified here but probably not important */
if (!add_entry(txtp, val2, 1)) goto fail;
}
else if (0==strcmp(key,"group")) {
- char val2[TXTP_LINE_MAX];
+ char val2[TXT_LINE_MAX];
strcpy(val2, val); /* copy since val is modified here but probably not important */
if (!add_group(txtp, val2)) goto fail;
@@ -1884,7 +1887,7 @@ fail:
static txtp_header* parse_txtp(STREAMFILE* sf) {
txtp_header* txtp = NULL;
- off_t txt_offset, file_size;
+ uint32_t txt_offset;
txtp = calloc(1,sizeof(txtp_header));
@@ -1894,23 +1897,28 @@ static txtp_header* parse_txtp(STREAMFILE* sf) {
txtp->is_segmented = 1;
txt_offset = read_bom(sf);
- file_size = get_streamfile_size(sf);
/* read and parse lines */
{
- char line[TXTP_LINE_MAX];
- char key[TXTP_LINE_MAX];
- char val[TXTP_LINE_MAX];
- char filename[TXTP_LINE_MAX];
- /* at least as big as a line to avoid overflows (I hope) */
+ text_reader_t tr;
+ uint8_t buf[TXT_LINE_MAX + 1];
+ char key[TXT_LINE_KEY_MAX];
+ char val[TXT_LINE_VAL_MAX];
+ int ok, line_len;
+ char* line;
- while (txt_offset < file_size) {
- int ok, bytes_read, line_ok;
+ if (!text_reader_init(&tr, buf, sizeof(buf), sf, txt_offset, 0))
+ goto fail;
- bytes_read = read_line(line, sizeof(line), txt_offset, sf, &line_ok);
- if (!line_ok) goto fail;
+ do {
+ line_len = text_reader_get_line(&tr, &line);
+ if (line_len < 0) goto fail; /* too big for buf (maybe not text)) */
- txt_offset += bytes_read;
+ if (line == NULL) /* EOF */
+ break;
+
+ if (line_len == 0) /* empty */
+ continue;
/* try key/val (ignores lead/trail spaces, # may be commands or comments) */
ok = sscanf(line, " %[^ \t#=] = %[^\t\r\n] ", key,val);
@@ -1921,16 +1929,17 @@ static txtp_header* parse_txtp(STREAMFILE* sf) {
}
/* must be a filename (only remove spaces from start/end, as filenames con contain mid spaces/#/etc) */
- ok = sscanf(line, " %[^\t\r\n] ", filename);
+ ok = sscanf(line, " %[^\t\r\n] ", val);
if (ok != 1) /* not a filename either */
continue;
- if (filename[0] == '#')
+ if (val[0] == '#')
continue; /* simple comment */
/* filename with settings */
- if (!add_entry(txtp, filename, 0))
+ if (!add_entry(txtp, val, 0))
goto fail;
- }
+
+ } while (line_len >= 0);
}
/* mini-txth: if no entries are set try with filename, ex. from "song.ext#3.txtp" use "song.ext#3"
diff --git a/src/meta/ubi_hx.c b/src/meta/ubi_hx.c
index e43379a3..3b80d4b8 100644
--- a/src/meta/ubi_hx.c
+++ b/src/meta/ubi_hx.c
@@ -4,11 +4,12 @@
#include "../util/endianness.h"
-typedef enum { PCM, UBI, PSX, DSP, XIMA, ATRAC3, XMA2, MP3 } ubi_hx_codec;
+typedef enum { PCM, UBI, PSX, DSP, XIMA, ATRAC3, XMA2, MP3, SILENCE } ubi_hx_codec;
typedef struct {
int big_endian;
int total_subsongs;
+ int is_riff;
int codec_id;
ubi_hx_codec codec; /* unified codec */
@@ -68,7 +69,7 @@ VGMSTREAM* init_vgmstream_ubi_hx(STREAMFILE* sf) {
* Game seems to play files by calling linked ids: EventResData (play/stop/etc) > Random/Program/Wav ResData (1..N refs) > FileIdObj */
/* HX HEADER */
- hx.big_endian = guess_endianness32bit(0x00, sf);
+ hx.big_endian = guess_endian32(0x00, sf);
if (!parse_hx(&hx, sf, target_subsong))
goto fail;
@@ -140,13 +141,19 @@ fail:
static int parse_name(ubi_hx_header* hx, STREAMFILE* sf) {
read_u32_t read_u32 = hx->big_endian ? read_u32be : read_u32le;
read_s32_t read_s32 = hx->big_endian ? read_s32be : read_s32le;
- off_t index_offset, offset;
+ uint32_t index_type, index_offset, offset;
int i, index_entries;
char class_name[255];
index_offset = read_u32(0x00, sf);
+ index_type = read_u32(index_offset + 0x04, sf);
index_entries = read_s32(index_offset + 0x08, sf);
+
+ /* doesn't seem to have names (no way to link) */
+ if (index_type == 0x01)
+ return 1;
+
offset = index_offset + 0x0c;
for (i = 0; i < index_entries; i++) {
off_t header_offset;
@@ -169,29 +176,34 @@ static int parse_name(ubi_hx_header* hx, STREAMFILE* sf) {
//unknown_count = read_s32(offset + 0x00, sf);
offset += 0x04;
- link_count = read_s32(offset + 0x00, sf);
- offset += 0x04;
- for (j = 0; j < link_count; j++) {
- uint32_t link_id1 = read_u32(offset + 0x00, sf);
- uint32_t link_id2 = read_u32(offset + 0x04, sf);
-
- if (link_id1 == hx->cuuid1 && link_id2 == hx->cuuid2) {
- is_found = 1;
- }
- offset += 0x08;
+ if (index_type == 0x01) {
+ goto fail;
}
+ else {
+ link_count = read_s32(offset + 0x00, sf);
+ offset += 0x04;
+ for (j = 0; j < link_count; j++) {
+ uint32_t link_id1 = read_u32(offset + 0x00, sf);
+ uint32_t link_id2 = read_u32(offset + 0x04, sf);
- language_count = read_s32(offset + 0x00, sf);
- offset += 0x04;
- for (j = 0; j < language_count; j++) {
- uint32_t link_id1 = read_u32(offset + 0x08, sf);
- uint32_t link_id2 = read_u32(offset + 0x0c, sf);
-
- if (link_id1 == hx->cuuid1 && link_id2 == hx->cuuid2) {
- is_found = 1;
+ if (link_id1 == hx->cuuid1 && link_id2 == hx->cuuid2) {
+ is_found = 1;
+ }
+ offset += 0x08;
}
- offset += 0x10;
+ language_count = read_s32(offset + 0x00, sf);
+ offset += 0x04;
+ for (j = 0; j < language_count; j++) {
+ uint32_t link_id1 = read_u32(offset + 0x08, sf);
+ uint32_t link_id2 = read_u32(offset + 0x0c, sf);
+
+ if (link_id1 == hx->cuuid1 && link_id2 == hx->cuuid2) {
+ is_found = 1;
+ }
+
+ offset += 0x10;
+ }
}
/* identify all possible names so unknown platforms fail */
@@ -228,6 +240,7 @@ static int parse_name(ubi_hx_header* hx, STREAMFILE* sf) {
}
fail:
+ vgm_logi("UBI HX: error parsing name at %x (report)\n", index_offset);
return 0;
}
@@ -265,11 +278,14 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint
uint32_t flag_type = read_u32(offset + 0x00, sf);
if (flag_type == 0x01 || flag_type == 0x02) { /* Rayman Arena */
- uint32_t unk_value = read_u32(offset + 0x04, sf);
- if (unk_value != 0x00 && /* common */
- unk_value != 0xbe570a3d && /* Largo Winch: Empire Under Threat (PC)-most */
- unk_value != 0xbf8e147b) /* Largo Winch: Empire Under Threat (PC)-few */
+ uint32_t unk_value = read_u32(offset + 0x04, sf); /* float? */
+ if (unk_value != 0x00 && /* common */
+ unk_value != 0xbe570a3d && /* Largo Winch: Empire Under Threat (PC)-most */
+ unk_value != 0xbf8e147b) { /* Largo Winch: Empire Under Threat (PC)-few */
+ VGM_LOG("ubi hx: unknown flag\n");
goto fail;
+ }
+
hx->stream_mode = read_u32(offset + 0x08, sf); /* flag: 0=internal, 1=external */
/* 0x0c: flag: 0=static, 1=stream */
offset += 0x10;
@@ -279,7 +295,8 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint
offset += 0x08;
if (strcmp(hx->class_name, "CGCWaveFileIdObj") == 0) {
- if (read_u32(offset + 0x00, sf) != read_u32(offset + 0x04, sf)) goto fail; /* meaning? */
+ if (read_u32(offset + 0x00, sf) != read_u32(offset + 0x04, sf))
+ goto fail; /* meaning? */
hx->stream_mode = read_u32(offset + 0x04, sf);
offset += 0x08;
}
@@ -302,6 +319,7 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint
//todo probably a flag: &1=external, &2=stream, &8=has adjust (XIII), &4=??? (XIII PS2, small, mono)
switch(hx->stream_mode) {
case 0x00: /* memory (internal file) */
+ case 0x02: /* same (no diffs in size/channels/etc?) [Rayman 3 demo (PC)] */
riff_offset = offset;
riff_size = read_u32(riff_offset + 0x04, sf) + 0x08;
break;
@@ -321,13 +339,17 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint
break;
default:
- VGM_LOG("ubi hx: %x\n", hx->stream_mode);
+ VGM_LOG("ubi hx: unknown stream mode %x\n", hx->stream_mode);
goto fail;
}
/* parse pseudo-RIFF "fmt" */
- if (read_u32(riff_offset, sf) != 0x46464952) /* "RIFF" in machine endianness */
+ if (read_u32(riff_offset, sf) != 0x46464952) { /* "RIFF" in machine endianness */
+ VGM_LOG("ubi hx: unknown RIFF\n");
goto fail;
+ }
+
+ hx->is_riff = 1;
hx->codec_id = read_u16(riff_offset + 0x14 , sf);
switch(hx->codec_id) {
@@ -357,12 +379,15 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint
hx->stream_offset = read_u32(chunk_offset + 0x00, sf) + stream_adjust;
}
else {
+ VGM_LOG("ubi hx: unknown chunk\n");
goto fail;
}
}
else {
- if (!find_chunk_riff_ve(sf, 0x61746164,riff_offset + 0x0c,riff_size - 0x0c, &chunk_offset,&chunk_size, hx->big_endian))
+ if (!find_chunk_riff_ve(sf, 0x61746164,riff_offset + 0x0c,riff_size - 0x0c, &chunk_offset,&chunk_size, hx->big_endian)) {
+ VGM_LOG("ubi hx: unknown chunk RIFF\n");
goto fail;
+ }
hx->stream_offset = chunk_offset;
if (chunk_size > riff_size - (chunk_offset - riff_offset) || !chunk_size)
@@ -384,7 +409,11 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint
//todo some dummy files have 0 size
- if (read_u32(offset + 0x00, sf) != 0x01) goto fail;
+ if (read_u32(offset + 0x00, sf) != 0x01) {
+ VGM_LOG("ubi hx: unknown flag non 0x01\n");
+ goto fail;
+ }
+
/* 0x04: some kind of parent id shared by multiple Waves, or 0 */
offset += 0x08;
@@ -400,7 +429,9 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint
switch(hx->channels) {
case 0x48: hx->channels = 1; break;
case 0x90: hx->channels = 2; break;
- default: goto fail;
+ default:
+ VGM_LOG("ubi hx: channel type %x\n", hx->channels);
+ goto fail;
}
hx->sample_rate = (read_u16(offset + 0x02, sf) & 0x7FFFu) << 1u; /* ??? */
cue_flag = read_u8(offset + 0x03, sf) & (1 << 7);
@@ -461,6 +492,7 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint
}
}
else {
+ VGM_LOG("ubi hx: unknown type\n");
goto fail;
}
@@ -478,13 +510,21 @@ static int parse_hx(ubi_hx_header* hx, STREAMFILE* sf, int target_subsong) {
uint32_t index_offset, offset;
int i, index_entries;
char class_name[255];
+ uint32_t index_type;
index_offset = read_u32(0x00, sf);
- if (read_u32(index_offset + 0x00, sf) != get_id32be("XDNI")) /* (INDX in given endianness) */
+ if (read_u32(index_offset + 0x00, sf) != get_id32be("XDNI")) { /* (INDX in given endianness) */
+ VGM_LOG("ubi hx: unknown index\n");
goto fail;
- if (read_u32(index_offset + 0x04, sf) != 0x02) /* type? */
+ }
+
+ /* usually 0x02, rarely 0x01 [Rayman M demo (PS2)] */
+ index_type = read_u32(index_offset + 0x04, sf);
+ if (index_type != 0x01 && index_type != 0x02) {
+ VGM_LOG("ubi hx: unknown index type\n");
goto fail;
+ }
if (target_subsong == 0) target_subsong = 1;
@@ -517,23 +557,29 @@ static int parse_hx(ubi_hx_header* hx, STREAMFILE* sf, int target_subsong) {
}
offset += 0x04;
- /* ids that this object directly points to (ex. Event > Random) */
- link_count = read_s32(offset + 0x00, sf);
- offset += 0x04 + 0x08 * link_count;
+ if (index_type == 0x01) {
+ link_count = 0;
+ language_count = 0;
+ }
+ else {
+ /* ids that this object directly points to (ex. Event > Random) */
+ link_count = read_s32(offset + 0x00, sf);
+ offset += 0x04 + 0x08 * link_count;
- /* localized id list of WavRes (can use this list instead of the prev one) */
- language_count = read_s32(offset + 0x00, sf);
- offset += 0x04;
- for (j = 0; j < language_count; j++) {
- /* 0x00: lang code, in reverse endianness: "en ", "fr ", etc */
- /* 0x04: possibly count of ids for this lang */
- /* 0x08: id1+2 */
+ /* localized id list of WavRes (can use this list instead of the prev one) */
+ language_count = read_s32(offset + 0x00, sf);
+ offset += 0x04;
+ for (j = 0; j < language_count; j++) {
+ /* 0x00: lang code, in reverse endianness: "en ", "fr ", etc */
+ /* 0x04: possibly count of ids for this lang */
+ /* 0x08: id1+2 */
- if (read_u32(offset + 0x04, sf) != 1) {
- VGM_LOG("ubi hx: wrong lang count near %x\n", offset);
- goto fail; /* WavRes doesn't have this field */
+ if (read_u32(offset + 0x04, sf) != 1) {
+ VGM_LOG("ubi hx: wrong lang count near %x\n", offset);
+ goto fail; /* WavRes doesn't have this field */
+ }
+ offset += 0x10;
}
- offset += 0x10;
}
//todo figure out CProgramResData sequences
@@ -571,6 +617,7 @@ static int parse_hx(ubi_hx_header* hx, STREAMFILE* sf, int target_subsong) {
goto fail;
}
+ /* should only exist on non-wave objects (like CProgramResData) */
if (link_count != 0) {
vgm_logi("UBI HX: found links in wav object (report)\n");
goto fail;
@@ -632,6 +679,12 @@ static VGMSTREAM* init_vgmstream_ubi_hx_header(ubi_hx_header* hx, STREAMFILE* sf
sb = sf;
}
+ /* very rarely a game uses Ubi ADPCM, but data is empty and has missing header [Rayman 3 demo 3 (PC) fixe.hxc#84] */
+ if (hx->is_riff && hx->codec == UBI) { //todo improve
+ if (read_u32le(hx->stream_offset, sb) == 0x02) {
+ hx->codec = SILENCE;
+ }
+ }
/* build the VGMSTREAM */
vgmstream = allocate_vgmstream(hx->channels, hx->loop_flag);
@@ -658,6 +711,12 @@ static VGMSTREAM* init_vgmstream_ubi_hx_header(ubi_hx_header* hx, STREAMFILE* sf
vgmstream->layout_type = layout_none;
vgmstream->num_samples = ubi_adpcm_get_samples(vgmstream->codec_data);
+
+ /* some kind of internal bug I guess, seen in a few subsongs in Rayman 3 PC demo, other values are also buggy */
+ if (vgmstream->num_samples == 0x77E7A374) {
+ vgmstream->num_samples = ubi_adpcm_bytes_to_samples(vgmstream->codec_data, hx->stream_size);
+ }
+
/* XIII has 6-bit stereo music, Rayman 3 4-bit music, both use 6-bit mono) */
break;
@@ -745,6 +804,13 @@ static VGMSTREAM* init_vgmstream_ubi_hx_header(ubi_hx_header* hx, STREAMFILE* sf
break;
}
#endif
+
+ case SILENCE: /* special hack */
+ vgmstream->coding_type = coding_SILENCE;
+ vgmstream->layout_type = layout_none;
+
+ vgmstream->num_samples = ps_bytes_to_samples(hx->stream_size, hx->channels);
+ break;
default:
goto fail;
}
diff --git a/src/util/text_reader.c b/src/util/text_reader.c
new file mode 100644
index 00000000..4d8db192
--- /dev/null
+++ b/src/util/text_reader.c
@@ -0,0 +1,187 @@
+#include <string.h>
+#include "text_reader.h"
+#include "log.h"
+
+
+/* Inits tr to read sf from offset into buf; max = absolute end offset (0 = file end). Returns 1 if ok, 0 on bad params. */
+int text_reader_init(text_reader_t* tr, uint8_t* buf, int buf_size, STREAMFILE* sf, uint32_t offset, uint32_t max) {
+ memset(tr, 0, sizeof(text_reader_t));
+
+ if (buf_size <= 1 || !buf || !sf) /* buf must hold some data plus the reserved trailing \0 */
+ return 0;
+
+ tr->buf = buf;
+ tr->buf_size = buf_size;
+ tr->sf = sf;
+ tr->offset = offset;
+
+ if (!max) /* max_offset is compared against the absolute sf offset, so default to file end (not size - offset) */
+ max = get_streamfile_size(sf);
+ tr->max_offset = max;
+
+ return 1;
+}
+
+
+/* Advances pos to the next line, moves unread bytes to buf start and reads more data from sf (up to max_offset) */
+static void prepare_buf(text_reader_t* tr) {
+
+ /* since we may read N lines in the same buffer, move starting pos each call */
+ tr->pos = tr->next_pos;
+
+ /* no more data to load (buffered lines may still be read, so not an error) */
+ if (tr->offset >= tr->max_offset) {
+ return;
+ }
+
+ /* all buffered data was consumed, restart from buf start */
+ if (tr->pos >= tr->filled) {
+ tr->pos = 0;
+ tr->filled = 0;
+ }
+
+ /* unread data remains, move it to buf start to make room for more */
+ if (tr->pos > 0) {
+ int move_size = tr->filled - tr->pos;
+
+ memmove(tr->buf, &tr->buf[tr->pos], move_size); /* memmove = may overlap */
+ tr->filled -= tr->pos; /* now less filled */
+ tr->pos = 0;
+ }
+
+ /* buf is already full, nothing else can be loaded */
+ if (tr->filled >= tr->buf_size) {
+ return;
+ }
+
+ /* read buf up to max */
+ {
+ int bytes;
+ int read_size = tr->buf_size - tr->filled;
+ if (read_size + tr->offset > tr->max_offset)
+ read_size = tr->max_offset - tr->offset;
+
+ if (read_size <= 0) { /* nothing to request (presumably unreachable after the checks above) */
+ bytes = 0;
+ }
+ else {
+ if (tr->filled + read_size >= tr->buf_size)
+ read_size -= 1; /* always leave an extra byte for c-string null */
+
+ bytes = read_streamfile(tr->buf + tr->filled, tr->offset, read_size, tr->sf);
+ tr->offset += bytes;
+ tr->filled += bytes;
+ }
+
+ /* maybe some internal issue, force EOF */
+ if (bytes == 0) {
+ tr->offset = tr->max_offset;
+ }
+
+ /* ensure no old data is used as valid (simplifies some checks during parse) */
+ tr->buf[tr->filled] = '\0';
+ }
+}
+
+static void parse_buf(text_reader_t* tr) {
+ int i;
+ /* finds the line starting at pos: sets line, line_len (without line end), line_ok and next_pos */
+ tr->line = (char*)&tr->buf[tr->pos];
+ tr->line_len = 0;
+ tr->line_ok = 0;
+
+ /* detect EOF (this should only happen if no more data was loaded) */
+ if (tr->pos == tr->filled) {
+ tr->line = NULL;
+ tr->line_ok = 1;
+ tr->line_len = 0;
+ return;
+ }
+
+ /* assumes filled doesn't reach buf_size (to allow trailing \0 after filled) */
+ for (i = tr->pos; i < tr->filled; i++) {
+ char c = (char)tr->buf[i];
+
+ if (c == '\0') {
+ i++;
+ break; /* not a valid file? (line_ok=0) */
+ }
+
+ if (c == '\r' && tr->buf[i+1] == '\n') { /* CRLF (0x0d0a) */
+ /* i+1 may read past filled but it's pre-set to \0 */
+ i += 2; //todo check that i < buf_size-1
+ tr->line_ok = 1;
+ break;
+ }
+ else if (c == '\n') { /* LF (0x0a) */
+ i++;
+ tr->line_ok = 1;
+ break;
+ }
+ else if (c == '\r') { /* CR (0x0d) */
+ i++;
+ tr->line_ok = (i < tr->buf_size - 1);
+ /* if buf ends with a CR, next buf may start with a LF (single CRLF), so line is not ok near buf end
+ * (old Macs use single \r as lines, but using only that and reaching buf end should happen rarely) */
+ break;
+ }
+
+ tr->line_len++;
+ }
+
+ /* when lines are small may read up to filled smaller than buf, with no more data */
+ if (!tr->line_ok && i == tr->filled)
+ tr->line_ok = (tr->filled < tr->buf_size - 1);
+
+ /* added after proper line (a \n) or after buf end, so we aren't changing valid data */
+ tr->buf[tr->pos + tr->line_len] = '\0';
+ tr->next_pos = i;
+}
+
+int text_reader_get_line(text_reader_t* tr, char** p_line) {
+ /* sets *p_line to the next line (NULL on EOF); returns its length, or -(length + 1) if the line isn't ok */
+ if (!tr->buf) /* no init */
+ return 0;
+
+ /* how it works:
+ * - fills buffer up to max or buf_size, from pos 0
+ * - counts from 0 to next '\n' or EOF
+ * - nulls \n or after EOF to make a proper c-string
+ * - returns string from pos 0 to len
+ * - on next call rather than re-reading continues from pos N (after \n)
+ * - a buf will likely contain multiple lines
+ * - if read chars reach buf_end (no proper line found):
+ * - pos = 0: buf isn't big enough, error
+ * - pos > 0: move data to pos=0, fill rest of buf, parse again
+ *
+ * ex.
+ * - parse buf: read chunk full [aaaaa\nbbbb] (pos = 0)
+ * - get line: returns "aaaaa\0" (next_pos points to first 'b')
+ * - get line: from 'b', but reaches buf end before \n or EOF: must readjust
+ * - parse buf: move chunk part [bbbb*******] ('b' to beginning, * is garbage)
+ * - parse buf: read chunk part [bbbbbb\ncc_] (reaches EOF)
+ * - get line: returns "bbbbbb\0" (pos points to first c)
+ * - get line: returns "cc\0"
+ * - get line: returns NULL (reached EOF, no more bytes)
+ * - (there is an implicit \0 reserved in buf)
+ *
+ * ex.
+ * - start: read chunk [aaaaaaaaaaa]
+ * - get line: reaches buf end, but didn't reach EOF nor \n: error, can't store line
+ */
+
+ prepare_buf(tr); /* may not do anything */
+ parse_buf(tr); /* next line */
+
+ /* if we are reading a partial line there may be more data */
+ if (!tr->line_ok && tr->pos > 0) {
+ prepare_buf(tr);
+ parse_buf(tr); /* could continue from prev parse but makes logic more complex for little gain */
+ }
+
+ /* always output line even if truncated */
+ if (p_line) *p_line = tr->line;
+ return !tr->line_ok ?
+ -(tr->line_len + 1) : /* -0 also is possible, force -1 */
+ tr->line_len;
+}
diff --git a/src/util/text_reader.h b/src/util/text_reader.h
new file mode 100644
index 00000000..93dc51ab
--- /dev/null
+++ b/src/util/text_reader.h
@@ -0,0 +1,43 @@
+#ifndef _TEXT_READER_H_
+#define _TEXT_READER_H_
+
+
+/* Reader tuned for whole text files, reading chunks to minimize I/O with a single buffer.
+ * For short lines read_line may be more appropriate (reads up to line end, while this reads bigger chunks);
+ * read_line also allows \0 inside lines (this reader returns an error).
+ * NOTE: modifies passed buffer (lines are forced to end with \0 rather than \n).
+ *
+ * Usage: set text_reader_t and defaults with text_reader_init, call text_reader_get_line(...) to get lines.
+ * buf may be size+1 to allow 2^N chunk reads + trailing \0 (better performance?).
+ */
+
+#include "../streamfile.h"
+
+typedef struct {
+ /* init */
+ uint8_t* buf; /* where data will be read */
+ int buf_size; /* size of buf in bytes (also limits max line size) */
+ STREAMFILE* sf; /* used to read data */
+ uint32_t offset; /* sf pos of the next read */
+ uint32_t max_offset; /* sf pos where reading stops */
+
+ /* internal */
+ int filled; /* current buf bytes */
+ int pos; /* current buf pos (last line) */
+ int next_pos; /* buf pos on next call, after line end */
+ int line_ok; /* current line is fully correct */
+
+ char* line; /* current line inside buf (null-terminated), or NULL on EOF */
+ int line_len; /* current line length, not counting line end chars */
+} text_reader_t;
+
+
+/* convenience function to init the above struct */
+int text_reader_init(text_reader_t* tr, uint8_t* buf, int buf_size, STREAMFILE* sf, uint32_t offset, uint32_t max);
+
+/* Reads and sets next line, or NULL if no lines are found (EOF).
+ * returns line length (0 for empty lines), or <0 if line was too long to store in buf.
+ * Will always return a valid (null terminated) string. */
+int text_reader_get_line(text_reader_t* tr, char** p_line);
+
+#endif