diff --git a/src/coding/coding.h b/src/coding/coding.h index 14ba7afa..1b3d52b1 100644 --- a/src/coding/coding.h +++ b/src/coding/coding.h @@ -270,7 +270,8 @@ void decode_ubi_adpcm(VGMSTREAM* vgmstream, sample_t* outbuf, int32_t samples_to void reset_ubi_adpcm(ubi_adpcm_codec_data* data); void seek_ubi_adpcm(ubi_adpcm_codec_data* data, int32_t num_sample); void free_ubi_adpcm(ubi_adpcm_codec_data* data); -int ubi_adpcm_get_samples(ubi_adpcm_codec_data* data); +int32_t ubi_adpcm_get_samples(ubi_adpcm_codec_data* data); +int32_t ubi_adpcm_bytes_to_samples(ubi_adpcm_codec_data* data, uint32_t size); /* imuse_decoder */ diff --git a/src/coding/mta2_decoder.c b/src/coding/mta2_decoder.c index 7afc7ea8..661e4efa 100644 --- a/src/coding/mta2_decoder.c +++ b/src/coding/mta2_decoder.c @@ -4,30 +4,37 @@ /* MTA2 decoder based on: * - MGS Developer Wiki: https://www.mgsdevwiki.com/wiki/index.php/MTA2_(Codec) [codec by daemon1] * - Solid4 tools: https://github.com/GHzGangster/Drebin - * (PS3 probably uses floats, so this may not be 100% accurate) + * - Partially reverse engineered to fix tables + * - Internal codec name may be "vax2", with Mta2 being the file format. * * MTA2 layout: * - data is divided into N tracks of 0x10 header + 0x90 frame per track channel, forming N streams * ex: 8ch: track0 4ch + track1 4ch + track0 4ch + track1 4ch ...; or 2ch = 1ch track0 + 1ch track1 * * up to 16 possible tracks, but max seen is 3 (ex. track0=sneaking, track1=action, track2=ambience) * - each ch frame is divided into 4 headers + 4 vertical groups with nibbles (0x4*4 + 0x20*4) - * ex. group1 is 0x04(4) + 0x14(4) + 0x24(4) + 0x34(4) ... (vertically maybe for paralelism?) + * ex. group1 is 0x04(4) + 0x14(4) + 0x24(4) + 0x34(4) ... (seemingly for vector parallelism) * * Due to this vertical layout and multiple hist/indexes, it decodes everything in a block between calls * but discards unwanted data, instead of trying to skip to the target nibble. 
Meaning no need to save hist, and * expects samples_to_do to be block_samples at most (could be simplified, I guess). */ -/* tweaked XA/PSX coefs << 8 */ +/* tblSsw2Vax2K0 / K1 (extended from classic XA's K0/K1 */ +static const float VAX2_K0[8] = { 0.0, 0.9375, 1.796875, 1.53125, 1.90625, 1.796875, 1.796875, 0.9375 }; +static const float VAX2_K1[8] = { -0.0, -0.0, -0.8125, -0.859375, -0.9375, -0.9375, -0.859375, -0.40625 }; +/* tblSsw2Vax2Rng */ +static const float VAX2_RANGES[32] = { + 1.0, 1.3125, 1.6875, 2.25, 2.9375, 3.8125, 5.0, 6.5625, + 8.5625, 11.1875, 14.625, 19.125, 25.0, 32.75, 42.8125, 55.9375, + 73.1875, 95.6875, 125.1875, 163.6875, 214.0625, 279.9375, 366.125, 478.8125, + 626.125, 818.8125, 1070.8125, 1400.375, 1831.375, 2395.0, 3132.0625, 4096.0 +}; + +/* somewhat equivalent tables K*2^8 (as found elsewhere): */ +# if 0 static const int16_t mta2_coefs[8][2] = { - { 0, 0 }, - { 240, 0 }, - { 460, -208 }, - { 392, -220 }, - { 488, -240 }, - { 460, -240 }, - { 460, -220 }, - { 240, -104 } + { 0, 0 }, { 240, 0 }, { 460, -208 }, { 392, -220 }, + { 488, -240 }, { 460, -240 }, { 460, -220 }, { 240, -104 } }; static const int mta2_scales[32] = { @@ -36,6 +43,7 @@ static const int mta2_scales[32] = { 18736, 24503, 32043, 41905, 54802, 71668, 93724, 122568, 160290, 209620, 274133, 358500, 468831, 613119, 801811, 1048576 }; +#endif /* decodes a block for a channel */ void decode_mta2(VGMSTREAMCHANNEL *stream, sample_t *outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel, int config) { @@ -120,8 +128,8 @@ void decode_mta2(VGMSTREAMCHANNEL *stream, sample_t *outbuf, int channelspacing, uint32_t group_header = get_u32be(frame + track_channel*0x90 + group*0x4); hist2 = (short) ((group_header >> 16) & 0xfff0); /* upper 16b discarding 4b */ hist1 = (short) ((group_header >> 4) & 0xfff0); /* lower 16b discarding 4b */ - coefs = (group_header >> 5) & 0x7; /* mid 3b */ - scale = group_header & 0x1f; /* lower 5b */ + coefs = (group_header 
>> 5) & 0x07; /* mid 3b */ + scale = (group_header >> 0) & 0x1f; /* lower 5b */ /* write header samples (skips the last 2 group nibbles), like Drebin's decoder * last 2 nibbles and next 2 header hist should match though */ @@ -146,8 +154,11 @@ void decode_mta2(VGMSTREAMCHANNEL *stream, sample_t *outbuf, int channelspacing, sample = col&1 ? /* high nibble first */ get_low_nibble_signed(nibbles) : get_high_nibble_signed(nibbles); +#if 0 sample = sample * mta2_scales[scale]; sample = (sample + hist1 * mta2_coefs[coefs][0] + hist2 * mta2_coefs[coefs][1] + 128) >> 8; +#endif + sample = (sample * VAX2_RANGES[scale] + hist1 * VAX2_K0[coefs] + hist2 * VAX2_K1[coefs]); /* f32 sample to int */ sample = clamp16(sample); /* ignore last 2 nibbles (uses first 2 header samples) */ diff --git a/src/coding/ubi_adpcm_decoder.c b/src/coding/ubi_adpcm_decoder.c index 0d7be9ef..948f039d 100644 --- a/src/coding/ubi_adpcm_decoder.c +++ b/src/coding/ubi_adpcm_decoder.c @@ -29,7 +29,7 @@ typedef struct { uint32_t codes_per_subframe_last; uint32_t codes_per_subframe; uint32_t subframes_per_frame; - uint32_t sample_rate; + uint32_t unknown18; uint32_t unknown1c; uint32_t unknown20; uint32_t bits_per_sample; @@ -179,18 +179,18 @@ void free_ubi_adpcm(ubi_adpcm_codec_data *data) { /* ************************************************************************ */ static void read_header_state(uint8_t* data, ubi_adpcm_header_data* header) { - header->signature = get_32bitLE(data + 0x00); - header->sample_count = get_32bitLE(data + 0x04); - header->subframe_count = get_32bitLE(data + 0x08); - header->codes_per_subframe_last= get_32bitLE(data + 0x0c); - header->codes_per_subframe = get_32bitLE(data + 0x10); - header->subframes_per_frame = get_32bitLE(data + 0x14); - header->sample_rate = get_32bitLE(data + 0x18); /* optional? */ - header->unknown1c = get_32bitLE(data + 0x1c); /* variable */ - header->unknown20 = get_32bitLE(data + 0x20); /* null? 
*/ - header->bits_per_sample = get_32bitLE(data + 0x24); - header->unknown28 = get_32bitLE(data + 0x28); /* 1~3? */ - header->channels = get_32bitLE(data + 0x2c); + header->signature = get_u32le(data + 0x00); + header->sample_count = get_u32le(data + 0x04); + header->subframe_count = get_u32le(data + 0x08); + header->codes_per_subframe_last= get_u32le(data + 0x0c); + header->codes_per_subframe = get_u32le(data + 0x10); + header->subframes_per_frame = get_u32le(data + 0x14); + header->unknown18 = get_u32le(data + 0x18); /* sometimes sample rate but also other values (garbage?) */ + header->unknown1c = get_u32le(data + 0x1c); /* variable */ + header->unknown20 = get_u32le(data + 0x20); /* null? */ + header->bits_per_sample = get_u32le(data + 0x24); + header->unknown28 = get_u32le(data + 0x28); /* 1~3? */ + header->channels = get_u32le(data + 0x2c); } static int parse_header(STREAMFILE* sf, ubi_adpcm_codec_data* data, off_t offset) { @@ -463,7 +463,7 @@ static void unpack_codes(uint8_t* data, uint8_t* codes, int code_count, int bps) for (i = 0; i < code_count; i++) { if (bits < bps) { - uint32_t source32le = (uint32_t)get_32bitLE(data + pos); + uint32_t source32le = get_u32le(data + pos); pos += 0x04; input = (input << 32) | (uint64_t)source32le; @@ -480,31 +480,31 @@ static void read_channel_state(uint8_t* data, ubi_adpcm_channel_data* ch) { * probably exist for padding (original code uses MMX to operate in multiple 16b at the same time) * or reserved for other bit modes */ - ch->signature = get_32bitLE(data + 0x00); - ch->step1 = get_32bitLE(data + 0x04); - ch->next1 = get_32bitLE(data + 0x08); - ch->next2 = get_32bitLE(data + 0x0c); + ch->signature = get_u32le(data + 0x00); + ch->step1 = get_s32le(data + 0x04); + ch->next1 = get_s32le(data + 0x08); + ch->next2 = get_s32le(data + 0x0c); - ch->coef1 = get_16bitLE(data + 0x10); - ch->coef2 = get_16bitLE(data + 0x12); - ch->unused1 = get_16bitLE(data + 0x14); - ch->unused2 = get_16bitLE(data + 0x16); - ch->mod1 = 
get_16bitLE(data + 0x18); - ch->mod2 = get_16bitLE(data + 0x1a); - ch->mod3 = get_16bitLE(data + 0x1c); - ch->mod4 = get_16bitLE(data + 0x1e); + ch->coef1 = get_s16le(data + 0x10); + ch->coef2 = get_s16le(data + 0x12); + ch->unused1 = get_s16le(data + 0x14); + ch->unused2 = get_s16le(data + 0x16); + ch->mod1 = get_s16le(data + 0x18); + ch->mod2 = get_s16le(data + 0x1a); + ch->mod3 = get_s16le(data + 0x1c); + ch->mod4 = get_s16le(data + 0x1e); - ch->hist1 = get_16bitLE(data + 0x20); - ch->hist2 = get_16bitLE(data + 0x22); - ch->unused3 = get_16bitLE(data + 0x24); - ch->unused4 = get_16bitLE(data + 0x26); - ch->delta1 = get_16bitLE(data + 0x28); - ch->delta2 = get_16bitLE(data + 0x2a); - ch->delta3 = get_16bitLE(data + 0x2c); - ch->delta4 = get_16bitLE(data + 0x2e); + ch->hist1 = get_s16le(data + 0x20); + ch->hist2 = get_s16le(data + 0x22); + ch->unused3 = get_s16le(data + 0x24); + ch->unused4 = get_s16le(data + 0x26); + ch->delta1 = get_s16le(data + 0x28); + ch->delta2 = get_s16le(data + 0x2a); + ch->delta3 = get_s16le(data + 0x2c); + ch->delta4 = get_s16le(data + 0x2e); - ch->delta5 = get_16bitLE(data + 0x30); - ch->unused5 = get_16bitLE(data + 0x32); + ch->delta5 = get_s16le(data + 0x30); + ch->unused5 = get_s16le(data + 0x32); VGM_ASSERT(ch->signature != 0x02, "UBI ADPCM: incorrect channel header\n"); VGM_ASSERT(ch->unused3 != 0x00, "UBI ADPCM: found unused3 used\n"); @@ -580,9 +580,28 @@ static void decode_frame(STREAMFILE* sf, ubi_adpcm_codec_data* data) { } -int ubi_adpcm_get_samples(ubi_adpcm_codec_data* data) { +int32_t ubi_adpcm_get_samples(ubi_adpcm_codec_data* data) { if (!data) return 0; return data->header.sample_count / data->header.channels; } + +int32_t ubi_adpcm_bytes_to_samples(ubi_adpcm_codec_data* data, uint32_t size) { + uint32_t frame_size; + + if (!data || !data->header.channels || !data->header.subframes_per_frame) + return 0; + + /* don't trust subframe count */ + + size -= 0x30; /* header */ + + frame_size = 0x34 * data->header.channels; /* 
setup per channel */ + frame_size += (data->header.codes_per_subframe * data->header.bits_per_sample /*+ 8*/) * data->header.subframes_per_frame / 8; + frame_size += data->header.subframes_per_frame * 0x01; /* padding byte */ + + return ((size - 0x01) / frame_size) * /* force smaller size so last frame isn't used */ + data->header.codes_per_subframe * data->header.subframes_per_frame + + data->header.codes_per_subframe_last * data->header.subframes_per_frame; +} diff --git a/src/libvgmstream.vcxproj b/src/libvgmstream.vcxproj index b57dda2b..f2d84dd6 100644 --- a/src/libvgmstream.vcxproj +++ b/src/libvgmstream.vcxproj @@ -175,6 +175,7 @@ + @@ -727,6 +728,7 @@ + diff --git a/src/libvgmstream.vcxproj.filters b/src/libvgmstream.vcxproj.filters index 09f7c57d..a3c74ab1 100644 --- a/src/libvgmstream.vcxproj.filters +++ b/src/libvgmstream.vcxproj.filters @@ -323,6 +323,9 @@ Header Files + + Header Files + @@ -1969,6 +1972,9 @@ Source Files + + Source Files + meta\Source Files diff --git a/src/meta/bkhd.c b/src/meta/bkhd.c index 37f7dde7..2b255bfd 100644 --- a/src/meta/bkhd.c +++ b/src/meta/bkhd.c @@ -126,25 +126,14 @@ VGMSTREAM* init_vgmstream_bkhd(STREAMFILE* sf) { /* detect format */ if (subfile_offset <= 0 || subfile_size <= 0) { - /* some indexes don't have data */ is_dummy = 1; - } - else if (read_f32(subfile_offset + 0x02, sf) >= 30.0 && - read_f32(subfile_offset + 0x02, sf) <= 250.0) { - /* ignore Wwise's custom .wmid (similar to a regular midi but with simplified - * chunks and custom fields: 0x00=MThd's division, 0x02: bpm (new), etc) */ - is_wmid = 1; - } - /* default is riff/sfx */ - - - if (is_dummy || is_wmid) { - /* for now leave a dummy song for easier .bnk index-to-subsong mapping */ + /* rarely some indexes don't have data (early bnk) + * for now leave a dummy song for easier .bnk index-to-subsong mapping */ vgmstream = init_vgmstream_silence(0, 0, 0); if (!vgmstream) goto fail; } else { - /* could pass .wem but few files need memory .wem detection */ + 
/* could pass .wem extension but few files need memory .wem detection */ temp_sf = setup_subfile_streamfile(sf, subfile_offset, subfile_size, NULL); if (!temp_sf) goto fail; @@ -153,13 +142,21 @@ VGMSTREAM* init_vgmstream_bkhd(STREAMFILE* sf) { vgmstream = init_vgmstream_wwise_bnk(temp_sf, &prefetch); if (!vgmstream) goto fail; } + else if (read_f32(subfile_offset + 0x02, temp_sf) >= 30.0 && + read_f32(subfile_offset + 0x02, temp_sf) <= 250.0) { + is_wmid = 1; + /* ignore Wwise's custom .wmid (similar to a regular midi but with simplified + * chunks and custom fields: 0x00=MThd's division, 0x02: bpm (new), etc) */ + vgmstream = init_vgmstream_silence(0, 0, 0); + if (!vgmstream) goto fail; + } else { + /* may fail if not an actual wfx */ vgmstream = init_vgmstream_bkhd_fx(temp_sf); if (!vgmstream) goto fail; } } - vgmstream->num_streams = total_subsongs; { diff --git a/src/meta/sqex_sead.c b/src/meta/sqex_sead.c index 651b7cf3..f1e30be7 100644 --- a/src/meta/sqex_sead.c +++ b/src/meta/sqex_sead.c @@ -79,9 +79,14 @@ VGMSTREAM* init_vgmstream_sqex_sead(STREAMFILE* sf) { goto fail; } - /* SEAD handles both sab/mab in the same lib, and other similar files (config, engine, etc). + /* SEAD handles both sab/mab in the same lib (libsead), and other similar files (config, engine, etc). * Has some chunks pointing to sections, and each section entry (usually starting with section - * version/reserved/size) is always padded to 0x10. Most values are unsigned. */ + * version/reserved/size) is always padded to 0x10. Most values are unsigned. + * + * "SEAD Engine" (Square Enix Application on Demand Engine) is/was SQEX's internal middleware (~2006), + * so it's possible SEAD refers to the whole thing rather than audio, but since .sab/mab audio lib typically goes + * with other engines it's hard to say if "libsead" is the whole engine but trimmed with only audio functions, + * or is a separate audio lib derived from this "SEAD Engine". 
*/ sead.big_endian = guess_endianness16bit(0x06, sf); /* no flag, use size */ diff --git a/src/meta/sspf.c b/src/meta/sspf.c index 649abf83..2929fa4e 100644 --- a/src/meta/sspf.c +++ b/src/meta/sspf.c @@ -1,6 +1,8 @@ #include "meta.h" +static int freq_to_rate(int freq); + /* SSPF - Konami/KCET banks [Metal Gear Solid 4 (PS3)] */ VGMSTREAM* init_vgmstream_sspf(STREAMFILE* sf) { VGMSTREAM* vgmstream = NULL; @@ -8,7 +10,7 @@ VGMSTREAM* init_vgmstream_sspf(STREAMFILE* sf) { int loop_flag, channels, sample_rate; int32_t num_samples, loop_start; int total_subsongs, target_subsong = sf->stream_index; - uint32_t file_size, pad_size, offset, bwav_offset, iwav_offset, ssw2_offset, stream_size; + uint32_t file_size, pad_size, offset, bwav_offset, iwav_offset, wave_offset, stream_size; uint32_t codec; @@ -45,42 +47,43 @@ VGMSTREAM* init_vgmstream_sspf(STREAMFILE* sf) { offset = iwav_offset + 0x10 + (target_subsong - 1) * 0x20; /* IWAV entry supposedly contains more info but seems only offset and some ID at 0x14, rest is 0 */ - ssw2_offset = read_u32be(offset + 0x00,sf) + bwav_offset; - if (is_id32be(ssw2_offset,sf, "SSWF")) { - /* - 04 kType (always 0x01) - 05 nChannels - 06 freq - 08 lpStart - 0C nSamples - */ + wave_offset = read_u32be(offset + 0x00,sf) + bwav_offset; + if (is_id32be(wave_offset,sf, "SSWF")) { + codec = read_u8(wave_offset + 0x04,sf); /* kType (always 0x01) */ + if (read_u8(wave_offset + 0x05,sf) != 0x01) /* nChannels? */ + goto fail; + sample_rate = read_u16be(wave_offset + 0x06,sf); /* not freq (ex. 48000 is used) */ + loop_start = read_s32be(wave_offset + 0x08,sf); + num_samples = read_s32be(wave_offset + 0x0c,sf); - /* data is some unknown codec that seems to be ADPCM header + byte (simplified MTA2 with only 1 group?) 
*/ - vgm_logi("SSPF: unsupported SSWF variant at %x\n", ssw2_offset); - goto fail; + channels = 1; + loop_flag = loop_start != 0x7FFFFFFF; + start_offset = wave_offset + 0x10; + + stream_size = 0x10 + (num_samples * channels * 0x02); /* implicit */ } - else if (is_id32be(ssw2_offset,sf, "SSW2")) { - stream_size = read_u32be(ssw2_offset + 0x04,sf); + else if (is_id32be(wave_offset,sf, "SSW2")) { + stream_size = read_u32be(wave_offset + 0x04,sf); /* 08 version? (always 0) */ - num_samples = read_s32be(ssw2_offset + 0x0c,sf); - codec = read_u32be(ssw2_offset + 0x10,sf); /* kType (always 0x21) */ - if (read_u32be(ssw2_offset + 0x10,sf) != 0x21) + num_samples = read_s32be(wave_offset + 0x0c,sf); + codec = read_u32be(wave_offset + 0x10,sf); /* kType (always 0x21) */ + if (read_u32be(wave_offset + 0x10,sf) != 0x21) goto fail; - if (read_u8(ssw2_offset + 0x14,sf) != 0x08) /* nBlocks? */ + if (read_u8(wave_offset + 0x14,sf) != 0x08) /* nBlocks? */ goto fail; - if (read_u8(ssw2_offset + 0x15,sf) != 0x01) /* nChannels? */ + if (read_u8(wave_offset + 0x15,sf) != 0x01) /* nChannels? 
*/ goto fail; channels = 1; - sample_rate = read_u16be(ssw2_offset + 0x16,sf); - loop_start = read_s32be(ssw2_offset + 0x18,sf); + sample_rate = freq_to_rate(read_u16be(wave_offset + 0x16,sf)); /* freq value */ + loop_start = read_s32be(wave_offset + 0x18,sf); /* 0x1c: lpStartAddr (0xFFFFFFFF is none) */ loop_flag = loop_start != 0x7FFFFFFF; - start_offset = ssw2_offset + 0x20; + start_offset = wave_offset + 0x20; } else { - vgm_logi("SSPF: unknown variant at %x\n", ssw2_offset); + vgm_logi("SSPF: unknown variant at %x\n", wave_offset); goto fail; } @@ -99,6 +102,12 @@ VGMSTREAM* init_vgmstream_sspf(STREAMFILE* sf) { vgmstream->stream_size = stream_size; switch (codec) { + case 0x01: + vgmstream->coding_type = coding_PCM16BE; + vgmstream->layout_type = layout_interleave; + vgmstream->interleave_block_size = 0x02; + break; + case 0x21: vgmstream->coding_type = coding_MTA2; vgmstream->codec_config = 1; @@ -118,3 +127,33 @@ fail: close_vgmstream(vgmstream); return NULL; } + +/* transforms internal freq to sample rate */ +static int freq_to_rate(int freq) { + /* from PowerPC code seems like it's trying something like this, but not quite (PPC is complex): + if ((freq & 0xFF) != 0) + return powf(10.0, 0.0117647 * (freq & 0xFF))) * 20.0; + return powf(10.0, 0.0117647 * 2048)) * 20.0; //??? 
+ */ + + //TODO improve, for now fake it + switch(freq) { + case 0x9000: return 24000; /* most voices, sounds right */ + case 0xA200: return 48000; /* most sfx */ + /* rest is rarely used for some sfx, so it's hard to guess actual frequency and this is just approximate */ + case 0x9fcd: return 44100; + case 0x9c9c: return 39000; + case 0x9b79: return 38000; + case 0x9b13: return 37000; + case 0x9a88: return 36000; + case 0x9778: return 32000; + case 0x9401: return 28000; + case 0x8578: return 16000; + case 0x7e00: return 11050; + default: + VGM_LOG("SSPF: unknown freq %x\n", freq); + break; + } + + return freq; +} diff --git a/src/meta/txth.c b/src/meta/txth.c index 8f746f00..e2354841 100644 --- a/src/meta/txth.c +++ b/src/meta/txth.c @@ -2,8 +2,11 @@ #include "../coding/coding.h" #include "../layout/layout.h" #include "txth_streamfile.h" +#include "../util/text_reader.h" -#define TXT_LINE_MAX 0x2000 +#define TXT_LINE_MAX 2048 /* probably ~1000 would be ok */ +#define TXT_LINE_KEY_MAX 128 +#define TXT_LINE_VAL_MAX (TXT_LINE_MAX - TXT_LINE_KEY_MAX) /* known TXTH types */ typedef enum { @@ -862,7 +865,7 @@ static int get_padding_size(txth_header* txth, int discard_empty); /* Simple text parser of "key = value" lines. * The code is meh and error handling not exactly the best. 
*/ static int parse_txth(txth_header* txth) { - off_t txt_offset, file_size; + uint32_t txt_offset; /* setup txth defaults */ if (txth->sf_body) @@ -872,23 +875,28 @@ static int parse_txth(txth_header* txth) { txt_offset = read_bom(txth->sf_text); - file_size = get_streamfile_size(txth->sf_text); /* read lines */ { - char line[TXT_LINE_MAX]; - char key[TXT_LINE_MAX]; - char val[TXT_LINE_MAX]; - /* at least as big as a line to avoid overflows (I hope) */ + text_reader_t tr; + uint8_t buf[TXT_LINE_MAX + 1]; + char key[TXT_LINE_KEY_MAX]; + char val[TXT_LINE_VAL_MAX]; + int ok, line_len; + char* line; - while (txt_offset < file_size) { - int ok, bytes_read, line_ok; + if (!text_reader_init(&tr, buf, sizeof(buf), txth->sf_text, txt_offset, 0)) + goto fail; - bytes_read = read_line(line, sizeof(line), txt_offset, txth->sf_text, &line_ok); - if (!line_ok) goto fail; - //;VGM_LOG("TXTH: line=%s\n",line); + do { + line_len = text_reader_get_line(&tr, &line); + if (line_len < 0) goto fail; /* too big for buf (maybe not text) */ - txt_offset += bytes_read; + if (line == NULL) /* EOF */ + break; + + if (line_len == 0) /* empty */ + continue; /* get key/val (ignores lead spaces, stops at space/comment/separator) */ ok = sscanf(line, " %[^ \t#=] = %[^\t#\r\n] ", key,val); @@ -897,7 +905,8 @@ static int parse_txth(txth_header* txth) { if (!parse_keyval(txth->sf, txth, key, val)) /* read key/val */ goto fail; - } + + } while (line_len >= 0); } if (!txth->loop_flag_set) diff --git a/src/meta/txtp.c b/src/meta/txtp.c index ad51936d..72df65d7 100644 --- a/src/meta/txtp.c +++ b/src/meta/txtp.c @@ -3,11 +3,14 @@ #include "../layout/layout.h" #include "../mixing.h" #include "../plugins.h" +#include "../util/text_reader.h" #include -#define TXTP_LINE_MAX 1024 +#define TXT_LINE_MAX 2048 /* some wwise .txtp get wordy */ +#define TXT_LINE_KEY_MAX 128 +#define TXT_LINE_VAL_MAX (TXT_LINE_MAX - TXT_LINE_KEY_MAX) #define TXTP_MIXING_MAX 512 #define TXTP_GROUP_MODE_SEGMENTED 'S' #define 
TXTP_GROUP_MODE_LAYERED 'L' @@ -68,7 +71,7 @@ typedef struct { typedef struct { /* main entry */ - char filename[TXTP_LINE_MAX]; + char filename[TXT_LINE_MAX]; int silent; /* TXTP settings (applied at the end) */ @@ -1271,7 +1274,7 @@ static inline int is_match(const char* str1, const char* str2) { static void parse_params(txtp_entry* entry, char* params) { /* parse params: #(commands) */ int n, nc, nm, mc; - char command[TXTP_LINE_MAX]; + char command[TXT_LINE_MAX]; play_config_t* tcfg = &entry->config; entry->range_start = 0; @@ -1802,7 +1805,7 @@ fail: static int is_substring(const char* val, const char* cmp) { int n; - char subval[TXTP_LINE_MAX]; + char subval[TXT_LINE_MAX]; /* read string without trailing spaces or comments/commands */ if (sscanf(val, " %s%n[^ #\t\r\n]%n", subval, &n, &n) != 1) @@ -1862,12 +1865,12 @@ static int parse_keyval(txtp_header* txtp, const char* key, const char* val) { } } else if (0==strcmp(key,"commands")) { - char val2[TXTP_LINE_MAX]; + char val2[TXT_LINE_MAX]; strcpy(val2, val); /* copy since val is modified here but probably not important */ if (!add_entry(txtp, val2, 1)) goto fail; } else if (0==strcmp(key,"group")) { - char val2[TXTP_LINE_MAX]; + char val2[TXT_LINE_MAX]; strcpy(val2, val); /* copy since val is modified here but probably not important */ if (!add_group(txtp, val2)) goto fail; @@ -1884,7 +1887,7 @@ fail: static txtp_header* parse_txtp(STREAMFILE* sf) { txtp_header* txtp = NULL; - off_t txt_offset, file_size; + uint32_t txt_offset; txtp = calloc(1,sizeof(txtp_header)); @@ -1894,23 +1897,28 @@ static txtp_header* parse_txtp(STREAMFILE* sf) { txtp->is_segmented = 1; txt_offset = read_bom(sf); - file_size = get_streamfile_size(sf); /* read and parse lines */ { - char line[TXTP_LINE_MAX]; - char key[TXTP_LINE_MAX]; - char val[TXTP_LINE_MAX]; - char filename[TXTP_LINE_MAX]; - /* at least as big as a line to avoid overflows (I hope) */ + text_reader_t tr; + uint8_t buf[TXT_LINE_MAX + 1]; + char key[TXT_LINE_KEY_MAX]; + 
char val[TXT_LINE_VAL_MAX]; + int ok, line_len; + char* line; - while (txt_offset < file_size) { - int ok, bytes_read, line_ok; + if (!text_reader_init(&tr, buf, sizeof(buf), sf, txt_offset, 0)) + goto fail; - bytes_read = read_line(line, sizeof(line), txt_offset, sf, &line_ok); - if (!line_ok) goto fail; + do { + line_len = text_reader_get_line(&tr, &line); + if (line_len < 0) goto fail; /* too big for buf (maybe not text) */ - txt_offset += bytes_read; + if (line == NULL) /* EOF */ + break; + + if (line_len == 0) /* empty */ + continue; /* try key/val (ignores lead/trail spaces, # may be commands or comments) */ ok = sscanf(line, " %[^ \t#=] = %[^\t\r\n] ", key,val); @@ -1921,16 +1929,17 @@ static txtp_header* parse_txtp(STREAMFILE* sf) { } /* must be a filename (only remove spaces from start/end, as filenames con contain mid spaces/#/etc) */ - ok = sscanf(line, " %[^\t\r\n] ", filename); + ok = sscanf(line, " %[^\t\r\n] ", val); if (ok != 1) /* not a filename either */ continue; - if (filename[0] == '#') + if (val[0] == '#') continue; /* simple comment */ /* filename with settings */ - if (!add_entry(txtp, filename, 0)) + if (!add_entry(txtp, val, 0)) goto fail; - } + + } while (line_len >= 0); } /* mini-txth: if no entries are set try with filename, ex. 
from "song.ext#3.txtp" use "song.ext#3" diff --git a/src/meta/ubi_hx.c b/src/meta/ubi_hx.c index e43379a3..3b80d4b8 100644 --- a/src/meta/ubi_hx.c +++ b/src/meta/ubi_hx.c @@ -4,11 +4,12 @@ #include "../util/endianness.h" -typedef enum { PCM, UBI, PSX, DSP, XIMA, ATRAC3, XMA2, MP3 } ubi_hx_codec; +typedef enum { PCM, UBI, PSX, DSP, XIMA, ATRAC3, XMA2, MP3, SILENCE } ubi_hx_codec; typedef struct { int big_endian; int total_subsongs; + int is_riff; int codec_id; ubi_hx_codec codec; /* unified codec */ @@ -68,7 +69,7 @@ VGMSTREAM* init_vgmstream_ubi_hx(STREAMFILE* sf) { * Game seems to play files by calling linked ids: EventResData (play/stop/etc) > Random/Program/Wav ResData (1..N refs) > FileIdObj */ /* HX HEADER */ - hx.big_endian = guess_endianness32bit(0x00, sf); + hx.big_endian = guess_endian32(0x00, sf); if (!parse_hx(&hx, sf, target_subsong)) goto fail; @@ -140,13 +141,19 @@ fail: static int parse_name(ubi_hx_header* hx, STREAMFILE* sf) { read_u32_t read_u32 = hx->big_endian ? read_u32be : read_u32le; read_s32_t read_s32 = hx->big_endian ? 
read_s32be : read_s32le; - off_t index_offset, offset; + uint32_t index_type, index_offset, offset; int i, index_entries; char class_name[255]; index_offset = read_u32(0x00, sf); + index_type = read_u32(index_offset + 0x04, sf); index_entries = read_s32(index_offset + 0x08, sf); + + /* doesn't seem to have names (no way to link) */ + if (index_type == 0x01) + return 1; + offset = index_offset + 0x0c; for (i = 0; i < index_entries; i++) { off_t header_offset; @@ -169,29 +176,34 @@ static int parse_name(ubi_hx_header* hx, STREAMFILE* sf) { //unknown_count = read_s32(offset + 0x00, sf); offset += 0x04; - link_count = read_s32(offset + 0x00, sf); - offset += 0x04; - for (j = 0; j < link_count; j++) { - uint32_t link_id1 = read_u32(offset + 0x00, sf); - uint32_t link_id2 = read_u32(offset + 0x04, sf); - - if (link_id1 == hx->cuuid1 && link_id2 == hx->cuuid2) { - is_found = 1; - } - offset += 0x08; + if (index_type == 0x01) { + goto fail; } + else { + link_count = read_s32(offset + 0x00, sf); + offset += 0x04; + for (j = 0; j < link_count; j++) { + uint32_t link_id1 = read_u32(offset + 0x00, sf); + uint32_t link_id2 = read_u32(offset + 0x04, sf); - language_count = read_s32(offset + 0x00, sf); - offset += 0x04; - for (j = 0; j < language_count; j++) { - uint32_t link_id1 = read_u32(offset + 0x08, sf); - uint32_t link_id2 = read_u32(offset + 0x0c, sf); - - if (link_id1 == hx->cuuid1 && link_id2 == hx->cuuid2) { - is_found = 1; + if (link_id1 == hx->cuuid1 && link_id2 == hx->cuuid2) { + is_found = 1; + } + offset += 0x08; } - offset += 0x10; + language_count = read_s32(offset + 0x00, sf); + offset += 0x04; + for (j = 0; j < language_count; j++) { + uint32_t link_id1 = read_u32(offset + 0x08, sf); + uint32_t link_id2 = read_u32(offset + 0x0c, sf); + + if (link_id1 == hx->cuuid1 && link_id2 == hx->cuuid2) { + is_found = 1; + } + + offset += 0x10; + } } /* identify all possible names so unknown platforms fail */ @@ -228,6 +240,7 @@ static int parse_name(ubi_hx_header* hx, 
STREAMFILE* sf) { } fail: + vgm_logi("UBI HX: error parsing name at %x (report)\n", index_offset); return 0; } @@ -265,11 +278,14 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint uint32_t flag_type = read_u32(offset + 0x00, sf); if (flag_type == 0x01 || flag_type == 0x02) { /* Rayman Arena */ - uint32_t unk_value = read_u32(offset + 0x04, sf); - if (unk_value != 0x00 && /* common */ - unk_value != 0xbe570a3d && /* Largo Winch: Empire Under Threat (PC)-most */ - unk_value != 0xbf8e147b) /* Largo Winch: Empire Under Threat (PC)-few */ + uint32_t unk_value = read_u32(offset + 0x04, sf); /* float? */ + if (unk_value != 0x00 && /* common */ + unk_value != 0xbe570a3d && /* Largo Winch: Empire Under Threat (PC)-most */ + unk_value != 0xbf8e147b) { /* Largo Winch: Empire Under Threat (PC)-few */ + VGM_LOG("ubi hx: unknown flag\n"); goto fail; + } + hx->stream_mode = read_u32(offset + 0x08, sf); /* flag: 0=internal, 1=external */ /* 0x0c: flag: 0=static, 1=stream */ offset += 0x10; @@ -279,7 +295,8 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint offset += 0x08; if (strcmp(hx->class_name, "CGCWaveFileIdObj") == 0) { - if (read_u32(offset + 0x00, sf) != read_u32(offset + 0x04, sf)) goto fail; /* meaning? */ + if (read_u32(offset + 0x00, sf) != read_u32(offset + 0x04, sf)) + goto fail; /* meaning? */ hx->stream_mode = read_u32(offset + 0x04, sf); offset += 0x08; } @@ -302,6 +319,7 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint //todo probably a flag: &1=external, &2=stream, &8=has adjust (XIII), &4=??? (XIII PS2, small, mono) switch(hx->stream_mode) { case 0x00: /* memory (internal file) */ + case 0x02: /* same (no diffs in size/channels/etc?) 
[Rayman 3 demo (PC)] */ riff_offset = offset; riff_size = read_u32(riff_offset + 0x04, sf) + 0x08; break; @@ -321,13 +339,17 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint break; default: - VGM_LOG("ubi hx: %x\n", hx->stream_mode); + VGM_LOG("ubi hx: unknown stream mode %x\n", hx->stream_mode); goto fail; } /* parse pseudo-RIFF "fmt" */ - if (read_u32(riff_offset, sf) != 0x46464952) /* "RIFF" in machine endianness */ + if (read_u32(riff_offset, sf) != 0x46464952) { /* "RIFF" in machine endianness */ + VGM_LOG("ubi hx: unknown RIFF\n"); goto fail; + } + + hx->is_riff = 1; hx->codec_id = read_u16(riff_offset + 0x14 , sf); switch(hx->codec_id) { @@ -357,12 +379,15 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint hx->stream_offset = read_u32(chunk_offset + 0x00, sf) + stream_adjust; } else { + VGM_LOG("ubi hx: unknown chunk\n"); goto fail; } } else { - if (!find_chunk_riff_ve(sf, 0x61746164,riff_offset + 0x0c,riff_size - 0x0c, &chunk_offset,&chunk_size, hx->big_endian)) + if (!find_chunk_riff_ve(sf, 0x61746164,riff_offset + 0x0c,riff_size - 0x0c, &chunk_offset,&chunk_size, hx->big_endian)) { + VGM_LOG("ubi hx: unknown chunk RIFF\n"); goto fail; + } hx->stream_offset = chunk_offset; if (chunk_size > riff_size - (chunk_offset - riff_offset) || !chunk_size) @@ -384,7 +409,11 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint //todo some dummy files have 0 size - if (read_u32(offset + 0x00, sf) != 0x01) goto fail; + if (read_u32(offset + 0x00, sf) != 0x01) { + VGM_LOG("ubi hx: unknown flag non 0x01\n"); + goto fail; + } + /* 0x04: some kind of parent id shared by multiple Waves, or 0 */ offset += 0x08; @@ -400,7 +429,9 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint switch(hx->channels) { case 0x48: hx->channels = 1; break; case 0x90: hx->channels = 2; break; - default: goto fail; + default: + VGM_LOG("ubi hx: channel type %x\n", 
hx->channels); + goto fail; } hx->sample_rate = (read_u16(offset + 0x02, sf) & 0x7FFFu) << 1u; /* ??? */ cue_flag = read_u8(offset + 0x03, sf) & (1 << 7); @@ -461,6 +492,7 @@ static int parse_header(ubi_hx_header* hx, STREAMFILE* sf, uint32_t offset, uint } } else { + VGM_LOG("ubi hx: unknown type\n"); goto fail; } @@ -478,13 +510,21 @@ static int parse_hx(ubi_hx_header* hx, STREAMFILE* sf, int target_subsong) { uint32_t index_offset, offset; int i, index_entries; char class_name[255]; + uint32_t index_type; index_offset = read_u32(0x00, sf); - if (read_u32(index_offset + 0x00, sf) != get_id32be("XDNI")) /* (INDX in given endianness) */ + if (read_u32(index_offset + 0x00, sf) != get_id32be("XDNI")) { /* (INDX in given endianness) */ + VGM_LOG("ubi hx: unknown index\n"); goto fail; - if (read_u32(index_offset + 0x04, sf) != 0x02) /* type? */ + } + + /* usually 0x02, rarely 0x01 [Rayman M demo (PS2)] */ + index_type = read_u32(index_offset + 0x04, sf); + if (index_type != 0x01 && index_type != 0x02) { + VGM_LOG("ubi hx: unknown index type\n"); goto fail; + } if (target_subsong == 0) target_subsong = 1; @@ -517,23 +557,29 @@ static int parse_hx(ubi_hx_header* hx, STREAMFILE* sf, int target_subsong) { } offset += 0x04; - /* ids that this object directly points to (ex. Event > Random) */ - link_count = read_s32(offset + 0x00, sf); - offset += 0x04 + 0x08 * link_count; + if (index_type == 0x01) { + link_count = 0; + language_count = 0; + } + else { + /* ids that this object directly points to (ex. 
Event > Random) */ + link_count = read_s32(offset + 0x00, sf); + offset += 0x04 + 0x08 * link_count; - /* localized id list of WavRes (can use this list instead of the prev one) */ - language_count = read_s32(offset + 0x00, sf); - offset += 0x04; - for (j = 0; j < language_count; j++) { - /* 0x00: lang code, in reverse endianness: "en ", "fr ", etc */ - /* 0x04: possibly count of ids for this lang */ - /* 0x08: id1+2 */ + /* localized id list of WavRes (can use this list instead of the prev one) */ + language_count = read_s32(offset + 0x00, sf); + offset += 0x04; + for (j = 0; j < language_count; j++) { + /* 0x00: lang code, in reverse endianness: "en ", "fr ", etc */ + /* 0x04: possibly count of ids for this lang */ + /* 0x08: id1+2 */ - if (read_u32(offset + 0x04, sf) != 1) { - VGM_LOG("ubi hx: wrong lang count near %x\n", offset); - goto fail; /* WavRes doesn't have this field */ + if (read_u32(offset + 0x04, sf) != 1) { + VGM_LOG("ubi hx: wrong lang count near %x\n", offset); + goto fail; /* WavRes doesn't have this field */ + } + offset += 0x10; } - offset += 0x10; } //todo figure out CProgramResData sequences @@ -571,6 +617,7 @@ static int parse_hx(ubi_hx_header* hx, STREAMFILE* sf, int target_subsong) { goto fail; } + /* should only exist on non-wave objects (like CProgramResData) */ if (link_count != 0) { vgm_logi("UBI HX: found links in wav object (report)\n"); goto fail; @@ -632,6 +679,12 @@ static VGMSTREAM* init_vgmstream_ubi_hx_header(ubi_hx_header* hx, STREAMFILE* sf sb = sf; } + /* very rarely a game uses Ubi ADPCM, but data is empty and has missing header [Rayman 3 demo 3 (PC) fixe.hxc#84] */ + if (hx->is_riff && hx->codec == UBI) { //todo improve + if (read_u32le(hx->stream_offset, sb) == 0x02) { + hx->codec = SILENCE; + } + } /* build the VGMSTREAM */ vgmstream = allocate_vgmstream(hx->channels, hx->loop_flag); @@ -658,6 +711,12 @@ static VGMSTREAM* init_vgmstream_ubi_hx_header(ubi_hx_header* hx, STREAMFILE* sf vgmstream->layout_type = 
layout_none; vgmstream->num_samples = ubi_adpcm_get_samples(vgmstream->codec_data); + + /* some kind of internal bug I guess, seen in a few subsongs in Rayman 3 PC demo, other values are also buggy */ + if (vgmstream->num_samples == 0x77E7A374) { + vgmstream->num_samples = ubi_adpcm_bytes_to_samples(vgmstream->codec_data, hx->stream_size); + } + /* XIII has 6-bit stereo music, Rayman 3 4-bit music, both use 6-bit mono) */ break; @@ -745,6 +804,13 @@ static VGMSTREAM* init_vgmstream_ubi_hx_header(ubi_hx_header* hx, STREAMFILE* sf break; } #endif + + case SILENCE: /* special hack */ + vgmstream->coding_type = coding_SILENCE; + vgmstream->layout_type = layout_none; + + vgmstream->num_samples = ps_bytes_to_samples(hx->stream_size, hx->channels); + break; default: goto fail; } diff --git a/src/util/text_reader.c b/src/util/text_reader.c new file mode 100644 index 00000000..4d8db192 --- /dev/null +++ b/src/util/text_reader.c @@ -0,0 +1,187 @@ +#include +#include "text_reader.h" +#include "log.h" + + +/* convenience function to init the above struct */ +int text_reader_init(text_reader_t* tr, uint8_t* buf, int buf_size, STREAMFILE* sf, uint32_t offset, uint32_t max) { + memset(tr, 0, sizeof(text_reader_t)); + + if (buf_size <= 1 || !buf || !sf) + return 0; + + tr->buf = buf; + tr->buf_size = buf_size; + tr->sf = sf; + tr->offset = offset; + + if (!max) + max = get_streamfile_size(sf) - offset; + tr->max_offset = max; + + return 1; +} + + +/* reads more data into buf and adjust values */ +static void prepare_buf(text_reader_t* tr) { + + /* since we may read N lines in the same buffer, move starting pos each call */ + tr->pos = tr->next_pos; + + /* not more data (but may still read lines so not an error) */ + if (tr->offset >= tr->max_offset) { + return; + } + + /* request more data */ + if (tr->pos >= tr->filled) { + tr->pos = 0; + tr->filled = 0; + } + + /* partially filled, move buffer */ + if (tr->pos > 0) { + int move_size = tr->filled - tr->pos; + + memmove(tr->buf, 
&tr->buf[tr->pos], move_size); /* memmove = may overlap */ + tr->filled -= tr->pos; /* now less filled */ + tr->pos = 0; + } + + /* has enough data */ + if (tr->filled >= tr->buf_size) { + return; + } + + /* read into buf up to max_offset */ + { + int bytes; + int read_size = tr->buf_size - tr->filled; + if (read_size + tr->offset > tr->max_offset) + read_size = tr->max_offset - tr->offset; + + if (read_size <= 0) { /* ??? (looks unreachable after the checks above) */ + bytes = 0; + } + else { + if (tr->filled + read_size >= tr->buf_size) + read_size -= 1; /* always leave an extra byte for c-string null */ + + bytes = read_streamfile(tr->buf + tr->filled, tr->offset, read_size, tr->sf); + tr->offset += bytes; + tr->filled += bytes; + } + + /* nothing was read (maybe some internal issue), force EOF so no more reads are tried */ + if (bytes == 0) { + tr->offset = tr->max_offset; + } + + /* ensure no old data is used as valid (simplifies some checks during parse) */ + tr->buf[tr->filled] = '\0'; + } +} + +static void parse_buf(text_reader_t* tr) { /* finds next line in buf starting at pos; sets line, line_len, line_ok and next_pos */ + int i; + + tr->line = (char*)&tr->buf[tr->pos]; + tr->line_len = 0; + tr->line_ok = 0; + + /* detect EOF (this should only happen if no more data was loaded) */ + if (tr->pos == tr->filled) { + tr->line = NULL; + tr->line_ok = 1; + tr->line_len = 0; + return; + } + + /* assumes filled doesn't reach buf_size (to allow trailing \0 after filled) */ + for (i = tr->pos; i < tr->filled; i++) { + char c = (char)tr->buf[i]; + + if (c == '\0') { + i++; + break; /* not a valid file?
(line_ok=0) */ + } + + if (c == '\r' && tr->buf[i+1] == '\n') { /* CRLF (0x0d0a) */ + /* i+1 may read past filled but it's pre-set to \0 */ + i += 2; //todo check that i < buf_size-1 + tr->line_ok = 1; + break; + } + else if (c == '\n') { /* LF (0x0a) */ + i++; + tr->line_ok = 1; + break; + } + else if (c == '\r') { /* CR (0x0d) */ + i++; + tr->line_ok = (i < tr->buf_size - 1); + /* if buf ends with a CR, next buf may start with a LF (single CRLF), so line is not ok near buf end + * (old Macs use single \r as line ends, but using only that and reaching buf end should happen rarely) */ + break; + } + + tr->line_len++; + } + + /* when lines are small we may reach filled (smaller than buf) with no more data: last line is ok without a line end */ + if (!tr->line_ok && i == tr->filled) + tr->line_ok = (tr->filled < tr->buf_size - 1); + + /* added after proper line (a \n) or after buf end, so we aren't changing valid data */ + tr->buf[tr->pos + tr->line_len] = '\0'; + tr->next_pos = i; +} + +int text_reader_get_line(text_reader_t* tr, char** p_line) { /* sets *p_line to next line (NULL on EOF); returns line length, or -(length+1) if line isn't ok */ + + if (!tr->buf) /* no init */ + return 0; + + /* how it works: + * - fills buffer up to max or buf_len, from pos 0 + * - counts from 0 to next '\n' or EOF + * - nulls \n or after EOF to make a proper c-string + * - returns string from pos 0 to len + * - on next call rather than re-reading continues from pos N (after \n) + * - a buf will likely contain multiple lines + * - if read chars reach buf_end (no proper line found): + * - pos = 0: buf isn't big enough, error + * - pos > 0: move data to pos=0, fill rest of buf + * + * ex.
+ * - parse buf: read chunk full [aaaaa\nbbbb] (pos = 0) + * - get line: returns "aaaaa\0" (next_pos points to first 'b') + * - get line: from 'b', but reaches buf end before \n or EOF: must readjust + * - parse buf: move chunk part [bbbb*******] ('b' to beginning, * is garbage) + * - parse buf: read chunk part [bbbbbb\ncc_] (reaches EOF) + * - get line: returns "bbbbbb\0" (pos points to first c) + * - get line: returns "cc\0" + * - get line: returns NULL (reached EOF, no more bytes) + * - (there is an implicit \0 reserved in buf) + * + * ex. + * - start: read chunk [aaaaaaaaaaa] + * - get line: reaches buf end, but didn't reach EOF nor \n: error, can't store line + */ + + prepare_buf(tr); /* may not do anything */ + parse_buf(tr); /* next line */ + + /* if we are reading a partial line there may be more data */ + if (!tr->line_ok && tr->pos > 0) { + prepare_buf(tr); + parse_buf(tr); /* could continue from prev parse but makes logic more complex for little gain */ + } + + /* always output line even if truncated */ + if (p_line) *p_line = tr->line; + return !tr->line_ok ? + -(tr->line_len + 1) : /* -0 is also possible, so force -1 */ + tr->line_len; +} diff --git a/src/util/text_reader.h b/src/util/text_reader.h new file mode 100644 index 00000000..93dc51ab --- /dev/null +++ b/src/util/text_reader.h @@ -0,0 +1,43 @@ +#ifndef _TEXT_READER_H_ +#define _TEXT_READER_H_ + + +/* Reader tuned for whole text files, reading chunks to minimize I/O with a single buffer. + * For short lines read_line may be more appropriate (reads up to line end, while this reads bigger chunks), + * which also allows \0 (this reader returns an error). + * NOTE: modifies passed buffer (lines are forced to end with \0 rather than \n). + * + * Usage: set text_reader_t and defaults with text_reader_init, call text_reader_get_line(...) to get lines. + * buf may be size+1 to allow 2^N chunk reads + trailing \0 (better performance?).
+ */ + +#include "../streamfile.h" + +typedef struct { + /* init */ + uint8_t* buf; /* where data will be read */ + int buf_size; /* size of buf (also max line size) */ + STREAMFILE* sf; /* used to read data */ + uint32_t offset; /* sf pos */ + uint32_t max_offset; /* sf max (reading stops once offset reaches it) */ + + /* internal */ + int filled; /* current buf bytes */ + int pos; /* current buf pos (last line) */ + int next_pos; /* buf pos on next call, after line end */ + int line_ok; /* current line is fully correct */ + + char* line; /* current line inside buf, or NULL on EOF */ + int line_len; /* current line length, not counting the line end */ +} text_reader_t; + + +/* convenience function to init the above struct */ +int text_reader_init(text_reader_t* tr, uint8_t* buf, int buf_size, STREAMFILE* sf, uint32_t offset, uint32_t max); + +/* Reads and sets next line, or NULL if no lines are found (EOF). + * Returns line length (0 for empty lines, or if tr wasn't init), or <0 as -(length + 1) if line wasn't ok (ex. too long to store in buf). + * Non-NULL lines are always valid (null terminated) strings, even if truncated. */ +int text_reader_get_line(text_reader_t* tr, char** p_line); + +#endif