From 9973b3fb72338c9eb6506af613251082553a3ff0 Mon Sep 17 00:00:00 2001 From: bnnm Date: Fri, 17 Jan 2025 15:54:26 +0100 Subject: [PATCH] Add .srsa/srst KA1A [Dynasty Warriors Origins (PC)] --- src/base/decode.c | 114 +++--- src/base/decode.h | 2 - src/base/decode_state.h | 2 - src/base/sbuf.c | 55 ++- src/base/sbuf.h | 5 +- src/coding/coding.h | 13 +- src/coding/ka1a_decoder.c | 146 +++++++ src/coding/libs/ka1a_dec.c | 636 +++++++++++++++++++++++++++++++ src/coding/libs/ka1a_dec.h | 42 ++ src/coding/libs/ka1a_dec_data.h | 260 +++++++++++++ src/formats.c | 3 + src/layout/segmented.c | 14 +- src/libvgmstream.vcxproj | 5 + src/libvgmstream.vcxproj.filters | 15 + src/meta/ka1a.c | 56 +++ src/meta/ktsr.c | 40 +- src/meta/meta.h | 2 + src/vgmstream.c | 4 - src/vgmstream.h | 4 +- src/vgmstream_init.c | 1 + src/vgmstream_types.h | 2 + 21 files changed, 1322 insertions(+), 99 deletions(-) create mode 100644 src/coding/ka1a_decoder.c create mode 100644 src/coding/libs/ka1a_dec.c create mode 100644 src/coding/libs/ka1a_dec.h create mode 100644 src/coding/libs/ka1a_dec_data.h create mode 100644 src/meta/ka1a.c diff --git a/src/base/decode.c b/src/base/decode.c index 43938b2e..0e914e59 100644 --- a/src/base/decode.c +++ b/src/base/decode.c @@ -6,7 +6,6 @@ #include "plugins.h" #include "sbuf.h" -#if VGM_TEST_DECODER #include "../util/log.h" #include "decode_state.h" @@ -16,23 +15,26 @@ static void* decode_state_init() { } static void decode_state_reset(VGMSTREAM* vgmstream) { + if (!vgmstream->decode_state) + return; memset(vgmstream->decode_state, 0, sizeof(decode_state_t)); } +static void decode_state_free(VGMSTREAM* vgmstream) { + free(vgmstream->decode_state); +} + // this could be part of the VGMSTREAM but for now keep separate as it simplifies // some loop-related stuff void* decode_init() { return decode_state_init(); } -#endif /* custom codec handling, not exactly "decode" stuff but here to simplify adding new codecs */ void decode_free(VGMSTREAM* vgmstream) { -#if 
VGM_TEST_DECODER - free(vgmstream->decode_state); -#endif + decode_state_free(vgmstream); if (!vgmstream->codec_data) return; @@ -88,6 +90,10 @@ void decode_free(VGMSTREAM* vgmstream) { free_ea_mt(vgmstream->codec_data, vgmstream->channels); } + if (vgmstream->coding_type == coding_KA1A) { + free_ka1a(vgmstream->codec_data); + } + #ifdef VGM_USE_FFMPEG if (vgmstream->coding_type == coding_FFmpeg) { free_ffmpeg(vgmstream->codec_data); @@ -151,9 +157,7 @@ void decode_free(VGMSTREAM* vgmstream) { void decode_seek(VGMSTREAM* vgmstream) { -#if VGM_TEST_DECODER decode_state_reset(vgmstream); -#endif if (!vgmstream->codec_data) return; @@ -199,6 +203,10 @@ void decode_seek(VGMSTREAM* vgmstream) { seek_ea_mt(vgmstream, vgmstream->loop_current_sample); } + if (vgmstream->coding_type == coding_KA1A) { + seek_ka1a(vgmstream, vgmstream->loop_current_sample); + } + #ifdef VGM_USE_VORBIS if (vgmstream->coding_type == coding_OGG_VORBIS) { seek_ogg_vorbis(vgmstream->codec_data, vgmstream->loop_current_sample); @@ -256,9 +264,7 @@ void decode_seek(VGMSTREAM* vgmstream) { void decode_reset(VGMSTREAM* vgmstream) { -#if VGM_TEST_DECODER decode_state_reset(vgmstream); -#endif if (!vgmstream->codec_data) return; @@ -314,6 +320,10 @@ void decode_reset(VGMSTREAM* vgmstream) { reset_ea_mt(vgmstream); } + if (vgmstream->coding_type == coding_KA1A) { + reset_ka1a(vgmstream->codec_data); + } + #if defined(VGM_USE_MP4V2) && defined(VGM_USE_FDKAAC) if (vgmstream->coding_type == coding_MP4_AAC) { reset_mp4_aac(vgmstream); @@ -857,74 +867,75 @@ bool decode_uses_internal_offset_updates(VGMSTREAM* vgmstream) { return vgmstream->coding_type == coding_MS_IMA || vgmstream->coding_type == coding_MS_IMA_mono; } -#if VGM_TEST_DECODER -// decode frames for decoders which have their own sample buffer -static void decode_frames(sbuf_t* sbuf, VGMSTREAM* vgmstream) { - const int max_empty = 10000; + +// decode frames for decoders which decode frame by frame and have their own sample buffer +static void 
decode_frames(sbuf_t* sdst, VGMSTREAM* vgmstream) { + const int max_empty = 1000; int num_empty = 0; - decode_state_t* ds = vgmstream->decode_state; + sbuf_t* ssrc = &ds->sbuf; - while (sbuf->filled < sbuf->samples) { - // decode new frame if all was consumed - if (ds->sbuf.filled == 0) { + // fill the external buf by decoding N times; may read partially that buf + while (sdst->filled < sdst->samples) { + + // decode new frame if prev one was consumed + if (ssrc->filled == 0) { bool ok = false; switch (vgmstream->coding_type) { - case coding_TAC: - ok = decode_tac_frame(vgmstream); + case coding_KA1A: + ok = decode_ka1a_frame(vgmstream); break; default: - break; + goto decode_fail; } if (!ok) goto decode_fail; } + // decoder may not fill the buffer in a few calls in some codecs, but more it's probably a bug + if (ssrc->filled == 0) { + num_empty++; + if (num_empty > max_empty) { + VGM_LOG("VGMSTREAM: deadlock?\n"); + goto decode_fail; + } + } + if (ds->discard) { - // decode may signal that decoded samples need to be discarded, because of encoder delay - // (first samples of a file need to be ignored) or a loop - int current_discard = ds->discard; - if (current_discard > ds->sbuf.filled) - current_discard = ds->sbuf.filled; + // decoder may signal that samples need to be discarded (ex. 
encoder delay or during loops) + int samples_discard = ds->discard; + if (samples_discard > ssrc->filled) + samples_discard = ssrc->filled; - sbuf_consume(&ds->sbuf, current_discard); - - ds->discard -= current_discard; + sbuf_consume(ssrc, samples_discard); + ds->discard -= samples_discard; + // there may be more discard in next loop } else { // copy + consume - int samples_copy = ds->sbuf.filled; - if (samples_copy > sbuf->samples - sbuf->filled) - samples_copy = sbuf->samples - sbuf->filled; + int samples_copy = sbuf_get_copy_max(sdst, ssrc); - sbuf_copy_segments(sbuf, &ds->sbuf); - sbuf_consume(&ds->sbuf, samples_copy); - - sbuf->filled += samples_copy; + sbuf_copy_segments(sdst, ssrc, samples_copy); + sbuf_consume(ssrc, samples_copy); } } return; decode_fail: - /* on error just put some 0 samples */ - VGM_LOG("VGMSTREAM: decode fail, missing %i samples\n", sbuf->samples - sbuf->filled); - sbuf_silence_rest(sbuf); + //TODO clean ssrc? + //* on error just put some 0 samples + VGM_LOG("VGMSTREAM: decode fail, missing %i samples\n", sdst->samples - sdst->filled); + sbuf_silence_rest(sdst); } -#endif + /* Decode samples into the buffer. Assume that we have written samples_filled into the * buffer already, and we have samples_to_do consecutive samples ahead of us (won't call * more than one frame if configured above to do so). 
* Called by layouts since they handle samples written/to_do */ void decode_vgmstream(VGMSTREAM* vgmstream, int samples_filled, int samples_to_do, sample_t* buffer) { -#if VGM_TEST_DECODER - sbuf_t sbuf_tmp = {0}; - sbuf_t* sbuf = &sbuf_tmp; - sbuf_init_s16(sbuf, buffer, samples_filled + samples_to_do, vgmstream->channels); - sbuf->filled = samples_filled; -#endif int ch; buffer += samples_filled * vgmstream->channels; /* passed externally to simplify I guess */ @@ -1660,11 +1671,18 @@ void decode_vgmstream(VGMSTREAM* vgmstream, int samples_filled, int samples_to_d decode_ea_mt(vgmstream, buffer+ch, vgmstream->channels, samples_to_do, ch); } break; - default: -#if VGM_TEST_DECODER + + default: { + sbuf_t sbuf_tmp = {0}; + sbuf_t* sbuf = &sbuf_tmp; + + // buffers already adjusted + sbuf_init_s16(sbuf, buffer, /*samples_filled +*/ samples_to_do, vgmstream->channels); + sbuf->filled = 0; // samples_filled; + decode_frames(sbuf, vgmstream); -#endif break; + } } } diff --git a/src/base/decode.h b/src/base/decode.h index 4731eab4..4556b272 100644 --- a/src/base/decode.h +++ b/src/base/decode.h @@ -3,9 +3,7 @@ #include "../vgmstream.h" -#if VGM_TEST_DECODER void* decode_init(); -#endif void decode_free(VGMSTREAM* vgmstream); void decode_seek(VGMSTREAM* vgmstream); void decode_reset(VGMSTREAM* vgmstream); diff --git a/src/base/decode_state.h b/src/base/decode_state.h index 64bf7267..3ad71274 100644 --- a/src/base/decode_state.h +++ b/src/base/decode_state.h @@ -1,13 +1,11 @@ #ifndef _DECODE_STATE_H #define _DECODE_STATE_H -#if VGM_TEST_DECODER #include "sbuf.h" typedef struct { int discard; sbuf_t sbuf; } decode_state_t; -#endif #endif diff --git a/src/base/sbuf.c b/src/base/sbuf.c index e6d49c51..184c63e0 100644 --- a/src/base/sbuf.c +++ b/src/base/sbuf.c @@ -3,6 +3,7 @@ //#include #include "../util.h" #include "sbuf.h" +#include "../util/log.h" void sbuf_init(sbuf_t* sbuf, sfmt_t format, void* buf, int samples, int channels) { @@ -14,19 +15,15 @@ void sbuf_init(sbuf_t* 
sbuf, sfmt_t format, void* buf, int samples, int channels } void sbuf_init_s16(sbuf_t* sbuf, int16_t* buf, int samples, int channels) { - memset(sbuf, 0, sizeof(sbuf_t)); - sbuf->buf = buf; - sbuf->samples = samples; - sbuf->channels = channels; - sbuf->fmt = SFMT_S16; + sbuf_init(sbuf, SFMT_S16, buf, samples, channels); } void sbuf_init_f32(sbuf_t* sbuf, float* buf, int samples, int channels) { - memset(sbuf, 0, sizeof(sbuf_t)); - sbuf->buf = buf; - sbuf->samples = samples; - sbuf->channels = channels; - sbuf->fmt = SFMT_F32; + sbuf_init(sbuf, SFMT_F32, buf, samples, channels); +} + +void sbuf_init_flt(sbuf_t* sbuf, float* buf, int samples, int channels) { + sbuf_init(sbuf, SFMT_FLT, buf, samples, channels); } @@ -50,19 +47,19 @@ void* sbuf_get_filled_buf(sbuf_t* sbuf) { return buf; } -void sbuf_consume(sbuf_t* sbuf, int count) { +void sbuf_consume(sbuf_t* sbuf, int samples) { int sample_size = sfmt_get_sample_size(sbuf->fmt); - if (sample_size <= 0) + if (sample_size <= 0) //??? return; - if (count > sbuf->samples || count > sbuf->filled) //TODO? + if (samples > sbuf->samples || samples > sbuf->filled) //??? 
return; uint8_t* buf = sbuf->buf; - buf += count * sbuf->channels * sample_size; + buf += samples * sbuf->channels * sample_size; sbuf->buf = buf; - sbuf->filled -= count; - sbuf->samples -= count; + sbuf->filled -= samples; + sbuf->samples -= samples; } /* when casting float to int, value is simply truncated: @@ -157,6 +154,15 @@ void sbuf_copy_from_f32(sbuf_t* sbuf, float* src) { } } +// max samples to copy from ssrc to sdst, considering that dst may be partially filled +int sbuf_get_copy_max(sbuf_t* sdst, sbuf_t* ssrc) { + int sdst_max = sdst->samples - sdst->filled; + int samples_copy = ssrc->filled; + if (samples_copy > sdst_max) + samples_copy = sdst_max; + return samples_copy; +} + /* ugly thing to avoid repeating functions */ #define sbuf_copy_segments_internal(dst, src, src_pos, dst_pos, src_max) \ @@ -174,25 +180,29 @@ void sbuf_copy_from_f32(sbuf_t* sbuf, float* src) { dst[dst_pos++] = float_to_int(src[src_pos++] * value); \ } -void sbuf_copy_segments(sbuf_t* sdst, sbuf_t* ssrc) { - /* uncommon so probably fine albeit slower-ish, 0'd other channels first */ +// copy N samples from ssrc into dst (should be clamped externally) +void sbuf_copy_segments(sbuf_t* sdst, sbuf_t* ssrc, int samples_copy) { + if (ssrc->channels != sdst->channels) { - sbuf_silence_part(sdst, sdst->filled, ssrc->filled); + // 0'd other channels first (uncommon so probably fine albeit slower-ish) + sbuf_silence_part(sdst, sdst->filled, samples_copy); sbuf_copy_layers(sdst, ssrc, 0, ssrc->filled); #if 0 - // "faster" but lots of extra ifs, not worth it + // "faster" but lots of extra ifs per sample format, not worth it while (src_pos < src_max) { for (int ch = 0; ch < dst_channels; ch++) { dst[dst_pos++] = ch >= src_channels ? 0 : src[src_pos++]; } } #endif + //TODO: may want to handle externally? 
+ sdst->filled += samples_copy; return; } int src_pos = 0; int dst_pos = sdst->filled * sdst->channels; - int src_max = ssrc->filled * ssrc->channels; + int src_max = samples_copy * ssrc->channels; // define all posible combos, probably there is a better way to handle this but... @@ -239,6 +249,9 @@ void sbuf_copy_segments(sbuf_t* sdst, sbuf_t* ssrc) { float* src = ssrc->buf; sbuf_copy_segments_internal_flt(dst, src, src_pos, dst_pos, src_max, (1/32768.0f)); } + + //TODO: may want to handle externally? + sdst->filled += samples_copy; } diff --git a/src/base/sbuf.h b/src/base/sbuf.h index 226390cb..f5792eb7 100644 --- a/src/base/sbuf.h +++ b/src/base/sbuf.h @@ -30,6 +30,7 @@ typedef struct { void sbuf_init(sbuf_t* sbuf, sfmt_t format, void* buf, int samples, int channels); void sbuf_init_s16(sbuf_t* sbuf, int16_t* buf, int samples, int channels); void sbuf_init_f32(sbuf_t* sbuf, float* buf, int samples, int channels); +void sbuf_init_flt(sbuf_t* sbuf, float* buf, int samples, int channels); int sfmt_get_sample_size(sfmt_t fmt); @@ -39,9 +40,11 @@ void* sbuf_get_filled_buf(sbuf_t* sbuf); void sbuf_consume(sbuf_t* sbuf, int count); /* helpers to copy between buffers; note they assume dst and src aren't the same buf */ +int sbuf_get_copy_max(sbuf_t* sdst, sbuf_t* ssrc); + void sbuf_copy_to_f32(float* dst, sbuf_t* sbuf); void sbuf_copy_from_f32(sbuf_t* sbuf, float* src); -void sbuf_copy_segments(sbuf_t* sdst, sbuf_t* ssrc); +void sbuf_copy_segments(sbuf_t* sdst, sbuf_t* ssrc, int samples_copy); void sbuf_copy_layers(sbuf_t* sdst, sbuf_t* ssrc, int dst_ch_start, int expected); void sbuf_silence_s16(sample_t* dst, int samples, int channels, int filled); diff --git a/src/coding/coding.h b/src/coding/coding.h index 6e052438..85f5ebb6 100644 --- a/src/coding/coding.h +++ b/src/coding/coding.h @@ -372,9 +372,6 @@ typedef struct tac_codec_data tac_codec_data; tac_codec_data* init_tac(STREAMFILE* sf); void decode_tac(VGMSTREAM* vgmstream, sample_t* outbuf, int32_t 
samples_to_do); -#if VGM_TEST_DECODER -bool decode_tac_frame(VGMSTREAM* vgmstream); -#endif void reset_tac(tac_codec_data* data); void seek_tac(tac_codec_data* data, int32_t num_sample); void free_tac(tac_codec_data* data); @@ -390,6 +387,16 @@ void seek_ice(ice_codec_data* data, int32_t num_sample); void free_ice(ice_codec_data* data); +/* ka1a_decoder */ +typedef struct ka1a_codec_data ka1a_codec_data; + +ka1a_codec_data* init_ka1a(int bitrate_mode, int channels_tracks); +void free_ka1a(ka1a_codec_data* data); +void reset_ka1a(ka1a_codec_data* data); +bool decode_ka1a_frame(VGMSTREAM* vgmstream); +void seek_ka1a(VGMSTREAM* v, int32_t num_sample); + + #ifdef VGM_USE_VORBIS /* ogg_vorbis_decoder */ typedef struct ogg_vorbis_codec_data ogg_vorbis_codec_data; diff --git a/src/coding/ka1a_decoder.c b/src/coding/ka1a_decoder.c new file mode 100644 index 00000000..17d23a95 --- /dev/null +++ b/src/coding/ka1a_decoder.c @@ -0,0 +1,146 @@ +#include "coding.h" +#include "../base/decode_state.h" +#include "libs/ka1a_dec.h" + + +/* opaque struct */ +struct ka1a_codec_data { + uint8_t* buf; + float* fbuf; + + int frame_size; + void* handle; +}; + + +ka1a_codec_data* init_ka1a(int bitrate_mode, int channels_tracks) { + ka1a_codec_data* data = NULL; + int buf_size; + + data = calloc(1, sizeof(ka1a_codec_data)); + if (!data) goto fail; + + data->handle = ka1a_init(bitrate_mode, channels_tracks, 1); + if (!data->handle) goto fail; + + data->frame_size = ka1a_get_frame_size(data->handle); + if (data->frame_size <= 0) goto fail; + + buf_size = data->frame_size * channels_tracks; + data->buf = calloc(buf_size, sizeof(uint8_t)); + if (!data->buf) goto fail; + + data->fbuf = calloc(KA1A_FRAME_SAMPLES * channels_tracks, sizeof(float)); + if (!data->fbuf) goto fail; + + return data; +fail: + free_ka1a(data); + return NULL; +} + +static bool read_ka1a_frame(VGMSTREAM* v) { + ka1a_codec_data* data = v->codec_data; + int bytes; + + if (v->codec_config) { + int block = data->frame_size; + + 
// interleaved mode: read from each channel separately and mix in buf + for (int ch = 0; ch < v->channels; ch++) { + VGMSTREAMCHANNEL* vs = &v->ch[ch]; + + bytes = read_streamfile(data->buf + block * ch, vs->offset, block, vs->streamfile); + if (bytes != block) + return false; + + vs->offset += bytes; + } + } + else { + // single block of frames + int block = data->frame_size * v->channels; + VGMSTREAMCHANNEL* vs = &v->ch[0]; + + bytes = read_streamfile(data->buf, vs->offset, block, vs->streamfile); + if (bytes != block) + return false; + + vs->offset += bytes; + } + + return true; +} + +bool decode_ka1a_frame(VGMSTREAM* v) { + bool ok = read_ka1a_frame(v); + if (!ok) + return false; + + decode_state_t* ds = v->decode_state; + ka1a_codec_data* data = v->codec_data; + + int samples = ka1a_decode(data->handle, data->buf, data->fbuf); + if (samples < 0) + return false; + + sbuf_init_flt(&ds->sbuf, data->fbuf, KA1A_FRAME_SAMPLES, v->channels); + ds->sbuf.filled = samples; + + return true; +} + +void reset_ka1a(ka1a_codec_data* data) { + if (!data || !data->handle) return; + + ka1a_reset(data->handle); +} + +void seek_ka1a(VGMSTREAM* v, int32_t num_sample) { + ka1a_codec_data* data = v->codec_data; + decode_state_t* ds = v->decode_state; + if (!data) return; + + reset_ka1a(data); + + // find closest offset to desired sample + int32_t seek_frame = num_sample / KA1A_FRAME_SAMPLES; + int32_t seek_sample = num_sample % KA1A_FRAME_SAMPLES; + + ds->discard = seek_sample; + + if (v->codec_config) { + uint32_t seek_offset = seek_frame * data->frame_size; + + if (v->loop_ch) { + for (int ch = 0; ch < v->channels; ch++) { + v->loop_ch[ch].offset = v->loop_ch[ch].channel_start_offset + seek_offset; + } + } + } + else { + uint32_t seek_offset = seek_frame * data->frame_size * v->channels; + + if (v->loop_ch) { + v->loop_ch[0].offset = v->loop_ch[0].channel_start_offset + seek_offset; + } + } + + // (due to implicit encode delay the above is byte-exact equivalent vs a discard loop) 
+ #if 0 + ds->discard = num_sample; + if (v->loop_ch) { + v->loop_ch[0].offset = v->loop_ch[0].channel_start_offset; + } + #endif +} + +void free_ka1a(ka1a_codec_data* data) { + if (!data) return; + + if (data->handle) + ka1a_free(data->handle); + free(data->buf); + free(data->fbuf); + free(data); +} diff --git a/src/coding/libs/ka1a_dec.c b/src/coding/libs/ka1a_dec.c new file mode 100644 index 00000000..af253534 --- /dev/null +++ b/src/coding/libs/ka1a_dec.c @@ -0,0 +1,636 @@ +#include +#include +#include +#include +#include + +#include "ka1a_dec.h" +#include "ka1a_dec_data.h" +#include "../../util/reader_get.h" + +/* Decodes Koei Tecmo's KA1A, a fairly simple transform-based (FFT) mono codec. + * + * The codec seems nameless (it has a "_CODECNAME" string) so this is named after streamed files' + * fourCC. It's somewhat inefficient (not very packed) but simple so maybe designed for speed. + * OG code isn't too optimized though. + * + * Reverse engineered from exes, thanks to Kelebek1 and AceKombat for help and debugging. + * Output has been compared to memdumps and should be accurate with minor +-diffs. + * + * Even though some parts can be simplified/optimized code tries to emulate what source code + * may look like, undoing unrolled/vectorized parts. Functions marked as 'inline' don't exist in + * decomp but surely were part of the source code, while 'unused' args may be remants/compilation details. + * + * If you are going to use this info/code elsewhere kindly credit your sources. It's the right thing to do. + */ + + +// Gets frame info based on bitrate mode, to unpack 1 frame. +// OG code calls this per frame but codec is CBR (single bitrate index) plus values +// could be precalculated per bitrate index (remnant of VBR or more complex modes?) 
+static void get_frame_info(int bitrate_index, int* p_steps_size, int* p_coefs_size) { + int coefs_bits = 0; + int steps_bits = 0; + + // first 8 bands use 8-bit codes and step is implicit + for (int i = 0; i < 8; i++) { + int codes = BAND_CODES[bitrate_index][i]; + coefs_bits += 8 * codes; + } + + if (bitrate_index <= 5) { + // lower bitrate modes have one 8-bit code, rest is 4-bit + coefs_bits += (MAX_BANDS - 8) * 8; + for (int i = 8; i < MAX_BANDS; i++) { + int step_bits = BAND_STEP_BITS[i]; + int codes = BAND_CODES[bitrate_index][i]; + steps_bits += step_bits * codes; + coefs_bits += 4 * (codes - 1); + } + } + else { + // higher bitrate modes use 8-bit codes + for (int i = 8; i < MAX_BANDS; i++) { + int step_bits = BAND_STEP_BITS[i]; + int codes = BAND_CODES[bitrate_index][i]; + steps_bits += step_bits * codes; + coefs_bits += 8 * codes; + } + } + + // bits to bytes + padding + *p_steps_size = (steps_bits + 7) >> 3; + *p_coefs_size = (coefs_bits + 7) >> 3; +} + +// Helper used in related functions, but not during decode. Note that 'mode' must be validated externally (-5..5). +// In practice values are: 0x60, 0x68, 0x73, 0x7d, 0x8c, 0x9b, 0xad, 0xc2, 0xd7, 0xed, 0x102. 
+static int get_frame_size(int bitrate_mode) { + int scalefactor_size = 0x04; + int steps_size = 0; + int coefs_size = 0; + get_frame_info(bitrate_mode + BITRATE_INDEX_MODIFIER, &steps_size, &coefs_size); + return scalefactor_size + steps_size + coefs_size; +} + + +// Convert 8-bit signed code as exp +// (note that 0.086643398 being float is important to get results closer to memdumps) +static inline float unpack_convert_code(uint8_t code, float scalefactor) { + float coef; + if (code) { + float code_f = (int8_t)code; + if (code & 0x80) { + code_f = -code_f; + scalefactor = -scalefactor; + } + + coef = expf((code_f - 127.0f) * 0.086643398f) * scalefactor; + } + else { + coef = 0.0; + } + + return coef; +} + +// Adjust current coef by -1.0..1.0 (4-bit subcode values 0..14 * 1/7 to -1.0..1.0; code 15 seems unused). +// (note that 0.14285715f being float is important to get results closer to memdumps) +static inline float unpack_convert_subcode(uint8_t code, float coef) { + return ((code * 0.14285715f) - 1.0f) * coef; +} + +// Get N bits (max 8) from data, MSB order. +// Doesn't check boundaries, but should never past src as bits come from fixed tables. 
// Reads 'bits' bits from src at the position (*p_byte_pos, *p_bit_pos) and advances that position.
// Within each byte, bits are taken starting at bit offset *p_bit_pos counted from the least
// significant bit; a field crossing a byte boundary continues in the low bits of the next byte.
// Fields that would span more than 2 bytes return 0 (position is still advanced by one byte).
// No bounds checking is done on src.
static inline int unpack_get_bits(uint8_t* src, int* p_byte_pos, int* p_bit_pos, int bits) {
    const int byte_idx = *p_byte_pos;
    const int bit_idx = *p_bit_pos;
    const int end_bit = bit_idx + bits;
    int result = 0;

    if (end_bit <= 8) {
        // field fits in the current byte
        const uint32_t field_mask = (1 << bits) - 1;
        const uint8_t cur = src[byte_idx];
        result = (cur >> bit_idx) & field_mask;
    }
    else if (end_bit <= 16) {
        // field is split: low part from the current byte, high part from the next one
        const int lo_bits = 8 - bit_idx;
        const uint32_t lo_mask = (1 << lo_bits) - 1;
        const uint32_t hi_mask = (1 << (end_bit - 8)) - 1;
        const uint8_t lo = src[byte_idx + 0];
        const uint8_t hi = src[byte_idx + 1];
        result = ((hi & hi_mask) << lo_bits) + ((lo >> bit_idx) & lo_mask);
    }
    // wider fields shouldn't happen; result stays 0

    if (end_bit >= 8) {
        // consumed the current byte (at most one byte is skipped per call)
        *p_byte_pos = byte_idx + 1;
        *p_bit_pos = end_bit - 8;
    }
    else {
        *p_byte_pos = byte_idx;
        *p_bit_pos = end_bit;
    }

    return result;
}

// Unpack a single frame into quantized spectrum coefficients, packed like this:
// - 1 scalefactor (32-bit float)
// - N coef sub-positions aka steps (4-7 bits) per higher bands (8..21)
// - N codes (8-bit) per lower bands (0..7), of implicit positions
// - 1 main code (8-bit) per higher bands 8..21 then (N-1) coefs (8 or 4-bit) per bands
//
// Each code is converted to a coef then saved to certain position to dst buf.
// Lower bitrate modes use 4-bit codes that are relative to main coef (* +-1.0).
//
// Bands encode less coefs than dst may hold, so 'positions' are used to put coefs
// non-linearly, where unset indexes are 0 (dst must be memset before calling unpack frame).
// dst should be 1024, though usually only lower 512 (max step is 390 + ((1<<7) - 1)).
// Unpacks one frame from 'src' into coefficients in 'dst'.
//  src:           frame data; 4-byte little-endian float scalefactor, then 'steps_size' bytes
//                 of step bits, then the coefficient codes
//  dst:           output coefficients; only the positions present in the frame are written
//  steps_size:    size in bytes of the step bitstream (locates the start of the codes)
//  unused:        not referenced
//  bitrate_index: row used in BAND_CODES / BITRATE_SUBMODE (not range-checked here)
static void unpack_frame(uint8_t* src, float* dst, int steps_size, void* unused, int bitrate_index) {

    // copy coefs counts as they may be modified below
    int band_codes_tmp[MAX_BANDS];
    for (int i = 0; i < MAX_BANDS; i++) {
        band_codes_tmp[i] = BAND_CODES[bitrate_index][i];
    }

    // read base scalefactor (first 4 bytes) and setup buffers
    float scalefactor = get_f32le(src);
    uint8_t* src_steps = &src[0x04];
    uint8_t* src_codes = &src[0x04 + steps_size];

    // negative scalefactor signals more/less codes for some bands (total doesn't change though)
    if (scalefactor < 0.0f) {
        scalefactor = -scalefactor;

        // bands 8..11 gain 'mod' codes each while bands 17..20 lose the same amount
        int mod = BITRATE_SUBMODE[bitrate_index];
        for (int i = 8; i < 12; i++) {
            band_codes_tmp[i] += mod;
        }
        for (int i = 17; i < 21; i++) {
            band_codes_tmp[i] -= mod;
        }
    }

    // coefs from lower bands (in practice fixed to 5 * 8)
    // (codes and output positions advance together, so these coefs are stored linearly)
    int code_pos = 0;
    for (int band = 0; band < 8; band++) {
        int band_codes = band_codes_tmp[band];
        for (int i = 0; i < band_codes; i++) {
            uint8_t code = src_codes[code_pos];
            dst[code_pos] = unpack_convert_code(code, scalefactor);
            code_pos++;
        }
    }

    // simple bitreading helpers (struct?)
    int br_bytepos = 0;
    int br_bitpos = 0; // in current byte

    int subcode_pos = code_pos + (MAX_BANDS - 8); // position after bands 8..21 main coef

    uint8_t code;
    float coef;
    int substep;

    if (bitrate_index <= 5) {
        // lower bitrates encode 1 main 8-bit coef per band and rest is main * +-1.0, position info in a bitstream
        // (two 4-bit subcodes share a byte: low nibble is read first, then the high nibble)
        bool high_flag = false;
        for (int band = 8; band < MAX_BANDS; band++) {
            int band_codes = band_codes_tmp[band];
            int band_step = BAND_STEPS[band];
            int step_bits = BAND_STEP_BITS[band];

            // each coef is stored at the band's base position plus a per-coef offset from the bitstream
            substep = unpack_get_bits(src_steps, &br_bytepos, &br_bitpos, step_bits);

            code = src_codes[code_pos];
            code_pos++;

            coef = unpack_convert_code(code, scalefactor);
            dst[band_step + substep] = coef;

            for (int i = 1; i < band_codes; i++) {
                substep = unpack_get_bits(src_steps, &br_bytepos, &br_bitpos, step_bits);

                // only move to the next byte once its high nibble has been consumed
                code = src_codes[subcode_pos];
                if (high_flag)
                    subcode_pos++;

                uint8_t subcode = high_flag ?
                        (code >> 4) & 0x0F :
                        (code >> 0) & 0x0F;

                high_flag = !high_flag;

                dst[band_step + substep] = unpack_convert_subcode(subcode, coef);
            }
        }
    }
    else {
        // higher bitrates encode all coefs normally, but still use lower bitrates' ordering scheme (see above)
        for (int band = 8; band < MAX_BANDS; band++) {
            int band_codes = band_codes_tmp[band];
            int band_step = BAND_STEPS[band];
            int step_bits = BAND_STEP_BITS[band];

            substep = unpack_get_bits(src_steps, &br_bytepos, &br_bitpos, step_bits);

            code = src_codes[code_pos];
            code_pos++;

            coef = unpack_convert_code(code, scalefactor);
            dst[band_step + substep] = coef;

            for (int i = 1; i < band_codes; i++) {
                substep = unpack_get_bits(src_steps, &br_bytepos, &br_bitpos, step_bits);

                code = src_codes[subcode_pos];
                subcode_pos++;

                coef = unpack_convert_code(code, scalefactor);
                dst[band_step + substep] = coef;
            }
        }
    }
}


// Multiplies, in place, each complex value (real[i], imag[i]) by the complex
// twiddle factor (tw_real[i], tw_imag[i]), for i in 0..points-1.
static void transform_twiddles(int points, float* real, float* imag, const float* tw_real, const float* tw_imag) {
    for (int i = 0; i < points; i++) {
        float coef_real = real[i];
        float coef_imag = imag[i];
        float twid_real = tw_real[i];
        float twid_imag = tw_imag[i];

        real[i] = (twid_real * coef_real) - (twid_imag * coef_imag);
        imag[i] = (twid_imag * coef_real) + (twid_real * coef_imag);
    }
}

// Reorders real/imag in place by swapping entry i with entry j, where j is updated
// incrementally for each i (per the function name, j is the bit-reversed index of i).
// Each pair is swapped only once (when i < j).
static inline void transform_bit_reversal_permutation(int points, float* real, float* imag) {
    const int half = points >> 1;

    int j = 0;
    for (int i = 1; i < points; i++) {

        // j is typically calculated via subs of m, unsure if manual or compiler optimization
        j = half ^ j;
        int m = half;
        while (m > j) {
            m >>= 1;
            j = m ^ j;
        }

        if (i < j) {
            float coef_real = real[i];
            float coef_imag = imag[i];
            real[i] = real[j];
            imag[i] = imag[j];
            real[j] = coef_real;
            imag[j] = coef_imag;
        }
    }
}

// In-place FFT over 'points' complex values split into real[] and imag[] arrays.
//  unused:               not referenced
//  cos_table, sin_table: twiddle lookup tables, indexed up to (index * 3) below
// The input is first permuted (see above), then processed in stages of butterflies that each
// combine 4 values (k, i_lo, i_md, i_hi), and finally pairs (k, k + half) are combined.
// NOTE(review): statement order inside the butterflies matters (values are read before being
// overwritten); keep as-is unless output is re-verified against reference dumps.
static void transform_fft(int points, void* unused, float* real, float* imag, const float* cos_table, const float* sin_table) {
    const int half = points >> 1;

    transform_bit_reversal_permutation(points, real, imag);

    // these are actually the same value, so OG compilation only uses the cos_table one; added both for completeness
    float w_real_base = cos_table[points >> 3];
    float w_imag_base = sin_table[points >> 3];

    // FFT computation using twiddle factors and sub-ffts, probably some known optimization
    for (int m = 4; m <= points; m <<= 1) { // 0.. (log2(256) / 2)
        int m4 = m >> 2;

        // first group of this stage (starts at index 0): butterflies without twiddle multiplication
        for (int j = m4; j > 0; j >>= 2) {
            int min = m4 - j;
            int max = m4 - (j >> 1);
            int i_md = min + 2 * m4;

            for (int k = min; k < max; k++) {
                int i_lo = i_md - m4;
                int i_hi = i_md + m4;

                float coef_im_a = imag[k] - imag[i_lo];
                float coef_re_a = real[k] - real[i_lo];
                real[k] = real[i_lo] + real[k];
                imag[k] = imag[i_lo] + imag[k];

                float coef_re_b = real[i_hi] - real[i_md];
                float coef_im_b = imag[i_hi] - imag[i_md];
                float tmp_ra_ib = coef_re_a - coef_im_b;
                float tmp_rb_ia = coef_re_b + coef_im_a;
                float tmp_ib_ra = coef_im_b + coef_re_a;
                float tmp_ia_rb = coef_im_a - coef_re_b;

                real[i_md] = real[i_hi] + real[i_md];
                imag[i_md] = imag[i_hi] + imag[i_md];
                real[i_lo] = tmp_ra_ib;
                imag[i_lo] = tmp_rb_ia;
                real[i_hi] = tmp_ib_ra;
                imag[i_hi] = tmp_ia_rb;

                i_md++;
            }
        }

        // last stage only has the first group
        if (m >= points)
            continue;

        // second group (starts at index m): same butterflies, results scaled by the base twiddle
        for (int j = m4; j > 0; j >>= 2) {
            int min = m + m4 - j;
            int max = m + m4 - (j >> 1);
            int i_md = min + 2 * m4;

            for (int k = min; k < max; k++) {
                int i_lo = i_md - m4;
                int i_hi = i_md + m4;

                float coef_im_a = imag[k] - imag[i_lo];
                float coef_re_a = real[k] - real[i_lo];
                real[k] = real[i_lo] + real[k];
                imag[k] = imag[i_lo] + imag[k];

                float coef_re_b = real[i_hi] - real[i_md];
                float coef_im_b = imag[i_hi] - imag[i_md];
                float tmp_ra_ib = coef_re_a - coef_im_b;
                float tmp_rb_ia = coef_re_b + coef_im_a;
                float tmp_ib_ra = coef_im_b + coef_re_a;
                float tmp_ia_rb = coef_im_a - coef_re_b;

                real[i_md] = real[i_hi] + real[i_md];
                imag[i_md] = imag[i_hi] + imag[i_md];
                real[i_lo] = (tmp_rb_ia + tmp_ra_ib) * w_real_base;
                imag[i_lo] = (tmp_rb_ia - tmp_ra_ib) * w_real_base;
                real[i_hi] = (tmp_ia_rb - tmp_ib_ra) * w_imag_base;
                imag[i_hi] = (-tmp_ia_rb - tmp_ib_ra) * w_imag_base;

                i_md++;
            }
        }

        // remaining groups (start at 2m, 3m, ...): butterflies with per-group twiddles from the tables
        int tmp_j = half;
        for (int m2 = m * 2; m2 < points; m2 += m) {
            // ???
            // (same incremental index update as in transform_bit_reversal_permutation, used to pick the table entry)
            int tmp_m = half;
            for (tmp_j ^= tmp_m; tmp_m > tmp_j; tmp_j ^= tmp_m) {
                tmp_m = tmp_m >> 1;
            }

            int table_index = tmp_j >> 2;
            float w_real1 = cos_table[table_index];
            float w_imag1 = -sin_table[table_index];
            float w_real3 = cos_table[table_index * 3];
            float w_imag3 = -sin_table[table_index * 3];

            for (int j = m4; j > 0; j >>= 2) {
                int min = m2 + m4 - j;
                int max = m2 + m4 - (j >> 1);
                int i_md = min + 2 * m4;

                for (int k = min; k < max; k++) {
                    int i_lo = i_md - m4;
                    int i_hi = i_md + m4;

                    float coef_im_a = imag[k] - imag[i_lo];
                    float coef_re_a = real[k] - real[i_lo];
                    real[k] = real[i_lo] + real[k];
                    imag[k] = imag[i_lo] + imag[k];

                    float coef_im_b = imag[i_hi] - imag[i_md];
                    float coef_re_b = real[i_hi] - real[i_md];
                    float tmp_ra_ib = coef_re_a - coef_im_b;
                    float tmp_rb_ia = coef_re_b + coef_im_a;
                    float tmp_ib_ra = coef_im_b + coef_re_a;
                    float tmp_ia_rb = coef_im_a - coef_re_b;

                    real[i_md] = real[i_hi] + real[i_md];
                    imag[i_md] = imag[i_hi] + imag[i_md];
                    real[i_lo] = (tmp_ra_ib * w_real1) - (tmp_rb_ia * w_imag1);
                    imag[i_lo] = (tmp_ra_ib * w_imag1) + (tmp_rb_ia * w_real1);
                    real[i_hi] = (tmp_ib_ra * w_real3) - (tmp_ia_rb * w_imag3);
                    imag[i_hi] = (tmp_ib_ra * w_imag3) + (tmp_ia_rb * w_real3);

                    i_md++;
                }
            }
        }
    }

    // final swapping
    // (sum goes to entry k, difference to entry k + half)
    for (int m = half; m > 0; m >>= 2) {
        int min = half - m;
        int max = half - (m >> 1);

        for (int k = min; k < max; k++) {
            float coef_im = imag[k] - imag[k + half];
            float coef_re = real[k] - real[k + half];
            real[k] = real[k + half] + real[k];
            imag[k] = imag[k + half] + imag[k];
            real[k + half] = coef_re;
            imag[k + half] = coef_im;
        }
    }
}

// Transform unpacked frequency-domain coefficients (spectrum) to time-domain samples using inverse FFT.
// Seemingly a variation/simplification of the Cooley-Tukey algorithm (radix-4?).
+void transform_frame(void* unused1, float* src, float* dst, void* unused2, float* fft_buf) { + float* real = fft_buf; + float* imag = fft_buf + 256; + + // initialize buffers from src + for (int i = 0; i < 256; i++) { + real[i] = src[i * 2]; + imag[255 - i] = src[i * 2 + 1]; + } + + transform_twiddles(256, real, imag, TWIDDLES_REAL, TWIDDLES_IMAG); + transform_fft(256, NULL, real, imag, COS_TABLE, SIN_TABLE); + transform_twiddles(256, real, imag, TWIDDLES_REAL, TWIDDLES_IMAG); + + // Scale results by (1 / 512) + for (int i = 0; i < 256; i++) { + real[i] *= 0.001953125f; + imag[i] *= 0.001953125f; + } + + // Reorder output (input buf may be reused as output here as there is no overlap). + // Note that input is 512 coefs but output is 1024 samples (externally combined with samples) + int pos = 0; + for (int i = 0; i < 128; i++) { + dst[pos++] = real[128 + i]; + dst[pos++] = -imag[127 - i]; + } + for (int i = 0; i < 256; i++) { + dst[pos++] = imag[i]; + dst[pos++] = -real[255 - i]; + } + for (int i = 0; i < 128; i++) { + dst[pos++] = -real[i]; + dst[pos++] = imag[255 - i]; + } +} + +// Decodes a block of frames (see .h) +// +// To get 512 samples decoder needs to combine samples from prev + current frame (MP3 granule-style?). +// though will only output samples from current. prev-frame can be optionally used to setup overlapping +// samples with 'setup_flag'. Since decoding current-frame will also setup the overlap for next frame, +// prev data and predecode-flag are only needed on init or after seeking. +// +// Original decoder expects 2 blocks in src (1 frame * channels * tracks): src[0] = prev, src[block-size] = curr +// (even if prev isn't used). This isn't very flexible, so this decoder expects only 1 block. +// Probably setup this odd way due to how data is read/handled in KT's engine. 
+//
+// Parameters:
+// - src: one block of packed frames, read as (channels * tracks) consecutive frames of frame_size bytes
+// - tracks/channels: block layout; frames are ordered track by track, then channel by channel
+// - dst: output; each track gets a run of FRAME_SAMPLES * channels floats with its channels interleaved
+//   (not written when setup_flag is set)
+// - bitrate_mode: header value, turned into a table index by adding BITRATE_INDEX_MODIFIER
+// - setup_flag: when set only the 'prev' overlap buffer is filled and the function returns early
+// - prev: overlap state, FRAME_SAMPLES floats per frame (channels * tracks frames)
+// - temp: scratch buffer, 1024 floats for the FFT followed by 1024 floats for coefs/samples
+static void decode_frame(unsigned char* src, int tracks, int channels, float* dst, int bitrate_mode, int setup_flag, float* prev, float* temp) {
+    float* fft_buf = &temp[0]; //size 512 * 2
+    float* coefs = &temp[512 * 2]; //size 512 * 2
+
+    int bitrate_index = bitrate_mode + BITRATE_INDEX_MODIFIER;
+    int steps_size = 0;
+    int coefs_size = 0;
+    get_frame_info(bitrate_index, &steps_size, &coefs_size);
+    int frame_size = 0x04 + steps_size + coefs_size;
+
+    // decode 'prev block of frames' (optional as it just setups 'prev' buf, no samples are written)
+    if (setup_flag) {
+        uint8_t* src_block = &src[0]; // 1st block in src
+
+        for (int track = 0; track < tracks; track++) {
+            int frame_num = channels * track;
+
+            for (int ch = 0; ch < channels; ch++) {
+                uint8_t* frame = &src_block[frame_num * frame_size];
+
+                // only the first FRAME_SAMPLES coefs are cleared before unpacking (unset coefs stay 0)
+                memset(coefs, 0, FRAME_SAMPLES * sizeof(float));
+                unpack_frame(frame, coefs, steps_size, NULL, bitrate_index);
+                transform_frame(NULL, coefs, coefs, NULL, fft_buf);
+
+                int interleave = frame_num * FRAME_SAMPLES;
+                for (int i = 0; i < FRAME_SAMPLES; i++) {
+                    // save samples for 'current block of frames' and overlap
+                    prev[interleave + i] = coefs[512 + i] * OVERLAP_WINDOW[511 - i];
+                }
+
+                frame_num++;
+            }
+        }
+    }
+
+    if (setup_flag) // MOD: expect only 1 block per call
+        return;
+
+    // decode 'current block of frames' (writes 512 samples, plus setups 'prev' buf)
+    {
+        //uint8_t* src_block = &src[channels * tracks * frame_size]; // 2nd block in src in OG code
+        uint8_t* src_block = &src[0]; // MOD: expect only 1 block per call
+
+        for (int track = 0; track < tracks; track++) {
+            int frame_num = channels * track;
+
+            float* dst_track = &dst[frame_num * FRAME_SAMPLES];
+            for (int ch = 0; ch < channels; ch++) {
+                uint8_t* frame = &src_block[frame_num * frame_size];
+
+                memset(coefs, 0, FRAME_SAMPLES * sizeof(float));
+                unpack_frame(frame, coefs, steps_size, NULL, bitrate_index);
+                transform_frame(NULL, coefs, coefs, NULL, fft_buf);
+
+                int interleave = frame_num * FRAME_SAMPLES;
+                for (int i = 0; i < FRAME_SAMPLES; i++) {
+                    // window both halves; 1st half + saved overlap is the output, 2nd half is kept for next call
+                    coefs[i] *= OVERLAP_WINDOW[i];
+                    coefs[512 + i] *= OVERLAP_WINDOW[511 - i];
+                    dst_track[i * channels + ch] = coefs[i] + prev[interleave + i];
+                }
+
+                // save overlapped samples for next
+                memcpy(&prev[interleave], &coefs[512], FRAME_SAMPLES * sizeof(float));
+
+                frame_num++;
+            }
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+// API (not part of original code)
+
+struct ka1a_handle_t {
+    // config
+    int bitrate_mode;   // as passed to ka1a_init (before adding BITRATE_INDEX_MODIFIER)
+    int channels;
+    int tracks;
+
+    // state
+    bool setup_flag;        // next frame will be used as setup and won't output samples
+    float temp[1024 * 2];   // fft + coef buf
+    float* prev;            // at least samples * channels * tracks
+};
+
+// Allocates a decoder handle. Returns NULL if bitrate_mode or channels/tracks are out of range,
+// or if any allocation fails. The handle starts in 'setup' state (see ka1a_reset).
+ka1a_handle_t* ka1a_init(int bitrate_mode, int channels, int tracks) {
+
+    // compare before adding the modifier so an out-of-range value can't overflow the sum
+    if (bitrate_mode < -BITRATE_INDEX_MODIFIER || bitrate_mode >= MAX_BITRATES - BITRATE_INDEX_MODIFIER)
+        return NULL;
+
+    // check each value on its own: the plain product could overflow, or be positive for two negative inputs
+    if (channels <= 0 || tracks <= 0 || channels > MAX_CHANNELS_TRACKS / tracks)
+        return NULL;
+
+    ka1a_handle_t* ctx = calloc(1, sizeof(ka1a_handle_t));
+    if (!ctx) goto fail;
+
+    ctx->prev = calloc((size_t)FRAME_SAMPLES * channels * tracks, sizeof(float));
+    if (!ctx->prev) goto fail; // was testing 'ctx' again, so a failed 'prev' alloc went unnoticed
+
+    ctx->bitrate_mode = bitrate_mode;
+    ctx->channels = channels;
+    ctx->tracks = tracks;
+
+    ka1a_reset(ctx);
+
+    return ctx;
+fail:
+    ka1a_free(ctx);
+    return NULL;
+}
+
+// Releases the handle and its overlap buffer (NULL is accepted).
+void ka1a_free(ka1a_handle_t* ctx) {
+    if (!ctx)
+        return;
+
+    free(ctx->prev);
+    free(ctx);
+}
+
+// Puts the decoder back in 'setup' state, to be used after init or seeking (NULL is accepted).
+void ka1a_reset(ka1a_handle_t* ctx) {
+    if (!ctx)
+        return;
+
+    ctx->setup_flag = true;
+    // no need to reset buffers as on next decode frame will be used to setup them.
+}
+
+// Decodes one block from src. Returns -1 without a handle, 0 when the block was consumed as setup
+// (dst untouched), or FRAME_SAMPLES when samples were written to dst.
+int ka1a_decode(ka1a_handle_t* ctx, unsigned char* src, float* dst) {
+    if (!ctx)
+        return -1;
+
+    decode_frame(src, ctx->tracks, ctx->channels, dst, ctx->bitrate_mode, ctx->setup_flag, ctx->prev, ctx->temp);
+
+    // a setup call only primes 'prev', so report no samples and decode normally from now on
+    if (ctx->setup_flag) {
+        ctx->setup_flag = false;
+        return 0;
+    }
+
+    return FRAME_SAMPLES;
+}
+
+// Returns the size of a single frame for the handle's bitrate mode, or 0 without a handle.
+int ka1a_get_frame_size(ka1a_handle_t* ctx) {
+    if (!ctx)
+        return 0;
+    return get_frame_size(ctx->bitrate_mode);
+}
diff --git a/src/coding/libs/ka1a_dec.h b/src/coding/libs/ka1a_dec.h
new file mode 100644
index 00000000..4c3fe373
--- /dev/null
+++ b/src/coding/libs/ka1a_dec.h
@@ -0,0 +1,42 @@
+#ifndef _KA1A_DEC_
+#define _KA1A_DEC_
+
+/* Decodes Koei Tecmo's KA1A, a fairly simple transform-based (FFT) mono codec. */
+
+
+//#define KA1A_FRAME_SIZE_MAX 0x200
+#define KA1A_FRAME_SAMPLES 512
+
+
+typedef struct ka1a_handle_t ka1a_handle_t;
+
+/* Inits decoder.
+ * - bitrate_mode: value from header (-5..5)
+ * - channels: Nch-interleaved tracks
+ * - tracks: number of parts of N-ch
+ *
+ * Channel/tracks define final interleaved output per ka1a_decode:
+ * [track0 ch0 ch1 ch0 ch1... x512][track1 ch0 ch1 ch0 ch1... x512]...
+ * Codec is mono though, so this can be safely reinterpreted, ex. channels = tracks * channels, tracks = 1:
+ * [track0 ch0 ch1 ch2 ch3 ch4 ch5... x512]
+ * or even make N single decoders per track/channel and pass single frames.
+ */
+ka1a_handle_t* ka1a_init(int bitrate_mode, int channels, int tracks);
+
+void ka1a_free(ka1a_handle_t* handle);
+
+void ka1a_reset(ka1a_handle_t* handle);
+
+/* Decodes one block of data.
+ * Returns samples done, 0 on setup or negative on error.
+ * After init/reset next decode won't output samples (similar to encoder delay).
+ *
+ * src should have frame_size * channels * tracks.
+ * dst should have KA1A_FRAME_SAMPLES * channels * tracks (see init for interleave info).
+ */
+int ka1a_decode(ka1a_handle_t* handle, unsigned char* src, float* dst);
+
+// Get current frame size for one single frame.
+int ka1a_get_frame_size(ka1a_handle_t* handle); + +#endif diff --git a/src/coding/libs/ka1a_dec_data.h b/src/coding/libs/ka1a_dec_data.h new file mode 100644 index 00000000..fb7271b3 --- /dev/null +++ b/src/coding/libs/ka1a_dec_data.h @@ -0,0 +1,260 @@ +#ifndef _KA1A_DEC_DATA_ +#define _KA1A_DEC_DATA_ + +#define MAX_CHANNELS_TRACKS 32 //arbitrary max + +#define FRAME_SAMPLES 512 +#define MAX_BANDS 21 +#define FFT_POINTS 256 +#define MAX_BITRATES 11 + +// bitrate mode in header is defined from -5 to 5, where negative are lower bitrate modes which use +// less resolution for some codes. Related functions need to add +5 to index so it's pretty pointless. +#define BITRATE_INDEX_MODIFIER 5 + +// default number of quantized coefficients encoded per band, for each bitrate modes +static const int BAND_CODES[MAX_BITRATES][MAX_BANDS] = { + {5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, }, + {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, }, + {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, }, + {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, }, + {5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, }, + {5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, }, + {5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, }, + {5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, }, + {5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, }, + {5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, }, + {5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, }, +}; + +// Number of modified coefs to be added/substracted to some bands, for each bitrate mode (varies per frame) +// Total per 1 band shouldn't go over 10. +static const int BITRATE_SUBMODE[MAX_BITRATES] = { + 0, 0, 0, 2, 2, 2, 4, 3, 2, 1, 0, +}; + +// base positions in dst buffer for coefs in frame. 
A sub-position (implicit or from a bitstream) sets +// the final index, which doesn't need to be linear. +// ex. band 13 may write 6 coefs to dst[120 + step], where step may be 0, 11, 6, 2, 8, 13 +// (max 19; unset indexes are implicitly 0) +static const int BAND_STEPS[MAX_BANDS] = { + 0, 5, 10, 15, 20, 25, 30, 35, 40, 50, 60, 70, 80, 100, 120, 140, 170, 200, 240, 300, 390, +}; + +// lower bands are 0 since all tables above are fixed to 8 +static const int BAND_STEP_BITS[MAX_BANDS] = { + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 7, 7, +}; + +// 360 cosine, close to: for (0..256) t[i] = cos(2 * PI * i / points) with some rounding? +static const float COS_TABLE[FFT_POINTS] = { + 1.0, 0.99969882, 0.99879545, 0.99729043, 0.99518472, 0.99247956, 0.98917651, 0.98527765, + 0.98078525, 0.97570211, 0.97003126, 0.96377605, 0.95694035, 0.94952816, 0.94154406, 0.93299282, + 0.9238795, 0.91420972, 0.90398932, 0.8932243, 0.88192123, 0.87008697, 0.8577286, 0.84485358, + 0.8314696, 0.81758481, 0.80320752, 0.78834641, 0.77301043, 0.75720882, 0.74095112, 0.7242471, + 0.70710677, 0.68954051, 0.67155892, 0.65317279, 0.63439327, 0.61523157, 0.59569931, 0.57580817, + 0.55557019, 0.53499764, 0.5141027, 0.4928982, 0.47139665, 0.44961131, 0.42755511, 0.40524128, + 0.38268343, 0.35989496, 0.33688983, 0.31368166, 0.29028463, 0.26671275, 0.24298012, 0.21910122, + 0.19509023, 0.17096186, 0.1467305, 0.12241063, 0.098017134, 0.073564492, 0.04906765, 0.024541136, + -0.0000000437, -0.024541223, -0.049067739, -0.073564574, -0.098017223, -0.12241071, -0.14673057, -0.17096195, + -0.19509032, -0.21910131, -0.2429802, -0.26671284, -0.29028472, -0.31368172, -0.33688992, -0.35989505, + -0.38268352, -0.40524134, -0.42755508, -0.44961137, -0.47139683, -0.49289817, -0.51410276, -0.5349977, + -0.55557036, -0.57580817, -0.59569937, -0.61523169, -0.63439327, -0.65317285, -0.67155904, -0.68954068, + -0.70710677, -0.72424716, -0.74095124, -0.75720882, -0.77301049, -0.78834647, -0.80320764, 
-0.81758481, + -0.83146966, -0.84485364, -0.8577286, -0.87008703, -0.88192135, -0.8932243, -0.90398932, -0.91420978, + -0.92387962, -0.93299282, -0.94154412, -0.94952822, -0.95694035, -0.96377605, -0.97003126, -0.97570217, + -0.98078531, -0.98527765, -0.98917651, -0.9924795, -0.99518472, -0.99729049, -0.99879545, -0.99969882, + -1.0, -0.99969882, -0.99879545, -0.99729043, -0.99518472, -0.9924795, -0.98917651, -0.98527765, + -0.98078525, -0.97570211, -0.97003126, -0.96377605, -0.95694029, -0.94952816, -0.94154406, -0.93299276, + -0.9238795, -0.91420972, -0.90398926, -0.89322418, -0.88192123, -0.87008691, -0.85772854, -0.84485358, + -0.83146954, -0.81758469, -0.80320752, -0.78834641, -0.77301037, -0.7572087, -0.74095112, -0.72424704, + -0.70710665, -0.68954057, -0.67155892, -0.65317291, -0.63439333, -0.61523157, -0.59569919, -0.57580805, + -0.55557001, -0.53499734, -0.51410282, -0.4928982, -0.47139668, -0.44961122, -0.42755494, -0.40524107, + -0.38268313, -0.35989511, -0.33688986, -0.31368169, -0.29028454, -0.26671258, -0.24297991, -0.21910091, + -0.19509038, -0.17096189, -0.14673041, -0.12241054, -0.098016933, -0.073564284, -0.049067326, -0.024541287, + 0.0000000119, 0.024541309, 0.049067825, 0.073564783, 0.098017432, 0.12241104, 0.14673042, 0.17096192, + 0.19509041, 0.2191014, 0.24298041, 0.26671305, 0.29028502, 0.31368169, 0.33688989, 0.35989514, + 0.3826836, 0.40524155, 0.42755538, 0.44961166, 0.47139671, 0.49289823, 0.51410282, 0.53499776, + 0.55557042, 0.57580847, 0.59569925, 0.61523157, 0.63439333, 0.65317291, 0.6715591, 0.68954074, + 0.70710701, 0.72424704, 0.74095112, 0.75720888, 0.77301055, 0.78834653, 0.8032077, 0.81758499, + 0.8314696, 0.84485358, 0.85772866, 0.87008709, 0.88192135, 0.89322442, 0.90398943, 0.91420972, + 0.92387956, 0.93299282, 0.94154412, 0.94952828, 0.95694041, 0.96377617, 0.97003126, 0.97570211, + 0.98078531, 0.98527765, 0.98917657, 0.99247956, 0.99518478, 0.99729043, 0.99879545, 0.99969882, +}; + +// 360 sine, close to: for (0..256) 
t[i] = sin(2 * PI * i / points) with some rounding? +static const float SIN_TABLE[FFT_POINTS] = { + 0.0, 0.024541229, 0.049067676, 0.073564567, 0.098017141, 0.12241068, 0.14673047, 0.1709619, + 0.19509032, 0.21910124, 0.2429802, 0.26671278, 0.29028466, 0.31368175, 0.33688986, 0.35989505, + 0.38268346, 0.40524134, 0.42755508, 0.44961134, 0.47139674, 0.49289823, 0.51410276, 0.53499764, + 0.55557024, 0.57580823, 0.59569931, 0.61523163, 0.63439333, 0.65317285, 0.67155898, 0.68954057, + 0.70710677, 0.7242471, 0.74095118, 0.75720888, 0.77301043, 0.78834641, 0.80320752, 0.81758481, + 0.83146966, 0.84485358, 0.85772866, 0.87008697, 0.88192129, 0.8932243, 0.90398932, 0.91420978, + 0.9238795, 0.93299282, 0.94154406, 0.94952822, 0.95694035, 0.96377605, 0.97003126, 0.97570211, + 0.98078531, 0.98527765, 0.98917651, 0.99247956, 0.99518472, 0.99729043, 0.99879545, 0.99969882, + 1.0, 0.99969882, 0.99879545, 0.99729043, 0.99518472, 0.9924795, 0.98917651, 0.98527765, + 0.98078525, 0.97570211, 0.97003126, 0.96377605, 0.95694029, 0.94952816, 0.94154406, 0.93299282, + 0.9238795, 0.91420972, 0.90398932, 0.8932243, 0.88192123, 0.87008703, 0.8577286, 0.84485352, + 0.83146954, 0.81758481, 0.80320752, 0.78834635, 0.77301049, 0.75720882, 0.74095106, 0.72424698, + 0.70710677, 0.68954051, 0.67155886, 0.65317285, 0.63439327, 0.61523151, 0.59569913, 0.57580817, + 0.55557019, 0.53499746, 0.51410276, 0.49289814, 0.47139663, 0.44961137, 0.42755505, 0.40524122, + 0.38268328, 0.35989505, 0.3368898, 0.3136816, 0.29028472, 0.26671273, 0.24298008, 0.21910107, + 0.19509031, 0.17096181, 0.14673033, 0.1224107, 0.098017097, 0.073564447, 0.049067486, 0.02454121, + -0.000000087399997, -0.024541385, -0.049067661, -0.073564619, -0.098017268, -0.12241087, -0.1467305, -0.17096199, + -0.19509049, -0.21910124, -0.24298024, -0.2667129, -0.29028487, -0.31368178, -0.33688995, -0.3598952, + -0.38268343, -0.4052414, -0.42755523, -0.44961151, -0.47139677, -0.49289829, -0.51410288, -0.53499764, + -0.5555703, -0.57580835, 
-0.59569931, -0.61523163, -0.63439339, -0.65317297, -0.67155898, -0.68954062, + -0.70710689, -0.7242471, -0.74095118, -0.75720876, -0.77301043, -0.78834647, -0.80320758, -0.81758493, + -0.83146977, -0.84485376, -0.85772854, -0.87008697, -0.88192129, -0.89322436, -0.90398937, -0.91420984, + -0.92387968, -0.93299276, -0.94154406, -0.94952822, -0.95694035, -0.96377611, -0.97003132, -0.97570223, + -0.98078525, -0.98527765, -0.98917651, -0.99247956, -0.99518472, -0.99729049, -0.99879545, -0.99969882, + -1.0, -0.99969882, -0.99879545, -0.99729043, -0.99518472, -0.9924795, -0.98917651, -0.98527765, + -0.98078525, -0.97570211, -0.9700312, -0.96377599, -0.95694023, -0.94952822, -0.94154406, -0.93299276, + -0.92387944, -0.91420966, -0.90398914, -0.89322412, -0.88192129, -0.87008697, -0.85772854, -0.84485346, + -0.83146948, -0.81758463, -0.80320758, -0.78834641, -0.77301043, -0.75720876, -0.740951, -0.72424692, + -0.70710653, -0.68954062, -0.67155898, -0.65317279, -0.63439316, -0.61523145, -0.59569907, -0.57580793, + -0.5555703, -0.53499764, -0.5141027, -0.49289808, -0.47139654, -0.44961107, -0.42755479, -0.40524137, + -0.38268343, -0.35989496, -0.33688971, -0.31368154, -0.2902844, -0.2667124, -0.24298023, -0.21910122, +}; + +// similar but not quite: for (0..256) t[i] = cos(2 * PI * i / points); +static const float TWIDDLES_REAL[FFT_POINTS] = { + 0.9999997, 0.99997616, 0.999915, 0.99981618, 0.99967968, 0.99950558, 0.99929386, 0.99904448, + 0.99875754, 0.99843293, 0.99807078, 0.99767107, 0.99723375, 0.99675888, 0.99624652, 0.9956966, + 0.99510926, 0.99448442, 0.99382216, 0.99312246, 0.99238533, 0.99161088, 0.99079913, 0.98995006, + 0.98906368, 0.98814011, 0.98717928, 0.98618132, 0.98514622, 0.98407406, 0.98296481, 0.98181856, + 0.98063534, 0.97941524, 0.97815824, 0.9768644, 0.97553378, 0.97416645, 0.97276247, 0.97132182, + 0.96984458, 0.96833086, 0.96678072, 0.96519411, 0.96357119, 0.96191204, 0.96021664, 0.95848507, + 0.95671743, 0.95491374, 0.9530741, 0.95119864, 0.9492873, 
0.94734025, 0.94535756, 0.94333923, + 0.94128537, 0.93919611, 0.9370715, 0.93491161, 0.93271649, 0.93048626, 0.92822099, 0.92592078, + 0.92358571, 0.92121589, 0.91881138, 0.9163723, 0.91389865, 0.91139066, 0.90884835, 0.90627176, + 0.90366107, 0.90101641, 0.89833778, 0.89562535, 0.89287919, 0.89009941, 0.88728613, 0.88443941, + 0.88155943, 0.87864625, 0.8757, 0.87272078, 0.86970866, 0.86666387, 0.86358637, 0.86047643, + 0.85733402, 0.85415941, 0.85095257, 0.84771371, 0.84444296, 0.84114039, 0.83780617, 0.83444041, + 0.83104324, 0.82761478, 0.82415515, 0.82066447, 0.8171429, 0.81359059, 0.81000769, 0.80639422, + 0.80275041, 0.79907632, 0.79537225, 0.7916382, 0.78787428, 0.7840808, 0.78025776, 0.77640527, + 0.77252364, 0.76861292, 0.76467323, 0.76070476, 0.75670767, 0.75268203, 0.74862808, 0.74454594, + 0.74043584, 0.73629779, 0.73213202, 0.72793871, 0.72371799, 0.71947002, 0.71519494, 0.71089298, + 0.70656419, 0.70220888, 0.6978271, 0.69341904, 0.68898481, 0.68452471, 0.68003887, 0.67552733, + 0.67099041, 0.66642827, 0.66184098, 0.65722877, 0.65259188, 0.64793038, 0.64324445, 0.63853431, + 0.63380021, 0.62904215, 0.62426049, 0.61945528, 0.61462677, 0.60977507, 0.60490042, 0.60000306, + 0.59508306, 0.59014064, 0.58517605, 0.58018941, 0.57518089, 0.57015073, 0.56509918, 0.56002629, + 0.5549323, 0.5498175, 0.54468191, 0.53952587, 0.5343495, 0.52915293, 0.52393651, 0.51870036, + 0.51344466, 0.50816965, 0.50287557, 0.4975625, 0.49223068, 0.48688033, 0.48151165, 0.47612482, + 0.47072011, 0.46529773, 0.45985776, 0.45440048, 0.44892606, 0.44343477, 0.43792677, 0.43240228, + 0.42686164, 0.42130479, 0.41573209, 0.41014373, 0.40453994, 0.39892092, 0.39328688, 0.38763815, + 0.3819747, 0.37629688, 0.3706049, 0.36489895, 0.35917926, 0.35344607, 0.34769964, 0.34194005, + 0.33616757, 0.33038244, 0.32458487, 0.31877509, 0.31295338, 0.30711982, 0.30127469, 0.2954182, + 0.28955057, 0.28367206, 0.27778289, 0.27188337, 0.26597348, 0.26005358, 0.2541239, 0.24818464, + 0.24223605, 
0.23627833, 0.23031183, 0.22433653, 0.21835281, 0.21236086, 0.20636091, 0.20035319, + 0.19433793, 0.18831547, 0.1822858, 0.17624927, 0.1702061, 0.16415653, 0.15810078, 0.15203907, + 0.14597176, 0.13989884, 0.13382064, 0.1277374, 0.12164936, 0.11555674, 0.10945977, 0.10335879, + 0.097253807, 0.091145165, 0.085033081, 0.078917801, 0.072799556, 0.066678561, 0.060555179, 0.054429397, + 0.048301566, 0.042171918, 0.036040682, 0.029908087, 0.023774367, 0.01763987, 0.011504591, 0.0053688786, +}; + +// similar but not quite: for (0..256) t[i] = -sin(2 * PI * i / points); +static const float TWIDDLES_IMAG[] = { + -0.00076699042, -0.0069028586, -0.013038468, -0.019173585, -0.025307981, -0.031441424, -0.037573684, -0.043704528, + -0.049833726, -0.05596105, -0.062086266, -0.068209141, -0.074329458, -0.080446973, -0.086561449, -0.092672676, + -0.098780416, -0.10488442, -0.1109845, -0.11708038, -0.12317186, -0.12925872, -0.13534068, -0.14141756, + -0.14748912, -0.15355512, -0.15961535, -0.16566958, -0.17171754, -0.17775905, -0.18379387, -0.18982176, + -0.19584252, -0.20185591, -0.20786169, -0.21385963, -0.21984953, -0.22583117, -0.23180428, -0.23776868, + -0.24372412, -0.24967039, -0.25560728, -0.26153448, -0.26745188, -0.27335921, -0.27925625, -0.28514278, + -0.29101855, -0.2968834, -0.30273706, -0.3085793, -0.31440994, -0.32022873, -0.3260355, -0.33182994, + -0.33761194, -0.3433812, -0.34913751, -0.35488072, -0.36061054, -0.36632681, -0.37202924, -0.3777177, + -0.38339195, -0.38905174, -0.39469689, -0.40032718, -0.40594241, -0.41154233, -0.41712674, -0.42269552, + -0.42824832, -0.43378502, -0.43930539, -0.44480923, -0.45029631, -0.45576641, -0.4612194, -0.46665499, + -0.47207305, -0.47747329, -0.48285556, -0.48821968, -0.49356541, -0.49889252, -0.50420088, -0.50949031, + -0.51476049, -0.52001131, -0.52524251, -0.53045398, -0.53564543, -0.54081678, -0.54596776, -0.55109817, + -0.55620778, -0.56129652, -0.56636411, -0.57141036, -0.57643509, -0.58143818, -0.58641928, -0.59137839, 
+ -0.59631521, -0.60122955, -0.60612124, -0.61099017, -0.61583608, -0.62065876, -0.62545812, -0.63023394, + -0.63498604, -0.63971418, -0.64441824, -0.6490981, -0.6537534, -0.6583842, -0.66299021, -0.66757119, + -0.67212707, -0.67665768, -0.68116277, -0.68564218, -0.69009584, -0.69452351, -0.69892502, -0.70330018, + -0.70764893, -0.71197104, -0.71626627, -0.72053456, -0.72477579, -0.72898966, -0.73317605, -0.73733491, + -0.74146605, -0.74556917, -0.74964428, -0.75369114, -0.75770962, -0.76169956, -0.76566088, -0.76959336, + -0.77349681, -0.77737117, -0.78121626, -0.78503191, -0.78881806, -0.79257452, -0.79630113, -0.79999769, + -0.80366421, -0.80730045, -0.81090623, -0.81448156, -0.81802624, -0.82154006, -0.825023, -0.82847482, + -0.83189553, -0.83528483, -0.83864272, -0.84196901, -0.84526366, -0.84852648, -0.85175729, -0.85495609, + -0.85812271, -0.86125702, -0.86435878, -0.86742812, -0.8704648, -0.8734687, -0.87643969, -0.87937772, + -0.88228261, -0.88515425, -0.88799256, -0.8907975, -0.89356887, -0.89630663, -0.89901066, -0.90168083, + -0.90431696, -0.90691912, -0.90948713, -0.91202086, -0.91452032, -0.91698533, -0.91941583, -0.92181164, + -0.92417276, -0.92649913, -0.92879063, -0.93104714, -0.93326861, -0.93545491, -0.93760598, -0.93972176, + -0.9418022, -0.94384718, -0.94585657, -0.94783038, -0.94976848, -0.95167089, -0.9535374, -0.95536804, + -0.95716274, -0.95892137, -0.96064389, -0.96233022, -0.96398038, -0.96559417, -0.96717167, -0.96871275, + -0.97021735, -0.97168541, -0.97311687, -0.97451174, -0.97586989, -0.97719133, -0.97847593, -0.97972375, + -0.98093462, -0.98210859, -0.98324561, -0.98434556, -0.98540848, -0.98643428, -0.987423, -0.98837447, + -0.98928875, -0.99016583, -0.99100554, -0.991808, -0.99257314, -0.99330086, -0.99399126, -0.99464417, + -0.99525958, -0.99583763, -0.99637812, -0.99688113, -0.99734658, -0.99777448, -0.99816483, -0.99851763, + -0.99883282, -0.99911034, -0.99935031, -0.99955267, -0.99971735, -0.99984443, -0.99993384, -0.99998558, 
+}; + +// seems custom, perhaps based on some common one with some alpha? +static const float OVERLAP_WINDOW[FRAME_SAMPLES] = { + 0.00041374451, 0.00063187029, 0.00083242479, 0.0010303947, 0.0012312527, 0.0014377162, 0.0016513923, 0.001873354, + 0.0021043862, 0.0023451056, 0.0025960256, 0.0028575913, 0.0031302026, 0.0034142293, 0.003710018, 0.0040178993, + 0.0043381932, 0.00467121, 0.0050172545, 0.0053766258, 0.00574962, 0.0061365301, 0.0065376465, 0.0069532581, + 0.007383653, 0.0078291167, 0.008289936, 0.0087663941, 0.0092587769, 0.0097673666, 0.010292448, 0.010834301, + 0.01139321, 0.011969455, 0.012563316, 0.013175075, 0.01380501, 0.0144534, 0.015120523, 0.015806656, + 0.016512074, 0.017237054, 0.017981868, 0.018746791, 0.019532094, 0.020338045, 0.021164915, 0.022012968, + 0.022882473, 0.023773693, 0.02468689, 0.025622323, 0.026580252, 0.027560933, 0.028564619, 0.02959156, + 0.030642008, 0.031716209, 0.032814406, 0.033936843, 0.035083756, 0.036255382, 0.037451953, 0.038673703, + 0.039920855, 0.041193634, 0.042492259, 0.043816946, 0.045167912, 0.046545364, 0.047949508, 0.049380545, + 0.050838675, 0.05232409, 0.053836983, 0.055377539, 0.056945939, 0.058542356, 0.06016697, 0.061819945, + 0.063501447, 0.065211624, 0.066950649, 0.068718657, 0.070515797, 0.07234221, 0.074198022, 0.07608337, + 0.07799837, 0.07994315, 0.081917815, 0.083922468, 0.085957222, 0.088022165, 0.090117387, 0.092242986, + 0.094399013, 0.096585557, 0.098802686, 0.10105046, 0.10332893, 0.10563815, 0.10797815, 0.11034897, + 0.11275065, 0.1151832, 0.11764663, 0.12014097, 0.12266621, 0.12522236, 0.12780938, 0.13042729, + 0.13307604, 0.13575561, 0.13846597, 0.14120705, 0.14397883, 0.14678125, 0.14961423, 0.15247771, + 0.15537159, 0.15829581, 0.16125028, 0.16423489, 0.16724953, 0.17029409, 0.17336844, 0.17647249, + 0.17960605, 0.18276905, 0.18596126, 0.18918259, 0.19243285, 0.19571187, 0.19901948, 0.2023555, + 0.20571974, 0.20911199, 0.21253204, 0.21597971, 0.21945477, 0.22295699, 0.22648615, 
0.23004198, + 0.23362428, 0.23723276, 0.2408672, 0.2445273, 0.24821278, 0.25192341, 0.25565886, 0.25941887, + 0.26320317, 0.26701137, 0.27084324, 0.27469841, 0.27857658, 0.28247747, 0.28640065, 0.29034585, + 0.29431269, 0.29830083, 0.30230993, 0.30633962, 0.31038952, 0.31445926, 0.31854844, 0.32265672, + 0.32678369, 0.33092892, 0.33509207, 0.33927271, 0.34347042, 0.3476848, 0.35191545, 0.35616189, + 0.36042371, 0.36470053, 0.36899185, 0.37329727, 0.37761635, 0.38194862, 0.38629359, 0.3906509, + 0.39502001, 0.3994005, 0.4037919, 0.40819371, 0.41260549, 0.41702676, 0.42145702, 0.42589581, + 0.43034267, 0.43479711, 0.43925858, 0.44372663, 0.44820082, 0.45268059, 0.45716542, 0.4616549, + 0.4661485, 0.47064567, 0.47514597, 0.47964889, 0.4841539, 0.48866051, 0.49316826, 0.49767655, + 0.50218499, 0.50669295, 0.51120001, 0.5157057, 0.52020943, 0.52471071, 0.52920908, 0.53370398, + 0.53819495, 0.54268152, 0.54716307, 0.5516392, 0.55610937, 0.56057316, 0.56502998, 0.56947935, + 0.57392085, 0.57835394, 0.5827781, 0.58719289, 0.59159786, 0.59599245, 0.60037625, 0.60474873, + 0.6091094, 0.61345792, 0.61779374, 0.62211639, 0.62642545, 0.63072038, 0.63500077, 0.63926625, + 0.6435163, 0.64775056, 0.65196848, 0.65616965, 0.66035372, 0.66452026, 0.66866881, 0.67279899, + 0.67691034, 0.6810025, 0.6850751, 0.68912768, 0.69315994, 0.69717139, 0.7011618, 0.70513064, + 0.70907766, 0.71300244, 0.7169047, 0.72078407, 0.72464013, 0.72847265, 0.73228133, 0.73606575, + 0.73982555, 0.74356061, 0.74727046, 0.75095487, 0.75461364, 0.7582463, 0.76185274, 0.76543266, + 0.76898569, 0.77251172, 0.77601039, 0.77948159, 0.78292501, 0.78634042, 0.78972763, 0.79308641, + 0.79641658, 0.79971796, 0.80299026, 0.80623347, 0.80944729, 0.81263155, 0.81578618, 0.81891102, + 0.82200587, 0.82507062, 0.82810515, 0.83110934, 0.83408308, 0.83702624, 0.83993882, 0.84282064, + 0.84567159, 0.84849167, 0.85128081, 0.85403895, 0.85676599, 0.85946196, 0.86212677, 0.86476046, + 0.86736292, 0.86993414, 0.87247425, 
0.87498307, 0.87746072, 0.87990719, 0.88232255, 0.88470674, + 0.88705987, 0.88938189, 0.89167297, 0.89393318, 0.89616245, 0.89836091, 0.90052873, 0.90266585, + 0.90477246, 0.90684867, 0.90889448, 0.91091013, 0.91289562, 0.91485113, 0.91677684, 0.91867274, + 0.92053914, 0.9223761, 0.92418379, 0.92596233, 0.92771196, 0.92943287, 0.93112504, 0.93278885, + 0.9344244, 0.936032, 0.93761164, 0.93916368, 0.94068825, 0.94218558, 0.94365591, 0.94509935, + 0.94651628, 0.94790679, 0.9492712, 0.95060962, 0.95192248, 0.95320988, 0.95447206, 0.95570934, + 0.95692199, 0.95811015, 0.95927411, 0.96041423, 0.96153063, 0.96262366, 0.96369362, 0.96474063, + 0.96576512, 0.96676731, 0.96774739, 0.96870577, 0.96964264, 0.97055829, 0.97145301, 0.97232717, + 0.97318095, 0.97401464, 0.97482848, 0.97562289, 0.97639805, 0.97715431, 0.97789192, 0.97861117, + 0.97931236, 0.97999579, 0.98066169, 0.98131043, 0.98194218, 0.98255736, 0.98315614, 0.98373884, + 0.9843058, 0.98485726, 0.98539352, 0.98591483, 0.98642153, 0.9869138, 0.98739201, 0.98785633, + 0.98830712, 0.98874468, 0.98916918, 0.98958093, 0.98998028, 0.99036741, 0.99074256, 0.99110597, + 0.991458, 0.99179888, 0.99212885, 0.99244815, 0.99275702, 0.9930557, 0.99334443, 0.9936235, + 0.99389309, 0.99415344, 0.99440479, 0.99464744, 0.99488151, 0.99510723, 0.99532485, 0.9955346, + 0.99573666, 0.99593133, 0.99611866, 0.99629897, 0.99647242, 0.99663919, 0.99679953, 0.99695361, + 0.9971016, 0.99724364, 0.99737996, 0.99751073, 0.99763614, 0.9977563, 0.99787146, 0.99798179, + 0.99808735, 0.99818838, 0.99828494, 0.99837732, 0.99846554, 0.99854976, 0.99863017, 0.99870688, + 0.99878007, 0.99884975, 0.99891615, 0.99897939, 0.99903959, 0.99909681, 0.99915117, 0.99920285, + 0.9992519, 0.99929845, 0.99934256, 0.9993844, 0.99942398, 0.99946147, 0.99949694, 0.99953043, + 0.99956208, 0.99959201, 0.99962014, 0.99964666, 0.9996717, 0.99969524, 0.99971735, 0.99973816, + 0.99975771, 0.99977601, 0.99979317, 0.99980927, 0.99982429, 0.99983829, 0.99985141, 
0.99986362, + 0.99987501, 0.99988562, 0.99989551, 0.99990469, 0.99991322, 0.99992108, 0.99992836, 0.99993503, + 0.99994129, 0.99994701, 0.99995226, 0.99995708, 0.99996156, 0.99996561, 0.99996936, 0.9999727, + 0.9999758, 0.9999786, 0.99998116, 0.99998349, 0.99998552, 0.99998742, 0.99998909, 0.99999058, + 0.99999189, 0.99999309, 0.99999416, 0.99999511, 0.99999589, 0.99999666, 0.99999726, 0.99999779, + 0.99999827, 0.99999863, 0.99999899, 0.99999923, 0.99999946, 0.99999964, 0.99999976, 0.99999988, +}; + +#endif diff --git a/src/formats.c b/src/formats.c index c3dfe85a..3e32c8c9 100644 --- a/src/formats.c +++ b/src/formats.c @@ -271,6 +271,7 @@ static const char* extension_list[] = { "joe", "jstm", + "ka1a", "kat", "kces", "kcey", //fake extension/header id for .pcm (renamed, to be removed) @@ -907,6 +908,7 @@ static const coding_info coding_info_list[] = { {coding_TAC, "tri-Ace Codec"}, {coding_ICE_RANGE, "Inti Creates Range Codec"}, {coding_ICE_DCT, "Inti Creates DCT Codec"}, + {coding_KA1A, "Koei Tecmo KA1A Codec"}, #ifdef VGM_USE_VORBIS {coding_OGG_VORBIS, "Ogg Vorbis"}, @@ -1449,6 +1451,7 @@ static const meta_info meta_info_list[] = { {meta_DSP_ASURA, "Rebellion DSP header"}, {meta_ONGAKUKAN_RIFF_ADP, "Ongakukan RIFF WAVE header"}, {meta_SDD, "Doki Denki DSBH header"}, + {meta_KA1A, "Koei Tecmo KA1A header"}, }; void get_vgmstream_coding_description(VGMSTREAM* vgmstream, char* out, size_t out_size) { diff --git a/src/layout/segmented.c b/src/layout/segmented.c index 02b14f6f..32214927 100644 --- a/src/layout/segmented.c +++ b/src/layout/segmented.c @@ -80,16 +80,18 @@ void render_vgmstream_segmented(sbuf_t* sbuf, VGMSTREAM* vgmstream) { ssrc->buf = buf_filled; } - render_main(ssrc, data->segments[data->current_segment]); - + int samples_done = render_main(ssrc, data->segments[data->current_segment]); + samples_done = samples_to_do; // returned buf may have changed if (ssrc->buf != buf_filled) { - sbuf_copy_segments(sbuf, ssrc); + sbuf_copy_segments(sbuf, ssrc, 
samples_done); + } else { + //TODO ??? + sbuf->filled += samples_done; } - sbuf->filled += samples_to_do; - vgmstream->current_sample += samples_to_do; - vgmstream->samples_into_block += samples_to_do; + vgmstream->current_sample += samples_done; + vgmstream->samples_into_block += samples_done; } return; diff --git a/src/libvgmstream.vcxproj b/src/libvgmstream.vcxproj index 5fbf4e3a..fc012222 100644 --- a/src/libvgmstream.vcxproj +++ b/src/libvgmstream.vcxproj @@ -114,6 +114,8 @@ + + @@ -269,6 +271,7 @@ + @@ -321,6 +324,7 @@ + @@ -525,6 +529,7 @@ + diff --git a/src/libvgmstream.vcxproj.filters b/src/libvgmstream.vcxproj.filters index 4f79a75d..ccdd6fef 100644 --- a/src/libvgmstream.vcxproj.filters +++ b/src/libvgmstream.vcxproj.filters @@ -176,6 +176,12 @@ coding\libs\Header Files + + coding\libs\Header Files + + + coding\libs\Header Files + coding\libs\Header Files @@ -637,6 +643,9 @@ coding\Source Files + + coding\Source Files + coding\Source Files @@ -793,6 +802,9 @@ coding\libs\Source Files + + coding\libs\Source Files + coding\libs\Source Files @@ -1405,6 +1417,9 @@ meta\Source Files + + meta\Source Files + meta\Source Files diff --git a/src/meta/ka1a.c b/src/meta/ka1a.c new file mode 100644 index 00000000..cb8190bc --- /dev/null +++ b/src/meta/ka1a.c @@ -0,0 +1,56 @@ +#include "meta.h" +#include "../coding/coding.h" + + +/* KA1A - Koei Tecmo's custom codec streams [Dynasty Warriors Origins (PC)] */ +VGMSTREAM* init_vgmstream_ka1a(STREAMFILE* sf) { + VGMSTREAM* vgmstream = NULL; + uint32_t start_offset; + + /* checks */ + if (!is_id32be(0x00,sf, "KA1A")) + return NULL; + /* .ka1a: header id */ + if (!check_extensions(sf,"ka1a")) + return NULL; + // KA1A don't seem found outside SRST, but probably will (like KOVS) + + //uint32_t data_size = read_u32le(0x04,sf); + int channels = read_s32le(0x08,sf); + int tracks = read_s32le(0x0c,sf); + int sample_rate = read_s32le(0x10,sf); + int32_t num_samples = read_s32le(0x14,sf); + int32_t loop_start = read_s32le(0x18,sf); 
+ int32_t loop_region = read_s32le(0x1c,sf); + int bitrate_mode = read_s32le(0x20,sf); // signed! (may be negative) + // 0x24: reserved? + + bool loop_flag = (loop_region > 0); + + start_offset = 0x28; + + /* build the VGMSTREAM */ + vgmstream = allocate_vgmstream(channels * tracks, loop_flag); + if (!vgmstream) goto fail; + + vgmstream->meta_type = meta_KA1A; + vgmstream->sample_rate = sample_rate; + vgmstream->num_samples = num_samples; + vgmstream->loop_start_sample = loop_start; + vgmstream->loop_end_sample = loop_start + loop_region; //typically num_samples + + // KA1A interleaves tracks (ex. 2ch and 2 tracks = 512 stereo samples + 512 stereo samples). + // For vgmstream this is reinterpreted as plain channels like other KT formats do (codec handles + // this fine). Encoder delay is implicit. + vgmstream->codec_data = init_ka1a(bitrate_mode, channels * tracks); + if (!vgmstream->codec_data) goto fail; + vgmstream->coding_type = coding_KA1A; + vgmstream->layout_type = layout_none; + + if (!vgmstream_open_stream(vgmstream, sf, start_offset)) + goto fail; + return vgmstream; +fail: + close_vgmstream(vgmstream); + return NULL; +} diff --git a/src/meta/ktsr.c b/src/meta/ktsr.c index eb37ecb4..8aabdbc5 100644 --- a/src/meta/ktsr.c +++ b/src/meta/ktsr.c @@ -4,7 +4,7 @@ #include "../util/companion_files.h" #include "ktsr_streamfile.h" -typedef enum { NONE, MSADPCM, DSP, GCADPCM, ATRAC9, RIFF_ATRAC9, KOVS, KTSS, KTAC } ktsr_codec; +typedef enum { NONE, MSADPCM, DSP, GCADPCM, ATRAC9, RIFF_ATRAC9, KOVS, KTSS, KTAC, KA1A, KA1A_INTERNAL } ktsr_codec; #define MAX_CHANNELS 8 @@ -87,7 +87,7 @@ static VGMSTREAM* init_vgmstream_ktsr_internal(STREAMFILE* sf, bool is_srsa) { STREAMFILE* sf_b = NULL; ktsr_header ktsr = {0}; int target_subsong = sf->stream_index; - int separate_offsets = 0; + bool separate_offsets = false; ktsr.is_srsa = is_srsa; if (ktsr.is_srsa) { @@ -152,6 +152,7 @@ static VGMSTREAM* init_vgmstream_ktsr_internal(STREAMFILE* sf, bool is_srsa) { case KOVS: 
init_vgmstream = init_vgmstream_ogg_vorbis; ext = "kvs"; break; case KTSS: init_vgmstream = init_vgmstream_ktss; ext = "ktss"; break; case KTAC: init_vgmstream = init_vgmstream_ktac; ext = "ktac"; break; + case KA1A: init_vgmstream = init_vgmstream_ka1a; ext = "ka1a"; break; default: break; } @@ -183,16 +184,36 @@ static VGMSTREAM* init_vgmstream_ktsr_internal(STREAMFILE* sf, bool is_srsa) { case MSADPCM: vgmstream->coding_type = coding_MSADPCM_mono; vgmstream->layout_type = layout_none; - separate_offsets = 1; + separate_offsets = true; /* 0x00: samples per frame */ vgmstream->frame_size = read_u16le(ktsr.extra_offset + 0x02, sf_b); break; + case KA1A_INTERNAL: { + // 00: bitrate mode + // XX: start offsets per channel (from hash-id start aka extra_offset - 0x48) + // XX: size per channel + // XX: padding + + int bitrate_mode = read_s32le(ktsr.extra_offset + 0x00, sf); // signed! (may be negative) + + vgmstream->codec_data = init_ka1a(bitrate_mode, ktsr.channels); + if (!vgmstream->codec_data) goto fail; + vgmstream->coding_type = coding_KA1A; + vgmstream->layout_type = layout_none; + + // mono streams handled in decoder, though needs channel offsets + flag + vgmstream->codec_config = 1; + separate_offsets = true; + + break; + } + case DSP: vgmstream->coding_type = coding_NGC_DSP; vgmstream->layout_type = layout_none; - separate_offsets = 1; + separate_offsets = true; dsp_read_coefs_le(vgmstream, sf, ktsr.extra_offset + 0x1c, 0x60); dsp_read_hist_le (vgmstream, sf, ktsr.extra_offset + 0x40, 0x60); @@ -327,12 +348,12 @@ static int parse_codec(ktsr_header* ktsr) { case 0x05: /* PC/Steam [Fate/Samurai Remnant (PC)] */ if (ktsr->format == 0x0000 && !ktsr->is_external) ktsr->codec = MSADPCM; // Warrior Orochi 4 (PC) - //else if (ktsr->format == 0x0001) - // ktsr->codec = KA1A; // Dynasty Warriors Origins (PC) + else if (ktsr->format == 0x0001) + ktsr->codec = KA1A_INTERNAL; // Dynasty Warriors Origins (PC) else if (ktsr->format == 0x0005 && ktsr->is_external) 
ktsr->codec = KOVS; // Atelier Ryza (PC) - //else if (ktsr->format == 0x1001 && ktsr->is_external) - // ktsr->codec = KA1A; // Dynasty Warriors Origins (PC) + else if (ktsr->format == 0x1001 && ktsr->is_external) + ktsr->codec = KA1A; // Dynasty Warriors Origins (PC) else goto fail; break; @@ -377,7 +398,8 @@ static bool parse_ktsr_subfile(ktsr_header* ktsr, STREAMFILE* sf, uint32_t offse type = read_u32be(offset + 0x00, sf); /* hash-id? */ //size = read_u32le(offset + 0x04, sf); - /* probably could check the flag in sound header, but the format is kinda messy */ + // probably could check the flags in sound header, but the format is kinda messy + // (all these numbers are surely LE hashes of something) switch(type) { case 0x38D0437D: /* external [Nioh (PC/PS4), Atelier Ryza (PC)] */ diff --git a/src/meta/meta.h b/src/meta/meta.h index 71593e20..9515ec48 100644 --- a/src/meta/meta.h +++ b/src/meta/meta.h @@ -1013,4 +1013,6 @@ VGMSTREAM* init_vgmstream_adp_ongakukan(STREAMFILE* sf); VGMSTREAM* init_vgmstream_sdd(STREAMFILE* sf); +VGMSTREAM* init_vgmstream_ka1a(STREAMFILE* sf); + #endif /*_META_H*/ diff --git a/src/vgmstream.c b/src/vgmstream.c index 9780c359..6006dafe 100644 --- a/src/vgmstream.c +++ b/src/vgmstream.c @@ -225,10 +225,8 @@ VGMSTREAM* allocate_vgmstream(int channels, int loop_flag) { vgmstream->mixer = mixer_init(vgmstream->channels); /* pre-init */ if (!vgmstream->mixer) goto fail; -#if VGM_TEST_DECODER vgmstream->decode_state = decode_init(); if (!vgmstream->decode_state) goto fail; -#endif //TODO: improve/init later to minimize memory /* garbage buffer for seeking/discarding (local bufs may cause stack overflows with segments/layers) @@ -420,9 +418,7 @@ static bool merge_vgmstream(VGMSTREAM* opened_vgmstream, VGMSTREAM* new_vgmstrea opened_vgmstream->layout_type = layout_none; /* fixes some odd cases */ /* discard the second VGMSTREAM */ -#if VGM_TEST_DECODER decode_free(new_vgmstream); -#endif mixer_free(new_vgmstream->mixer); 
free(new_vgmstream->tmpbuf); free(new_vgmstream->start_vgmstream); diff --git a/src/vgmstream.h b/src/vgmstream.h index 10d522fc..5b2eb4f1 100644 --- a/src/vgmstream.h +++ b/src/vgmstream.h @@ -242,9 +242,7 @@ typedef struct { void* tmpbuf; /* garbage buffer used for seeking/trimming */ size_t tmpbuf_size; /* for all channels (samples = tmpbuf_size / channels / sample_size) */ -#if VGM_TEST_DECODER - void* decode_state; /* for some decoders (TO-DO: to be mover around) */ -#endif + void* decode_state; /* for some decoders (TO-DO: to be moved around) */ } VGMSTREAM; diff --git a/src/vgmstream_init.c b/src/vgmstream_init.c index ceef04b0..4ae37ddd 100644 --- a/src/vgmstream_init.c +++ b/src/vgmstream_init.c @@ -510,6 +510,7 @@ init_vgmstream_t init_vgmstream_functions[] = { init_vgmstream_dsp_asura_sfx, init_vgmstream_adp_ongakukan, init_vgmstream_sdd, + init_vgmstream_ka1a, /* lower priority metas (no clean header identity, somewhat ambiguous, or need extension/companion file to identify) */ init_vgmstream_agsc, diff --git a/src/vgmstream_types.h b/src/vgmstream_types.h index ec66d657..fde1d254 100644 --- a/src/vgmstream_types.h +++ b/src/vgmstream_types.h @@ -145,6 +145,7 @@ typedef enum { coding_TAC, /* tri-Ace Codec (MDCT-based) */ coding_ICE_RANGE, /* Inti Creates "range" codec */ coding_ICE_DCT, /* Inti Creates "DCT" codec */ + coding_KA1A, /* Koei Tecmo codec (transform-based) */ #ifdef VGM_USE_VORBIS coding_OGG_VORBIS, /* Xiph Vorbis with Ogg layer (MDCT-based) */ @@ -710,6 +711,7 @@ typedef enum { meta_DSP_ASURA, meta_ONGAKUKAN_RIFF_ADP, meta_SDD, + meta_KA1A, } meta_t;