From 35f5da2ac3581fa803e4addf122225ac1badab12 Mon Sep 17 00:00:00 2001 From: bnnm Date: Sat, 5 Oct 2019 15:10:40 +0200 Subject: [PATCH] Improve performance of ADX/XA/PSX/HEVAG/DSP decoders --- src/coding/adx_decoder.c | 191 +++++++++++------------------------ src/coding/coding.h | 9 +- src/coding/ngc_dsp_decoder.c | 129 ++++++++++++++--------- src/coding/psv_decoder.c | 81 ++++++++------- src/coding/psx_decoder.c | 45 ++++++--- src/coding/xa_decoder.c | 63 ++++++------ src/vgmstream.c | 45 ++++----- 7 files changed, 267 insertions(+), 296 deletions(-) diff --git a/src/coding/adx_decoder.c b/src/coding/adx_decoder.c index f27b26d3..f906ad0f 100644 --- a/src/coding/adx_decoder.c +++ b/src/coding/adx_decoder.c @@ -1,155 +1,84 @@ #include "coding.h" #include "../util.h" -void decode_adx(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes) { - int i; - int32_t sample_count; - int32_t frame_samples = (frame_bytes - 2) * 2; - - int framesin = first_sample/frame_samples; - - int32_t scale = read_16bitBE(stream->offset+framesin*frame_bytes,stream->streamfile) + 1; +void decode_adx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_size, coding_t coding_type) { + uint8_t frame[0x12] = {0}; + off_t frame_offset; + int i, frames_in, sample_count = 0; + size_t bytes_per_frame, samples_per_frame; + int scale, coef1, coef2; int32_t hist1 = stream->adpcm_history1_32; int32_t hist2 = stream->adpcm_history2_32; - int coef1 = stream->adpcm_coef[0]; - int coef2 = stream->adpcm_coef[1]; - first_sample = first_sample%frame_samples; - for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) { - int sample_byte = read_8bit(stream->offset+framesin*frame_bytes +2+i/2,stream->streamfile); + /* external interleave (fixed size), mono */ + bytes_per_frame = frame_size; + samples_per_frame = (bytes_per_frame - 0x02) * 2; /* always 32 */ + frames_in = first_sample / samples_per_frame; + first_sample = 
first_sample % samples_per_frame; - outbuf[sample_count] = clamp16( - (i&1? - get_low_nibble_signed(sample_byte): - get_high_nibble_signed(sample_byte) - ) * scale + - (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12) - ); + /* parse frame header */ + frame_offset = stream->offset + bytes_per_frame * frames_in; + read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */ - hist2 = hist1; - hist1 = outbuf[sample_count]; + scale = get_16bitBE(frame+0x00); + switch(coding_type) { + case coding_CRI_ADX: + scale = scale + 1; + coef1 = stream->adpcm_coef[0]; + coef2 = stream->adpcm_coef[1]; + break; + case coding_CRI_ADX_exp: + scale = 1 << (12 - scale); + coef1 = stream->adpcm_coef[0]; + coef2 = stream->adpcm_coef[1]; + break; + case coding_CRI_ADX_fixed: + scale = (scale & 0x1fff) + 1; + coef1 = stream->adpcm_coef[(frame[0] >> 5)*2 + 0]; + coef2 = stream->adpcm_coef[(frame[0] >> 5)*2 + 1]; + break; + case coding_CRI_ADX_enc_8: + case coding_CRI_ADX_enc_9: + scale = ((scale ^ stream->adx_xor) & 0x1fff) + 1; + coef1 = stream->adpcm_coef[0]; + coef2 = stream->adpcm_coef[1]; + break; + default: + scale = scale + 1; + coef1 = stream->adpcm_coef[0]; + coef2 = stream->adpcm_coef[1]; + break; } - stream->adpcm_history1_32 = hist1; - stream->adpcm_history2_32 = hist2; -} + /* decode nibbles */ + for (i = first_sample; i < first_sample + samples_to_do; i++) { + int32_t sample = 0; + uint8_t nibbles = frame[0x02 + i/2]; -void decode_adx_exp(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes) { - int i; - int32_t sample_count; - int32_t frame_samples = (frame_bytes - 2) * 2; + sample = i&1 ? 
/* high nibble first */ + get_low_nibble_signed(nibbles): + get_high_nibble_signed(nibbles); + sample = sample * scale + (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12); + sample = clamp16(sample); - int framesin = first_sample/frame_samples; - - int32_t scale = read_16bitBE(stream->offset+framesin*frame_bytes,stream->streamfile); - int32_t hist1, hist2; - int coef1, coef2; - scale = 1 << (12 - scale); - hist1 = stream->adpcm_history1_32; - hist2 = stream->adpcm_history2_32; - coef1 = stream->adpcm_coef[0]; - coef2 = stream->adpcm_coef[1]; - - first_sample = first_sample%frame_samples; - - for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) { - int sample_byte = read_8bit(stream->offset+framesin*frame_bytes +2+i/2,stream->streamfile); - - outbuf[sample_count] = clamp16( - (i&1? - get_low_nibble_signed(sample_byte): - get_high_nibble_signed(sample_byte) - ) * scale + - (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12) - ); + outbuf[sample_count] = sample; + sample_count += channelspacing; hist2 = hist1; - hist1 = outbuf[sample_count]; - } - - stream->adpcm_history1_32 = hist1; - stream->adpcm_history2_32 = hist2; -} - -void decode_adx_fixed(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes) { - int i; - int32_t sample_count; - int32_t frame_samples = (frame_bytes - 2) * 2; - - int framesin = first_sample/frame_samples; - - int32_t scale = (read_16bitBE(stream->offset + framesin*frame_bytes, stream->streamfile) & 0x1FFF) + 1; - int32_t predictor = read_8bit(stream->offset + framesin*frame_bytes, stream->streamfile) >> 5; - int32_t hist1 = stream->adpcm_history1_32; - int32_t hist2 = stream->adpcm_history2_32; - int coef1 = stream->adpcm_coef[predictor * 2]; - int coef2 = stream->adpcm_coef[predictor * 2 + 1]; - - first_sample = first_sample%frame_samples; - - for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) { - int sample_byte = read_8bit(stream->offset+framesin*frame_bytes +2+i/2,stream->streamfile); - - outbuf[sample_count] = clamp16( - (i&1? 
- get_low_nibble_signed(sample_byte): - get_high_nibble_signed(sample_byte) - ) * scale + - (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12) - ); - - hist2 = hist1; - hist1 = outbuf[sample_count]; - } - - stream->adpcm_history1_32 = hist1; - stream->adpcm_history2_32 = hist2; -} - -void adx_next_key(VGMSTREAMCHANNEL * stream) -{ - stream->adx_xor = ( stream->adx_xor * stream->adx_mult + stream->adx_add ) & 0x7fff; -} - -void decode_adx_enc(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes) { - int i; - int32_t sample_count; - int32_t frame_samples = (frame_bytes - 2) * 2; - - int framesin = first_sample/frame_samples; - - int32_t scale = ((read_16bitBE(stream->offset+framesin*frame_bytes,stream->streamfile) ^ stream->adx_xor)&0x1fff) + 1; - int32_t hist1 = stream->adpcm_history1_32; - int32_t hist2 = stream->adpcm_history2_32; - int coef1 = stream->adpcm_coef[0]; - int coef2 = stream->adpcm_coef[1]; - - first_sample = first_sample%frame_samples; - - for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) { - int sample_byte = read_8bit(stream->offset+framesin*frame_bytes +2+i/2,stream->streamfile); - - outbuf[sample_count] = clamp16( - (i&1? 
- get_low_nibble_signed(sample_byte): - get_high_nibble_signed(sample_byte) - ) * scale + - (coef1 * hist1 >> 12) + (coef2 * hist2 >> 12) - ); - - hist2 = hist1; - hist1 = outbuf[sample_count]; + hist1 = sample; } stream->adpcm_history1_32 = hist1; stream->adpcm_history2_32 = hist2; - if (!(i % 32)) { - for (i=0;i<stream->adx_channels;i++) - { + if ((coding_type == coding_CRI_ADX_enc_8 || coding_type == coding_CRI_ADX_enc_9) && !(i % 32)) { + for (i =0; i < stream->adx_channels; i++) { adx_next_key(stream); } } - +} + +void adx_next_key(VGMSTREAMCHANNEL * stream) { + stream->adx_xor = (stream->adx_xor * stream->adx_mult + stream->adx_add) & 0x7fff; } diff --git a/src/coding/coding.h b/src/coding/coding.h index 9b1f378d..6277f244 100644 --- a/src/coding/coding.h +++ b/src/coding/coding.h @@ -4,10 +4,7 @@ #include "../vgmstream.h" /* adx_decoder */ -void decode_adx(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes); -void decode_adx_exp(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes); -void decode_adx_fixed(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes); -void decode_adx_enc(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes); +void decode_adx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int32_t frame_bytes, coding_t coding_type); void adx_next_key(VGMSTREAMCHANNEL * stream); /* g721_decoder */ @@ -92,10 +89,10 @@ size_t ps_cfg_bytes_to_samples(size_t bytes, size_t frame_size, int channels); int ps_check_format(STREAMFILE *streamFile, off_t offset, size_t max); /* psv_decoder */ -void decode_hevag(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t 
samples_to_do); +void decode_hevag(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do); /* xa_decoder */ -void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel); +void decode_xa(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel); size_t xa_bytes_to_samples(size_t bytes, int channels, int is_blocked); /* ea_xa_decoder */ diff --git a/src/coding/ngc_dsp_decoder.c b/src/coding/ngc_dsp_decoder.c index 773e9612..6f7fe22c 100644 --- a/src/coding/ngc_dsp_decoder.c +++ b/src/coding/ngc_dsp_decoder.c @@ -1,69 +1,103 @@ #include "coding.h" #include "../util.h" + void decode_ngc_dsp(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do) { - int i=first_sample; - int32_t sample_count; - - int framesin = first_sample/14; - - int8_t header = read_8bit(framesin*8+stream->offset,stream->streamfile); - int32_t scale = 1 << (header & 0xf); - int coef_index = (header >> 4) & 0xf; + uint8_t frame[0x08] = {0}; + off_t frame_offset; + int i, frames_in, sample_count = 0; + size_t bytes_per_frame, samples_per_frame; + int coef_index, scale, coef1, coef2; int32_t hist1 = stream->adpcm_history1_16; int32_t hist2 = stream->adpcm_history2_16; - int coef1 = stream->adpcm_coef[coef_index*2]; - int coef2 = stream->adpcm_coef[coef_index*2+1]; - first_sample = first_sample%14; - for (i=first_sample,sample_count=0; i<first_sample+samples_to_do; i++,sample_count+=channelspacing) { - int sample_byte = read_8bit(framesin*8+stream->offset+1+i/2,stream->streamfile); + /* external interleave (fixed size), mono */ + bytes_per_frame = 0x08; + samples_per_frame = (bytes_per_frame - 0x01) * 2; /* always 14 */ + frames_in = first_sample / samples_per_frame; + first_sample = first_sample % samples_per_frame; - outbuf[sample_count] = clamp16(( - (((i&1? 
- get_low_nibble_signed(sample_byte): - get_high_nibble_signed(sample_byte) - ) * scale)<<11) + 1024 + - (coef1 * hist1 + coef2 * hist2))>>11 - ); + /* parse frame header */ + frame_offset = stream->offset + bytes_per_frame * frames_in; + read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */ + scale = 1 << ((frame[0] >> 0) & 0xf); + coef_index = (frame[0] >> 4) & 0xf; + + VGM_ASSERT_ONCE(coef_index > 8, "DSP: incorrect coefs at %x\n", (uint32_t)frame_offset); + //if (coef_index > 8) //todo not correctly clamped in original decoder? + // coef_index = 8; + + coef1 = stream->adpcm_coef[coef_index*2 + 0]; + coef2 = stream->adpcm_coef[coef_index*2 + 1]; + + + /* decode nibbles */ + for (i = first_sample; i < first_sample + samples_to_do; i++) { + int32_t sample = 0; + uint8_t nibbles = frame[0x01 + i/2]; + + sample = i&1 ? /* high nibble first */ + get_low_nibble_signed(nibbles) : + get_high_nibble_signed(nibbles); + sample = ((sample * scale) << 11); + sample = (sample + 1024 + coef1*hist1 + coef2*hist2) >> 11; + sample = clamp16(sample); + + outbuf[sample_count] = sample; + sample_count += channelspacing; hist2 = hist1; - hist1 = outbuf[sample_count]; + hist1 = sample; } stream->adpcm_history1_16 = hist1; stream->adpcm_history2_16 = hist2; } -/* read from memory rather than a file */ -static void decode_ngc_dsp_subint_internal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, uint8_t * mem) { - int i=first_sample; - int32_t sample_count; - int8_t header = mem[0]; - int32_t scale = 1 << (header & 0xf); - int coef_index = (header >> 4) & 0xf; +/* read from memory rather than a file */ +static void decode_ngc_dsp_subint_internal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, uint8_t * frame) { + int i, sample_count = 0; + size_t bytes_per_frame, samples_per_frame; + int coef_index, scale, coef1, coef2; 
int32_t hist1 = stream->adpcm_history1_16; int32_t hist2 = stream->adpcm_history2_16; - int coef1 = stream->adpcm_coef[coef_index*2]; - int coef2 = stream->adpcm_coef[coef_index*2+1]; - first_sample = first_sample%14; - for (i=first_sample,sample_count=0; i samples_per_frame, "DSP: layout error, too many samples\n"); - outbuf[sample_count] = clamp16(( - (((i&1? - get_low_nibble_signed(sample_byte): - get_high_nibble_signed(sample_byte) - ) * scale)<<11) + 1024 + - (coef1 * hist1 + coef2 * hist2))>>11 - ); + /* parse frame header */ + scale = 1 << ((frame[0] >> 0) & 0xf); + coef_index = (frame[0] >> 4) & 0xf; + + VGM_ASSERT_ONCE(coef_index > 8, "DSP: incorrect coefs\n"); + //if (coef_index > 8) //todo not correctly clamped in original decoder? + // coef_index = 8; + + coef1 = stream->adpcm_coef[coef_index*2 + 0]; + coef2 = stream->adpcm_coef[coef_index*2 + 1]; + + for (i = first_sample; i < first_sample + samples_to_do; i++) { + int32_t sample = 0; + uint8_t nibbles = frame[0x01 + i/2]; + + sample = i&1 ? + get_low_nibble_signed(nibbles) : + get_high_nibble_signed(nibbles); + sample = ((sample * scale) << 11); + sample = (sample + 1024 + coef1*hist1 + coef2*hist2) >> 11; + sample = clamp16(sample); + + outbuf[sample_count] = sample; + sample_count += channelspacing; hist2 = hist1; - hist1 = outbuf[sample_count]; + hist1 = sample; } stream->adpcm_history1_16 = hist1; @@ -72,22 +106,21 @@ static void decode_ngc_dsp_subint_internal(VGMSTREAMCHANNEL * stream, sample_t * /* decode DSP with byte-interleaved frames (ex. 
0x08: 1122112211221122) */ void decode_ngc_dsp_subint(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel, int interleave) { - uint8_t sample_data[0x08]; + uint8_t frame[0x08]; int i; + int frames_in = first_sample / 14; - int framesin = first_sample/14; - - for (i=0; i < 0x08; i++) { + for (i = 0; i < 0x08; i++) { /* base + current frame + subint section + subint byte + channel adjust */ - sample_data[i] = read_8bit( + frame[i] = read_8bit( stream->offset - + framesin*(0x08*channelspacing) + + frames_in*(0x08*channelspacing) + i/interleave * interleave * channelspacing + i%interleave + interleave * channel, stream->streamfile); } - decode_ngc_dsp_subint_internal(stream, outbuf, channelspacing, first_sample, samples_to_do, sample_data); + decode_ngc_dsp_subint_internal(stream, outbuf, channelspacing, first_sample, samples_to_do, frame); } diff --git a/src/coding/psv_decoder.c b/src/coding/psv_decoder.c index 7136c837..a6d1b941 100644 --- a/src/coding/psv_decoder.c +++ b/src/coding/psv_decoder.c @@ -3,7 +3,7 @@ #include "../util.h" /* PSVita ADPCM table */ -static const int16_t HEVAG_coefs[128][4] = { +static const int16_t hevag_coefs[128][4] = { { 0, 0, 0, 0 }, { 7680, 0, 0, 0 }, { 14720, -6656, 0, 0 }, @@ -141,59 +141,58 @@ static const int16_t HEVAG_coefs[128][4] = { * * Original research and algorithm by id-daemon / daemon1. 
*/ -void decode_hevag(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do) { - - uint8_t predict_nr, shift, flag, byte; - int32_t scale = 0; - - int32_t sample; +void decode_hevag(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do) { + uint8_t frame[0x10] = {0}; + off_t frame_offset; + int i, frames_in, sample_count = 0; + size_t bytes_per_frame, samples_per_frame; + int coef_index, shift_factor, flag; int32_t hist1 = stream->adpcm_history1_32; int32_t hist2 = stream->adpcm_history2_32; int32_t hist3 = stream->adpcm_history3_32; int32_t hist4 = stream->adpcm_history4_32; - int i, sample_count; + /* external interleave (fixed size), mono */ + bytes_per_frame = 0x10; + samples_per_frame = (bytes_per_frame - 0x02) * 2; /* always 28 */ + frames_in = first_sample / samples_per_frame; + first_sample = first_sample % samples_per_frame; - int framesin = first_sample / 28; + /* parse frame header */ + frame_offset = stream->offset + bytes_per_frame * frames_in; + read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */ + coef_index = (frame[0] >> 4) & 0xf; + shift_factor = (frame[0] >> 0) & 0xf; + coef_index = ((frame[1] >> 0) & 0xf0) | coef_index; + flag = (frame[1] >> 0) & 0xf; /* same flags */ - /* 4 byte header: predictor = 3rd and 1st, shift = 2nd, flag = 4th */ - byte = (uint8_t)read_8bit(stream->offset+framesin*16+0,stream->streamfile); - predict_nr = byte >> 4; - shift = byte & 0x0f; - byte = (uint8_t)read_8bit(stream->offset+framesin*16+1,stream->streamfile); - predict_nr = (byte & 0xF0) | predict_nr; - flag = byte & 0x0f; /* no change in flags */ + VGM_ASSERT_ONCE(coef_index > 127 || shift_factor > 12, "HEVAG: in+correct coefs/shift at %x\n", (uint32_t)frame_offset); + if (coef_index > 127) + coef_index = 127; /* ? */ + if (shift_factor > 12) + shift_factor = 9; /* ? 
*/ - first_sample = first_sample % 28; + /* decode nibbles */ + for (i = first_sample; i < first_sample + samples_to_do; i++) { + int32_t sample = 0, scale = 0; - if (first_sample & 1) { /* if first sample is odd, read byte first */ - byte = read_8bit(stream->offset+(framesin*16)+2+first_sample/2,stream->streamfile); - } + if (flag < 0x07) { /* with flag 0x07 decoded sample must be 0 */ + uint8_t nibbles = frame[0x02 + i/2]; - for (i = first_sample, sample_count = 0; i < first_sample + samples_to_do; i++, sample_count += channelspacing) { - sample = 0; - - if (flag < 7 && predict_nr < 128) { - - if (i & 1) {/* odd/even nibble */ - scale = byte >> 4; - } else { - byte = read_8bit(stream->offset+(framesin*16)+2+i/2,stream->streamfile); - scale = byte & 0x0f; - } - if (scale > 7) { /* sign extend */ - scale = scale - 16; - } - - sample = (hist1 * HEVAG_coefs[predict_nr][0] + - hist2 * HEVAG_coefs[predict_nr][1] + - hist3 * HEVAG_coefs[predict_nr][2] + - hist4 * HEVAG_coefs[predict_nr][3] ) / 32; - sample = (sample + (scale << (20 - shift)) + 128) >> 8; + scale = i&1 ? 
/* low nibble first */ + get_high_nibble_signed(nibbles): + get_low_nibble_signed(nibbles); + sample = (hist1 * hevag_coefs[coef_index][0] + + hist2 * hevag_coefs[coef_index][1] + + hist3 * hevag_coefs[coef_index][2] + + hist4 * hevag_coefs[coef_index][3] ) / 32; + sample = (sample + (scale << (20 - shift_factor)) + 128) >> 8; } - outbuf[sample_count] = clamp16(sample); + outbuf[sample_count] = sample; + sample_count += channelspacing; + hist4 = hist3; hist3 = hist2; hist2 = hist1; diff --git a/src/coding/psx_decoder.c b/src/coding/psx_decoder.c index f5e7ebd4..1107d097 100644 --- a/src/coding/psx_decoder.c +++ b/src/coding/psx_decoder.c @@ -2,7 +2,7 @@ /* PS-ADPCM table, defined as rational numbers (as in the spec) */ -static const double ps_adpcm_coefs_f[5][2] = { +static const float ps_adpcm_coefs_f[5][2] = { { 0.0 , 0.0 }, //{ 0.0 , 0.0 }, { 0.9375 , 0.0 }, //{ 60.0 / 64.0 , 0.0 }, { 1.796875 , -0.8125 }, //{ 115.0 / 64.0 , -52.0 / 64.0 }, @@ -44,6 +44,7 @@ static const int ps_adpcm_coefs_i[5][2] = { /* standard PS-ADPCM (float math version) */ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int is_badflags) { + uint8_t frame[0x10] = {0}; off_t frame_offset; int i, frames_in, sample_count = 0; size_t bytes_per_frame, samples_per_frame; @@ -51,6 +52,7 @@ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing int32_t hist1 = stream->adpcm_history1_32; int32_t hist2 = stream->adpcm_history2_32; + /* external interleave (fixed size), mono */ bytes_per_frame = 0x10; samples_per_frame = (bytes_per_frame - 0x02) * 2; /* always 28 */ @@ -58,10 +60,11 @@ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing first_sample = first_sample % samples_per_frame; /* parse frame header */ - frame_offset = stream->offset + bytes_per_frame*frames_in; - coef_index = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 4) & 0xf; - shift_factor = 
((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 0) & 0xf; - flag = (uint8_t)read_8bit(frame_offset+0x01,stream->streamfile); /* only lower nibble needed */ + frame_offset = stream->offset + bytes_per_frame * frames_in; + read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */ + coef_index = (frame[0] >> 4) & 0xf; + shift_factor = (frame[0] >> 0) & 0xf; + flag = frame[1]; /* only lower nibble needed */ VGM_ASSERT_ONCE(coef_index > 5 || shift_factor > 12, "PS-ADPCM: incorrect coefs/shift at %x\n", (uint32_t)frame_offset); if (coef_index > 5) /* needed by inFamous (PS3) (maybe it's supposed to use more filters?) */ @@ -73,18 +76,19 @@ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing flag = 0; VGM_ASSERT_ONCE(flag > 7,"PS-ADPCM: unknown flag at %x\n", (uint32_t)frame_offset); /* meta should use PSX-badflags */ + /* decode nibbles */ for (i = first_sample; i < first_sample + samples_to_do; i++) { int32_t sample = 0; if (flag < 0x07) { /* with flag 0x07 decoded sample must be 0 */ - uint8_t nibbles = (uint8_t)read_8bit(frame_offset+0x02+i/2,stream->streamfile); + uint8_t nibbles = frame[0x02 + i/2]; sample = i&1 ? /* low nibble first */ (nibbles >> 4) & 0x0f : (nibbles >> 0) & 0x0f; sample = (int16_t)((sample << 12) & 0xf000) >> shift_factor; /* 16b sign extend + scale */ - sample = (int)(sample + ps_adpcm_coefs_f[coef_index][0]*hist1 + ps_adpcm_coefs_f[coef_index][1]*hist2); + sample = (int32_t)(sample + ps_adpcm_coefs_f[coef_index][0]*hist1 + ps_adpcm_coefs_f[coef_index][1]*hist2); sample = clamp16(sample); } @@ -105,6 +109,7 @@ void decode_psx(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing * * Uses int math to decode, which seems more likely (based on FF XI PC's code in Moogle Toolbox). 
*/ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int frame_size) { + uint8_t frame[0x50] = {0}; off_t frame_offset; int i, frames_in, sample_count = 0; size_t bytes_per_frame, samples_per_frame; @@ -112,6 +117,7 @@ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int c int32_t hist1 = stream->adpcm_history1_32; int32_t hist2 = stream->adpcm_history2_32; + /* external interleave (variable size), mono */ bytes_per_frame = frame_size; samples_per_frame = (bytes_per_frame - 0x01) * 2; @@ -119,9 +125,10 @@ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int c first_sample = first_sample % samples_per_frame; /* parse frame header */ - frame_offset = stream->offset + bytes_per_frame*frames_in; - coef_index = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 4) & 0xf; - shift_factor = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 0) & 0xf; + frame_offset = stream->offset + bytes_per_frame * frames_in; + read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */ + coef_index = (frame[0] >> 4) & 0xf; + shift_factor = (frame[0] >> 0) & 0xf; VGM_ASSERT_ONCE(coef_index > 5 || shift_factor > 12, "PS-ADPCM: incorrect coefs/shift at %x\n", (uint32_t)frame_offset); if (coef_index > 5) /* needed by Afrika (PS3) (maybe it's supposed to use more filters?) */ @@ -129,10 +136,11 @@ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int c if (shift_factor > 12) shift_factor = 9; /* supposedly, from Nocash PSX docs */ + /* decode nibbles */ for (i = first_sample; i < first_sample + samples_to_do; i++) { int32_t sample = 0; - uint8_t nibbles = (uint8_t)read_8bit(frame_offset+0x01+i/2,stream->streamfile); + uint8_t nibbles = frame[0x01 + i/2]; sample = i&1 ? 
/* low nibble first */ (nibbles >> 4) & 0x0f : @@ -154,6 +162,7 @@ void decode_psx_configurable(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int c /* PS-ADPCM from Pivotal games, exactly like psx_cfg but with float math (reverse engineered from the exe) */ void decode_psx_pivotal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int frame_size) { + uint8_t frame[0x50] = {0}; off_t frame_offset; int i, frames_in, sample_count = 0; size_t bytes_per_frame, samples_per_frame; @@ -162,6 +171,7 @@ void decode_psx_pivotal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channe int32_t hist2 = stream->adpcm_history2_32; float scale; + /* external interleave (variable size), mono */ bytes_per_frame = frame_size; samples_per_frame = (bytes_per_frame - 0x01) * 2; @@ -169,21 +179,24 @@ void decode_psx_pivotal(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channe first_sample = first_sample % samples_per_frame; /* parse frame header */ - frame_offset = stream->offset + bytes_per_frame*frames_in; - coef_index = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 4) & 0xf; - shift_factor = ((uint8_t)read_8bit(frame_offset+0x00,stream->streamfile) >> 0) & 0xf; + frame_offset = stream->offset + bytes_per_frame * frames_in; + read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */ + coef_index = (frame[0] >> 4) & 0xf; + shift_factor = (frame[0] >> 0) & 0xf; - VGM_ASSERT_ONCE(coef_index > 5 || shift_factor > 12, "PS-ADPCM: incorrect coefs/shift at %x\n", (uint32_t)frame_offset); + VGM_ASSERT_ONCE(coef_index > 5 || shift_factor > 12, "PS-ADPCM-piv: incorrect coefs/shift\n"); if (coef_index > 5) /* just in case */ coef_index = 5; if (shift_factor > 12) /* same */ shift_factor = 12; + scale = (float)(1.0 / (double)(1 << shift_factor)); + /* decode nibbles */ for (i = first_sample; i < first_sample + samples_to_do; i++) { int32_t sample = 0; - uint8_t nibbles = 
(uint8_t)read_8bit(frame_offset+0x01+i/2,stream->streamfile); + uint8_t nibbles = frame[0x01 + i/2]; sample = !(i&1) ? /* low nibble first */ (nibbles >> 0) & 0x0f : diff --git a/src/coding/xa_decoder.c b/src/coding/xa_decoder.c index 58c3c21a..6577707d 100644 --- a/src/coding/xa_decoder.c +++ b/src/coding/xa_decoder.c @@ -6,11 +6,13 @@ // May be implemented like the SNES/SPC700 BRR. /* XA ADPCM gain values */ -static const double K0[4] = { 0.0, 0.9375, 1.796875, 1.53125 }; -static const double K1[4] = { 0.0, 0.0, -0.8125,-0.859375}; -/* K0/1 floats to int, K*2^10 = K*(1<<10) = K*1024 */ -static int get_IK0(int fid) { return ((int)((-K0[fid]) * (1 << 10))); } -static int get_IK1(int fid) { return ((int)((-K1[fid]) * (1 << 10))); } +#if 0 +static const float K0[4] = { 0.0, 0.9375, 1.796875, 1.53125 }; +static const float K1[4] = { 0.0, 0.0, -0.8125, -0.859375 }; +#endif +/* K0/1 floats to int, -K*2^10 = -K*(1<<10) = -K*1024 */ +static const int IK0[4] = { 0, -960, -1840, -1568 }; +static const int IK1[4] = { 0, 0, 832, 880 }; /* Sony XA ADPCM, defined for CD-DA/CD-i in the "Red Book" (private) or "Green Book" (public) specs. * The algorithm basically is BRR (Bit Rate Reduction) from the SNES SPC700, while the data layout is new. 
@@ -40,18 +42,14 @@ static int get_IK1(int fid) { return ((int)((-K1[fid]) * (1 << 10))); } * (bsnes): https://gitlab.com/higan/higan/blob/master/higan/sfc/dsp/brr.cpp */ -void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel) { - off_t frame_offset, sp_offset; - int i,j, frames_in, samples_done = 0, sample_count = 0; +void decode_xa(VGMSTREAMCHANNEL * stream, sample_t * outbuf, int channelspacing, int32_t first_sample, int32_t samples_to_do, int channel) { + uint8_t frame[0x80] = {0}; + off_t frame_offset; + int i,j, sp_pos, frames_in, samples_done = 0, sample_count = 0; size_t bytes_per_frame, samples_per_frame; int32_t hist1 = stream->adpcm_history1_32; int32_t hist2 = stream->adpcm_history2_32; - /* external interleave (fixed size), mono/stereo */ - bytes_per_frame = 0x80; - samples_per_frame = 28*8 / channelspacing; - frames_in = first_sample / samples_per_frame; - first_sample = first_sample % samples_per_frame; /* data layout (mono): * - CD-XA audio is divided into sectors ("audio blocks"), each with 18 size 0x80 frames @@ -72,12 +70,19 @@ void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, i * ... * subframe 7: header @ 0x0b or 0x0f, 28 nibbles (high) @ 0x13,17,1b,1f,23 ... 
7f */ - frame_offset = stream->offset + bytes_per_frame*frames_in; - if (read_32bitBE(frame_offset+0x00,stream->streamfile) != read_32bitBE(frame_offset+0x04,stream->streamfile) || - read_32bitBE(frame_offset+0x08,stream->streamfile) != read_32bitBE(frame_offset+0x0c,stream->streamfile)) { - VGM_LOG("bad frames at %x\n", (uint32_t)frame_offset); - } + /* external interleave (fixed size), mono/stereo */ + bytes_per_frame = 0x80; + samples_per_frame = 28*8 / channelspacing; + frames_in = first_sample / samples_per_frame; + first_sample = first_sample % samples_per_frame; + + /* parse frame header */ + frame_offset = stream->offset + bytes_per_frame * frames_in; + read_streamfile(frame, frame_offset, bytes_per_frame, stream->streamfile); /* ignore EOF errors */ + + VGM_ASSERT(get_32bitBE(frame+0x0) != get_32bitBE(frame+0x4) || get_32bitBE(frame+0x8) != get_32bitBE(frame+0xC), + "bad frames at %x\n", (uint32_t)frame_offset); /* decode subframes */ @@ -86,18 +91,18 @@ void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, i uint8_t coef_index, shift_factor; /* parse current subframe (sound unit)'s header (sound parameters) */ - sp_offset = frame_offset + 0x04 + i*channelspacing + channel; - coef_index = ((uint8_t)read_8bit(sp_offset,stream->streamfile) >> 4) & 0xf; - shift_factor = ((uint8_t)read_8bit(sp_offset,stream->streamfile) >> 0) & 0xf; + sp_pos = 0x04 + i*channelspacing + channel; + coef_index = (frame[sp_pos] >> 4) & 0xf; + shift_factor = (frame[sp_pos] >> 0) & 0xf; - VGM_ASSERT(coef_index > 4 || shift_factor > 12, "XA: incorrect coefs/shift at %x\n", (uint32_t)sp_offset); + VGM_ASSERT(coef_index > 4 || shift_factor > 12, "XA: incorrect coefs/shift at %x\n", (uint32_t)frame_offset + sp_pos); if (coef_index > 4) coef_index = 0; /* only 4 filters are used, rest is apparently 0 */ if (shift_factor > 12) shift_factor = 9; /* supposedly, from Nocash PSX docs */ - coef1 = get_IK0(coef_index); - coef2 = get_IK1(coef_index); + coef1 = 
IK0[coef_index]; + coef2 = IK1[coef_index]; /* decode subframe nibbles */ @@ -105,9 +110,9 @@ void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, i uint8_t nibbles; int32_t new_sample; - off_t su_offset = (channelspacing==1) ? - frame_offset + 0x10 + j*0x04 + (i/2) : /* mono */ - frame_offset + 0x10 + j*0x04 + i; /* stereo */ + int su_pos = (channelspacing==1) ? + 0x10 + j*0x04 + (i/2) : /* mono */ + 0x10 + j*0x04 + i; /* stereo */ int get_high_nibble = (channelspacing==1) ? (i&1) : /* mono (even subframes = low, off subframes = high) */ (channel == 1); /* stereo (L channel / even subframes = low, R channel / odd subframes = high) */ @@ -118,11 +123,11 @@ void decode_xa(VGMSTREAMCHANNEL * stream, sample * outbuf, int channelspacing, i continue; } - nibbles = (uint8_t)read_8bit(su_offset,stream->streamfile); + nibbles = frame[su_pos]; new_sample = get_high_nibble ? (nibbles >> 4) & 0x0f : - (nibbles ) & 0x0f; + (nibbles >> 0) & 0x0f; new_sample = (int16_t)((new_sample << 12) & 0xf000) >> shift_factor; /* 16b sign extend + scale */ new_sample = new_sample << 4; diff --git a/src/vgmstream.c b/src/vgmstream.c index b7223522..fc24f919 100644 --- a/src/vgmstream.c +++ b/src/vgmstream.c @@ -1495,37 +1495,15 @@ void decode_vgmstream(VGMSTREAM * vgmstream, int samples_written, int samples_to switch (vgmstream->coding_type) { case coding_CRI_ADX: - for (ch = 0; ch < vgmstream->channels; ch++) { - decode_adx(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch, - vgmstream->channels,vgmstream->samples_into_block,samples_to_do, - vgmstream->interleave_block_size); - } - - break; case coding_CRI_ADX_exp: - for (ch = 0; ch < vgmstream->channels; ch++) { - decode_adx_exp(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch, - vgmstream->channels,vgmstream->samples_into_block,samples_to_do, - vgmstream->interleave_block_size); - } - - break; case coding_CRI_ADX_fixed: - for (ch = 0; ch < vgmstream->channels; ch++) { - 
decode_adx_fixed(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch, - vgmstream->channels,vgmstream->samples_into_block,samples_to_do, - vgmstream->interleave_block_size); - } - - break; case coding_CRI_ADX_enc_8: case coding_CRI_ADX_enc_9: for (ch = 0; ch < vgmstream->channels; ch++) { - decode_adx_enc(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch, + decode_adx(&vgmstream->ch[ch],buffer+samples_written*vgmstream->channels+ch, vgmstream->channels,vgmstream->samples_into_block,samples_to_do, - vgmstream->interleave_block_size); + vgmstream->interleave_block_size, vgmstream->coding_type); } - break; case coding_NGC_DSP: for (ch = 0; ch < vgmstream->channels; ch++) { @@ -2417,7 +2395,7 @@ void describe_vgmstream(VGMSTREAM * vgmstream, char * desc, int length) { } /* codecs with configurable frame size */ - if (vgmstream->layout_type == layout_none && vgmstream->interleave_block_size > 0) { + if (vgmstream->interleave_block_size > 0) { switch (vgmstream->coding_type) { case coding_MSADPCM: case coding_MSADPCM_int: @@ -2813,6 +2791,23 @@ int vgmstream_open_stream(VGMSTREAM * vgmstream, STREAMFILE *streamFile, off_t s return 1; #endif + if ((vgmstream->coding_type == coding_PSX_cfg || + vgmstream->coding_type == coding_PSX_pivotal) && + (vgmstream->interleave_block_size == 0 || vgmstream->interleave_block_size > 0x50)) { + VGM_LOG("VGMSTREAM: PSX-cfg decoder with wrong frame size %x\n", vgmstream->interleave_block_size); + return 0; + } + + if ((vgmstream->coding_type == coding_CRI_ADX || + vgmstream->coding_type == coding_CRI_ADX_enc_8 || + vgmstream->coding_type == coding_CRI_ADX_enc_9 || + vgmstream->coding_type == coding_CRI_ADX_exp || + vgmstream->coding_type == coding_CRI_ADX_fixed) && + (vgmstream->interleave_block_size == 0 || vgmstream->interleave_block_size > 0x12)) { + VGM_LOG("VGMSTREAM: ADX decoder with wrong frame size %x\n", vgmstream->interleave_block_size); + return 0; + } + /* if interleave is big enough keep a buffer 
per channel */ if (vgmstream->interleave_block_size * vgmstream->channels >= STREAMFILE_DEFAULT_BUFFER_SIZE) { use_streamfile_per_channel = 1;