From f4c3009a000f000d05c0dd1f1759579735333533 Mon Sep 17 00:00:00 2001 From: bnnm Date: Tue, 19 Oct 2021 00:35:29 +0200 Subject: [PATCH] hca: optimize key test --- src/coding/hca_decoder_clhca.c | 131 +++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 49 deletions(-) diff --git a/src/coding/hca_decoder_clhca.c b/src/coding/hca_decoder_clhca.c index 09294066..4a999979 100644 --- a/src/coding/hca_decoder_clhca.c +++ b/src/coding/hca_decoder_clhca.c @@ -57,9 +57,9 @@ #define HCA_MAX_FRAME_SIZE 0xFFFF /* lib max */ #define HCA_MASK 0x7F7F7F7F /* chunk obfuscation when the HCA is encrypted with key */ -#define HCA_SUBFRAMES_PER_FRAME 8 +#define HCA_SUBFRAMES 8 #define HCA_SAMPLES_PER_SUBFRAME 128 /* also spectrum points/etc */ -#define HCA_SAMPLES_PER_FRAME (HCA_SUBFRAMES_PER_FRAME*HCA_SAMPLES_PER_SUBFRAME) +#define HCA_SAMPLES_PER_FRAME (HCA_SUBFRAMES*HCA_SAMPLES_PER_SUBFRAME) #define HCA_MDCT_BITS 7 /* (1<<7) = 128 */ #define HCA_MIN_CHANNELS 1 @@ -88,7 +88,7 @@ typedef struct stChannel { unsigned int coded_count; /* encoded scales/resolutions/coefs */ /* subframe state */ - unsigned char intensity[HCA_SUBFRAMES_PER_FRAME]; /* intensity indexes for joins stereo (value max: 15 / 4b) */ + unsigned char intensity[HCA_SUBFRAMES]; /* intensity indexes for joins stereo (value max: 15 / 4b) */ unsigned char scalefactors[HCA_SAMPLES_PER_SUBFRAME]; /* scale indexes (value max: 64 / 6b)*/ unsigned char resolution[HCA_SAMPLES_PER_SUBFRAME]; /* resolution indexes (value max: 15 / 4b) */ unsigned char noises[HCA_SAMPLES_PER_SUBFRAME]; /* indexes to coefs that need noise fill + coefs that don't (value max: 128 / 8b) */ @@ -96,13 +96,14 @@ typedef struct stChannel { unsigned int valid_count; /* resolutions with valid values saved in 'noises' */ float gain[HCA_SAMPLES_PER_SUBFRAME]; /* gain to apply to quantized spectral data */ - float spectra[HCA_SAMPLES_PER_SUBFRAME]; /* resulting dequantized data */ + float spectra[HCA_SUBFRAMES][HCA_SAMPLES_PER_SUBFRAME]; /* 
resulting dequantized data */ + float temp[HCA_SAMPLES_PER_SUBFRAME]; /* temp for DCT-IV */ float dct[HCA_SAMPLES_PER_SUBFRAME]; /* result of DCT-IV */ float imdct_previous[HCA_SAMPLES_PER_SUBFRAME]; /* IMDCT */ /* frame state */ - float wave[HCA_SUBFRAMES_PER_FRAME][HCA_SAMPLES_PER_SUBFRAME]; /* resulting samples */ + float wave[HCA_SUBFRAMES][HCA_SAMPLES_PER_SUBFRAME]; /* resulting samples */ } stChannel; typedef struct clHCA { @@ -333,7 +334,7 @@ void clHCA_ReadSamples16(clHCA* hca, signed short *samples) { unsigned int i, j, k; /* PCM output is generally unused, but lib functions seem to use SIMD for f32 to s32 + round to zero */ - for (i = 0; i < HCA_SUBFRAMES_PER_FRAME; i++) { + for (i = 0; i < HCA_SUBFRAMES; i++) { for (j = 0; j < HCA_SAMPLES_PER_SUBFRAME; j++) { for (k = 0; k < hca->channels; k++) { f = hca->channel[k].wave[i][j]; @@ -989,8 +990,12 @@ void clHCA_SetKey(clHCA* hca, unsigned long long keycode) { } } +static int clHCA_DecodeBlock_unpack(clHCA* hca, void *data, unsigned int size); +static void clHCA_DecodeBlock_transform(clHCA* hca); + + int clHCA_TestBlock(clHCA* hca, void *data, unsigned int size) { - const int frame_samples = HCA_SUBFRAMES_PER_FRAME * HCA_SAMPLES_PER_SUBFRAME; + const int frame_samples = HCA_SUBFRAMES * HCA_SAMPLES_PER_SUBFRAME; const float scale = 32768.0f; unsigned int i, ch, sf, s; int status; @@ -1014,7 +1019,7 @@ int clHCA_TestBlock(clHCA* hca, void *data, unsigned int size) { } /* return if decode fails (happens often with wrong keys due to bad bitstream values) */ - status = clHCA_DecodeBlock(hca, data, size); + status = clHCA_DecodeBlock_unpack(hca, data, size); if (status < 0) return -1; @@ -1042,8 +1047,9 @@ int clHCA_TestBlock(clHCA* hca, void *data, unsigned int size) { } /* check decode results as (rarely) bad keys may still get here */ + clHCA_DecodeBlock_transform(hca); for (ch = 0; ch < hca->channels; ch++) { - for (sf = 0; sf < HCA_SUBFRAMES_PER_FRAME; sf++) { + for (sf = 0; sf < HCA_SUBFRAMES; sf++) { for 
(s = 0; s < HCA_SAMPLES_PER_SUBFRAME; s++) { float fsample = hca->channel[ch].wave[sf][s]; @@ -1095,15 +1101,15 @@ void clHCA_DecodeReset(clHCA * hca) { stChannel* ch = &hca->channel[i]; /* most values get overwritten during decode */ - //memset(ch->intensity, 0, sizeof(ch->intensity[0]) * HCA_SUBFRAMES_PER_FRAME); + //memset(ch->intensity, 0, sizeof(ch->intensity[0]) * HCA_SUBFRAMES); //memset(ch->scalefactors, 0, sizeof(ch->scalefactors[0]) * HCA_SAMPLES_PER_SUBFRAME); //memset(ch->resolution, 0, sizeof(ch->resolution[0]) * HCA_SAMPLES_PER_SUBFRAME); //memset(ch->gain, 0, sizeof(ch->gain[0]) * HCA_SAMPLES_PER_SUBFRAME); - //memset(ch->spectra, 0, sizeof(ch->spectra[0]) * HCA_SAMPLES_PER_SUBFRAME); + //memset(ch->spectra, 0, sizeof(ch->spectra[0]) * HCA_SUBFRAMES * HCA_SAMPLES_PER_SUBFRAME); //memset(ch->temp, 0, sizeof(ch->temp[0]) * HCA_SAMPLES_PER_SUBFRAME); //memset(ch->dct, 0, sizeof(ch->dct[0]) * HCA_SAMPLES_PER_SUBFRAME); memset(ch->imdct_previous, 0, sizeof(ch->imdct_previous[0]) * HCA_SAMPLES_PER_SUBFRAME); - //memset(ch->wave, 0, sizeof(ch->wave[0][0]) * HCA_SUBFRAMES_PER_FRAME * HCA_SUBFRAMES_PER_FRAME); + //memset(ch->wave, 0, sizeof(ch->wave[0][0]) * HCA_SUBFRAMES * HCA_SUBFRAMES); } } @@ -1119,23 +1125,21 @@ static void calculate_resolution(stChannel* ch, unsigned int packed_noise_level, static void calculate_gain(stChannel* ch); -static void dequantize_coefficients(stChannel* ch, clData* br); +static void dequantize_coefficients(stChannel* ch, clData* br, int subframe); -static void reconstruct_noise(stChannel* ch, unsigned int min_resolution, unsigned int ms_stereo, unsigned int* random_p); +static void reconstruct_noise(stChannel* ch, unsigned int min_resolution, unsigned int ms_stereo, unsigned int* random_p, int subframe); static void reconstruct_high_frequency(stChannel* ch, unsigned int hfr_group_count, unsigned int bands_per_hfr_group, - unsigned int stereo_band_count, unsigned int base_band_count, unsigned int total_band_count, unsigned int 
version); + unsigned int stereo_band_count, unsigned int base_band_count, unsigned int total_band_count, unsigned int version, int subframe); static void apply_intensity_stereo(stChannel* ch_pair, int subframe, unsigned int base_band_count, unsigned int total_band_count); -static void apply_ms_stereo(stChannel* ch_pair, unsigned int ms_stereo, unsigned int base_band_count, unsigned int total_band_count); +static void apply_ms_stereo(stChannel* ch_pair, unsigned int ms_stereo, unsigned int base_band_count, unsigned int total_band_count, int subframe); static void imdct_transform(stChannel* ch, int subframe); -/* takes HCA data and decodes all of a frame's samples */ -//hcadecoder_decode_block -int clHCA_DecodeBlock(clHCA* hca, void *data, unsigned int size) { +static int clHCA_DecodeBlock_unpack(clHCA* hca, void *data, unsigned int size) { clData br; unsigned short sync; unsigned int subframe, ch; @@ -1180,19 +1184,29 @@ int clHCA_DecodeBlock(clHCA* hca, void *data, unsigned int size) { } /* lib seems to use a state value to skip parts (unpacking/subframe N/etc) as needed */ - for (subframe = 0; subframe < HCA_SUBFRAMES_PER_FRAME; subframe++) { + for (subframe = 0; subframe < HCA_SUBFRAMES; subframe++) { /* unpack channel data and get dequantized spectra */ for (ch = 0; ch < hca->channels; ch++){ - dequantize_coefficients(&hca->channel[ch], &br); + dequantize_coefficients(&hca->channel[ch], &br, subframe); } + /* original code transforms subframe here, but we have it for later */ + } + + return br.bit; /* numbers of read bits for validations */ +} + +static void clHCA_DecodeBlock_transform(clHCA* hca) { + unsigned int subframe, ch; + + for (subframe = 0; subframe < HCA_SUBFRAMES; subframe++) { /* restore missing bands from spectra */ for (ch = 0; ch < hca->channels; ch++) { - reconstruct_noise(&hca->channel[ch], hca->min_resolution, hca->ms_stereo, &hca->random); + reconstruct_noise(&hca->channel[ch], hca->min_resolution, hca->ms_stereo, &hca->random, subframe); 
reconstruct_high_frequency(&hca->channel[ch], hca->hfr_group_count, hca->bands_per_hfr_group, - hca->stereo_band_count, hca->base_band_count, hca->total_band_count, hca->version); + hca->stereo_band_count, hca->base_band_count, hca->total_band_count, hca->version, subframe); } /* restore missing joint stereo bands */ @@ -1200,7 +1214,7 @@ int clHCA_DecodeBlock(clHCA* hca, void *data, unsigned int size) { for (ch = 0; ch < hca->channels - 1; ch++) { apply_intensity_stereo(&hca->channel[ch], subframe, hca->base_band_count, hca->total_band_count); - apply_ms_stereo(&hca->channel[ch], hca->ms_stereo, hca->base_band_count, hca->total_band_count); + apply_ms_stereo(&hca->channel[ch], hca->ms_stereo, hca->base_band_count, hca->total_band_count, subframe); } } @@ -1209,9 +1223,27 @@ int clHCA_DecodeBlock(clHCA* hca, void *data, unsigned int size) { imdct_transform(&hca->channel[ch], subframe); } } +} - return br.bit; /* numbers of read bits for validations */ +/* takes HCA data and decodes all of a frame's samples */ +//hcadecoder_decode_block +int clHCA_DecodeBlock(clHCA* hca, void *data, unsigned int size) { + int res; + + /* Original HCA code doesn't separate unpack + transform (unpacks most data, + * reads a subframe's spectra, transforms that subframe). + * + * Unpacking first takes a bit more memory (1 spectra per subframe) but tests keys faster + * (since unpack may fail with bad keys we can skip transform). For regular decoding, this + * way somehow is slightly faster? (~3-5%, extra compiler optimizations with reduced scope?) 
*/ + + res = clHCA_DecodeBlock_unpack(hca, data, size); + if (res < 0) + return res; + clHCA_DecodeBlock_transform(hca); + + return res; } //-------------------------------------------------- @@ -1330,7 +1362,7 @@ static int unpack_intensity(stChannel* ch, clData* br, unsigned int hfr_group_co ch->intensity[0] = value; if (value < 15) { bitreader_skip(br, 4); - for (i = 1; i < HCA_SUBFRAMES_PER_FRAME; i++) { + for (i = 1; i < HCA_SUBFRAMES; i++) { ch->intensity[i] = bitreader_read(br, 4); } } @@ -1352,7 +1384,7 @@ static int unpack_intensity(stChannel* ch, clData* br, unsigned int hfr_group_co ch->intensity[0] = value; if (delta_bits == 3) { /* 3+1 = 4b */ /* fixed intensities */ - for (i = 1; i < HCA_SUBFRAMES_PER_FRAME; i++) { + for (i = 1; i < HCA_SUBFRAMES; i++) { ch->intensity[i] = bitreader_read(br, 4); } } @@ -1361,7 +1393,7 @@ static int unpack_intensity(stChannel* ch, clData* br, unsigned int hfr_group_co unsigned char bmax = (2 << delta_bits) - 1; unsigned char bits = delta_bits + 1; - for (i = 1; i < HCA_SUBFRAMES_PER_FRAME; i++) { + for (i = 1; i < HCA_SUBFRAMES; i++) { unsigned char delta = bitreader_read(br, bits); if (delta == bmax) { value = bitreader_read(br, 4); /* encoded */ @@ -1378,7 +1410,7 @@ static int unpack_intensity(stChannel* ch, clData* br, unsigned int hfr_group_co } else { bitreader_skip(br, 4); - for (i = 0; i < HCA_SUBFRAMES_PER_FRAME; i++) { + for (i = 0; i < HCA_SUBFRAMES; i++) { ch->intensity[i] = 7; } } @@ -1498,7 +1530,7 @@ static const float hcatbdecoder_read_val_table[128] = { }; /* read spectral coefficients in the bitstream */ -static void dequantize_coefficients(stChannel* ch, clData* br) { +static void dequantize_coefficients(stChannel* ch, clData* br, int subframe) { int i; unsigned int cc_count = ch->coded_count; @@ -1524,11 +1556,11 @@ static void dequantize_coefficients(stChannel* ch, clData* br) { } /* dequantize coef with gain */ - ch->spectra[i] = ch->gain[i] * qc; + ch->spectra[subframe][i] = ch->gain[i] * qc; } 
/* clean rest of spectra */ - memset(&ch->spectra[cc_count], 0, sizeof(ch->spectra[0]) * (HCA_SAMPLES_PER_SUBFRAME - cc_count)); + memset(&ch->spectra[subframe][cc_count], 0, sizeof(ch->spectra[subframe][0]) * (HCA_SAMPLES_PER_SUBFRAME - cc_count)); } @@ -1560,7 +1592,7 @@ static const float* hcadecoder_scale_conversion_table = (const float*)hcadecoder /* recreate resolution 0 coefs (not encoded) with pseudo-random noise based on * other coefs/scales (probably similar to AAC's perceptual noise substitution) */ -static void reconstruct_noise(stChannel* ch, unsigned int min_resolution, unsigned int ms_stereo, unsigned int* random_p) { +static void reconstruct_noise(stChannel* ch, unsigned int min_resolution, unsigned int ms_stereo, unsigned int* random_p, int subframe) { if (min_resolution > 0) /* added in v3.0 */ return; if (ch->valid_count <= 0 || ch->noise_count <= 0) @@ -1587,7 +1619,8 @@ static void reconstruct_noise(stChannel* ch, unsigned int min_resolution, unsign sf_valid = ch->scalefactors[valid_index]; sc_index = (sf_noise - sf_valid + 62) & ~((sf_noise - sf_valid + 62) >> 31); - ch->spectra[noise_index] = hcadecoder_scale_conversion_table[sc_index] * ch->spectra[valid_index]; + ch->spectra[subframe][noise_index] = + hcadecoder_scale_conversion_table[sc_index] * ch->spectra[subframe][valid_index]; } *random_p = random; /* lib saves this in the bitreader, maybe for simplified passing around */ @@ -1596,7 +1629,7 @@ static void reconstruct_noise(stChannel* ch, unsigned int min_resolution, unsign /* recreate missing coefs in high bands based on lower bands (probably similar to AAC's spectral band replication) */ static void reconstruct_high_frequency(stChannel* ch, unsigned int hfr_group_count, unsigned int bands_per_hfr_group, - unsigned int stereo_band_count, unsigned int base_band_count, unsigned int total_band_count, unsigned int version) { + unsigned int stereo_band_count, unsigned int base_band_count, unsigned int total_band_count, unsigned int version, 
int subframe) { if (bands_per_hfr_group == 0) /* added in v2.0, skipped in v2.0 files with 0 bands too */ return; if (ch->type == STEREO_SECONDARY) @@ -1630,7 +1663,7 @@ static void reconstruct_high_frequency(stChannel* ch, unsigned int hfr_group_cou sc_index = hfr_scales[group] - ch->scalefactors[lowband] + 63; sc_index = sc_index & ~(sc_index >> 31); /* clamped in v3.0 lib (in theory 6b sf are 0..128) */ - ch->spectra[highband] = hcadecoder_scale_conversion_table[sc_index] * ch->spectra[lowband]; + ch->spectra[subframe][highband] = hcadecoder_scale_conversion_table[sc_index] * ch->spectra[subframe][lowband]; highband += 1; lowband -= lowband_sub; @@ -1638,7 +1671,7 @@ static void reconstruct_high_frequency(stChannel* ch, unsigned int hfr_group_cou } /* last spectrum coefficient is 0 (normally highband = 128, but perhaps could 'break' before) */ - ch->spectra[highband - 1] = 0.0f; + ch->spectra[subframe][highband - 1] = 0.0f; } } @@ -1661,8 +1694,8 @@ static void apply_intensity_stereo(stChannel* ch_pair, int subframe, unsigned in int band; float ratio_l = hcadecoder_intensity_ratio_table[ ch_pair[1].intensity[subframe] ]; float ratio_r = 2.0f - ratio_l; /* correct, though other decoders substract 2.0 (it does use 'fsubr 2.0' and such) */ - float* sp_l = ch_pair[0].spectra; - float* sp_r = ch_pair[1].spectra; + float* sp_l = &ch_pair[0].spectra[subframe][0]; + float* sp_r = &ch_pair[1].spectra[subframe][0]; for (band = base_band_count; band < total_band_count; band++) { float coef_l = sp_l[band] * ratio_l; @@ -1674,7 +1707,7 @@ static void apply_intensity_stereo(stChannel* ch_pair, int subframe, unsigned in } /* restore L/R bands based on mid channel + side differences */ -static void apply_ms_stereo(stChannel* ch_pair, unsigned int ms_stereo, unsigned int base_band_count, unsigned int total_band_count) { +static void apply_ms_stereo(stChannel* ch_pair, unsigned int ms_stereo, unsigned int base_band_count, unsigned int total_band_count, int subframe) { if 
(!ms_stereo) /* added in v3.0 */ return; if (ch_pair[0].type != STEREO_PRIMARY) @@ -1683,8 +1716,8 @@ static void apply_ms_stereo(stChannel* ch_pair, unsigned int ms_stereo, unsigned { int band; const float ratio = 0.70710676908493; /* 0x3F3504F3 */ - float* sp_l = ch_pair[0].spectra; - float* sp_r = ch_pair[1].spectra; + float* sp_l = &ch_pair[0].spectra[subframe][0]; + float* sp_r = &ch_pair[1].spectra[subframe][0]; for (band = base_band_count; band < total_band_count; band++) { float coef_l = (sp_l[band] + sp_r[band]) * ratio; @@ -1867,8 +1900,8 @@ static void imdct_transform(stChannel* ch, int subframe) { { unsigned int count1 = 1; unsigned int count2 = half; - float* temp1 = ch->spectra; - float* temp2 = ch->temp; + float* temp1 = &ch->spectra[subframe][0]; + float* temp2 = &ch->temp[0]; for (i = 0; i < mdct_bits; i++) { float* swap; @@ -1897,8 +1930,8 @@ static void imdct_transform(stChannel* ch, int subframe) { { unsigned int count1 = half; unsigned int count2 = 1; - float* temp1 = ch->temp; - float* temp2 = ch->spectra; + float* temp1 = &ch->temp[0]; + float* temp2 = &ch->spectra[subframe][0]; for (i = 0; i < mdct_bits; i++) { const float* sin_table = (const float*) sin_tables_hex[i];//todo cleanup @@ -1934,15 +1967,15 @@ static void imdct_transform(stChannel* ch, int subframe) { /* copy dct */ /* (with the above optimization spectra is already modified, so this is redundant) */ for (i = 0; i < size; i++) { - ch->dct[i] = ch->spectra[i]; + ch->dct[i] = ch->spectra[subframe][i]; } #endif } /* update output/imdct with overlapped window (lib fuses this with the above) */ { - const float* dct = ch->spectra; //ch->dct; - const float* prev = ch->imdct_previous; + const float* dct = &ch->spectra[subframe][0]; //ch->dct; + const float* prev = &ch->imdct_previous[0]; for (i = 0; i < half; i++) { ch->wave[subframe][i] = hcaimdct_window_float[i] * dct[i + half] + prev[i];