diff --git a/ext_libs/clHCA.c b/ext_libs/clHCA.c index eb5dfdf4..7d48cd2f 100644 --- a/ext_libs/clHCA.c +++ b/ext_libs/clHCA.c @@ -325,26 +325,25 @@ int clHCA_getInfo(clHCA* hca, clHCA_stInfo *info) { return 0; } +//HCADecoder_DecodeBlockInt32 void clHCA_ReadSamples16(clHCA* hca, signed short *samples) { - const float scale = 32768.0f; + const float scale_f = 32768.0f; float f; signed int s; unsigned int i, j, k; + /* PCM output is generally unused, but lib functions seem to use SIMD for f32 to s32 + round to zero */ for (i = 0; i < HCA_SUBFRAMES_PER_FRAME; i++) { for (j = 0; j < HCA_SAMPLES_PER_SUBFRAME; j++) { for (k = 0; k < hca->channels; k++) { f = hca->channel[k].wave[i][j]; //f = f * hca->rva_volume; /* rare, won't apply for now */ - if (f > 1.0f) { - f = 1.0f; - } else if (f < -1.0f) { - f = -1.0f; - } - s = (signed int) (f * scale); - if ((unsigned) (s + 0x8000) & 0xFFFF0000) - s = (s >> 31) ^ 0x7FFF; - *samples++ = (signed short) s; + s = (signed int)(f * scale_f); + if (s > 32767) + s = 32767; + else if (s < -32768) + s = -32768; + *samples++ = (signed short)s; } } } @@ -1860,57 +1859,58 @@ static void imdct_transform(stChannel* ch, int subframe) { static const unsigned int size = HCA_SAMPLES_PER_SUBFRAME; static const unsigned int half = HCA_SAMPLES_PER_SUBFRAME / 2; static const unsigned int mdct_bits = HCA_MDCT_BITS; + unsigned int i, j, k; + /* This IMDCT (supposedly standard) is all too crafty for me to simplify, see VGAudio (Mdct.Dct4). */ + + /* pre-pre-rotation(?) 
*/ { - unsigned int i, j, k; - unsigned int count1a, count2a, count1b, count2b; - const float *temp1a, *temp1b; - float *temp2a, *temp2b; + unsigned int count1 = 1; + unsigned int count2 = half; + float* temp1 = ch->spectra; + float* temp2 = ch->temp; - /* this is all too crafty for me to simplify, see VGAudio (Mdct.Dct4) */ - - temp1a = ch->spectra; - temp2a = ch->temp; - count1a = 1; - count2a = half; for (i = 0; i < mdct_bits; i++) { float* swap; - float* d1 = &temp2a[0]; - float* d2 = &temp2a[count2a]; + float* d1 = &temp2[0]; + float* d2 = &temp2[count2]; - for (j = 0; j < count1a; j++) { - for (k = 0; k < count2a; k++) { - float a = *(temp1a++); - float b = *(temp1a++); - *(d1++) = b + a; + for (j = 0; j < count1; j++) { + for (k = 0; k < count2; k++) { + float a = *(temp1++); + float b = *(temp1++); + *(d1++) = a + b; *(d2++) = a - b; } - d1 += count2a; - d2 += count2a; + d1 += count2; + d2 += count2; } - swap = (float*) temp1a - HCA_SAMPLES_PER_SUBFRAME; /* move spectra/temp to beginning */ - temp1a = temp2a; - temp2a = swap; + swap = temp1 - HCA_SAMPLES_PER_SUBFRAME; /* move spectra or temp to beginning */ + temp1 = temp2; + temp2 = swap; - count1a = count1a << 1; - count2a = count2a >> 1; + count1 = count1 << 1; + count2 = count2 >> 1; } + } + + { + unsigned int count1 = half; + unsigned int count2 = 1; + float* temp1 = ch->temp; + float* temp2 = ch->spectra; - temp1b = ch->temp; - temp2b = ch->spectra; - count1b = half; - count2b = 1; for (i = 0; i < mdct_bits; i++) { const float* sin_table = (const float*) sin_tables_hex[i];//todo cleanup const float* cos_table = (const float*) cos_tables_hex[i]; float* swap; - float* d1 = temp2b; - float* d2 = &temp2b[count2b * 2 - 1]; - const float* s1 = &temp1b[0]; - const float* s2 = &temp1b[count2b]; + float* d1 = &temp2[0]; + float* d2 = &temp2[count2 * 2 - 1]; + const float* s1 = &temp1[0]; + const float* s2 = &temp1[count2]; - for (j = 0; j < count1b; j++) { - for (k = 0; k < count2b; k++) { + for (j = 0; j < 
count1; j++) { + for (k = 0; k < count2; k++) { float a = *(s1++); float b = *(s2++); float sin = *(sin_table++); @@ -1918,38 +1918,41 @@ static void imdct_transform(stChannel* ch, int subframe) { *(d1++) = a * sin - b * cos; *(d2--) = a * cos + b * sin; } - s1 += count2b; - s2 += count2b; - d1 += count2b; - d2 += count2b * 3; + s1 += count2; + s2 += count2; + d1 += count2; + d2 += count2 * 3; } - swap = (float*) temp1b; - temp1b = temp2b; - temp2b = swap; + swap = temp1; + temp1 = temp2; + temp2 = swap; - count1b = count1b >> 1; - count2b = count2b << 1; + count1 = count1 >> 1; + count2 = count2 << 1; } - +#if 0 /* copy dct */ /* (with the above optimization spectra is already modified, so this is redundant) */ for (i = 0; i < size; i++) { ch->dct[i] = ch->spectra[i]; } +#endif } - /* update output/imdct (lib fuses this with the above) */ + /* update output/imdct with overlapped window (lib fuses this with the above) */ { unsigned int i; + const float* dct = ch->spectra; //ch->dct; + const float* prev = ch->imdct_previous; for (i = 0; i < half; i++) { - ch->wave[subframe][i] = hcaimdct_window_float[i] * ch->dct[i + half] + ch->imdct_previous[i]; - ch->wave[subframe][i + half] = hcaimdct_window_float[i + half] * ch->dct[size - 1 - i] - ch->imdct_previous[i + half]; - ch->imdct_previous[i] = hcaimdct_window_float[size - 1 - i] * ch->dct[half - i - 1]; - ch->imdct_previous[i + half] = hcaimdct_window_float[half - i - 1] * ch->dct[i]; + ch->wave[subframe][i] = hcaimdct_window_float[i] * dct[i + half] + prev[i]; + ch->wave[subframe][i + half] = hcaimdct_window_float[i + half] * dct[size - 1 - i] - prev[i + half]; + ch->imdct_previous[i] = hcaimdct_window_float[size - 1 - i] * dct[half - i - 1]; + ch->imdct_previous[i + half] = hcaimdct_window_float[half - i - 1] * dct[i]; } #if 0 - /* over-optimized IMDCT (for reference), barely noticeable even when decoding hundred of files */ + /* over-optimized IMDCT window (for reference), barely noticeable even when decoding 
hundreds of files */ const float* imdct_window = hcaimdct_window_float; const float* dct; float* imdct_previous;