mirror of
https://github.com/vgmstream/vgmstream.git
synced 2024-11-28 00:20:47 +01:00
Small HCA optimization
This commit is contained in:
parent
35a6c5c2f9
commit
9ec4877ee7
123
ext_libs/clHCA.c
123
ext_libs/clHCA.c
@ -325,26 +325,25 @@ int clHCA_getInfo(clHCA* hca, clHCA_stInfo *info) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//HCADecoder_DecodeBlockInt32
|
||||||
void clHCA_ReadSamples16(clHCA* hca, signed short *samples) {
|
void clHCA_ReadSamples16(clHCA* hca, signed short *samples) {
|
||||||
const float scale = 32768.0f;
|
const float scale_f = 32768.0f;
|
||||||
float f;
|
float f;
|
||||||
signed int s;
|
signed int s;
|
||||||
unsigned int i, j, k;
|
unsigned int i, j, k;
|
||||||
|
|
||||||
|
/* PCM output is generally unused, but lib functions seem to use SIMD for f32 to s32 + round to zero */
|
||||||
for (i = 0; i < HCA_SUBFRAMES_PER_FRAME; i++) {
|
for (i = 0; i < HCA_SUBFRAMES_PER_FRAME; i++) {
|
||||||
for (j = 0; j < HCA_SAMPLES_PER_SUBFRAME; j++) {
|
for (j = 0; j < HCA_SAMPLES_PER_SUBFRAME; j++) {
|
||||||
for (k = 0; k < hca->channels; k++) {
|
for (k = 0; k < hca->channels; k++) {
|
||||||
f = hca->channel[k].wave[i][j];
|
f = hca->channel[k].wave[i][j];
|
||||||
//f = f * hca->rva_volume; /* rare, won't apply for now */
|
//f = f * hca->rva_volume; /* rare, won't apply for now */
|
||||||
if (f > 1.0f) {
|
s = (signed int)(f * scale_f);
|
||||||
f = 1.0f;
|
if (s > 32767)
|
||||||
} else if (f < -1.0f) {
|
s = 32767;
|
||||||
f = -1.0f;
|
else if (s < -32768)
|
||||||
}
|
s = -32768;
|
||||||
s = (signed int) (f * scale);
|
*samples++ = (signed short)s;
|
||||||
if ((unsigned) (s + 0x8000) & 0xFFFF0000)
|
|
||||||
s = (s >> 31) ^ 0x7FFF;
|
|
||||||
*samples++ = (signed short) s;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1860,57 +1859,58 @@ static void imdct_transform(stChannel* ch, int subframe) {
|
|||||||
static const unsigned int size = HCA_SAMPLES_PER_SUBFRAME;
|
static const unsigned int size = HCA_SAMPLES_PER_SUBFRAME;
|
||||||
static const unsigned int half = HCA_SAMPLES_PER_SUBFRAME / 2;
|
static const unsigned int half = HCA_SAMPLES_PER_SUBFRAME / 2;
|
||||||
static const unsigned int mdct_bits = HCA_MDCT_BITS;
|
static const unsigned int mdct_bits = HCA_MDCT_BITS;
|
||||||
|
unsigned int i, j, k;
|
||||||
|
|
||||||
|
/* This IMDCT (supposedly standard) is all too crafty for me to simplify, see VGAudio (Mdct.Dct4). */
|
||||||
|
|
||||||
|
/* pre-pre-rotation(?) */
|
||||||
{
|
{
|
||||||
unsigned int i, j, k;
|
unsigned int count1 = 1;
|
||||||
unsigned int count1a, count2a, count1b, count2b;
|
unsigned int count2 = half;
|
||||||
const float *temp1a, *temp1b;
|
float* temp1 = ch->spectra;
|
||||||
float *temp2a, *temp2b;
|
float* temp2 = ch->temp;
|
||||||
|
|
||||||
/* this is all too crafty for me to simplify, see VGAudio (Mdct.Dct4) */
|
|
||||||
|
|
||||||
temp1a = ch->spectra;
|
|
||||||
temp2a = ch->temp;
|
|
||||||
count1a = 1;
|
|
||||||
count2a = half;
|
|
||||||
for (i = 0; i < mdct_bits; i++) {
|
for (i = 0; i < mdct_bits; i++) {
|
||||||
float* swap;
|
float* swap;
|
||||||
float* d1 = &temp2a[0];
|
float* d1 = &temp2[0];
|
||||||
float* d2 = &temp2a[count2a];
|
float* d2 = &temp2[count2];
|
||||||
|
|
||||||
for (j = 0; j < count1a; j++) {
|
for (j = 0; j < count1; j++) {
|
||||||
for (k = 0; k < count2a; k++) {
|
for (k = 0; k < count2; k++) {
|
||||||
float a = *(temp1a++);
|
float a = *(temp1++);
|
||||||
float b = *(temp1a++);
|
float b = *(temp1++);
|
||||||
*(d1++) = b + a;
|
*(d1++) = a + b;
|
||||||
*(d2++) = a - b;
|
*(d2++) = a - b;
|
||||||
}
|
}
|
||||||
d1 += count2a;
|
d1 += count2;
|
||||||
d2 += count2a;
|
d2 += count2;
|
||||||
}
|
}
|
||||||
swap = (float*) temp1a - HCA_SAMPLES_PER_SUBFRAME; /* move spectra/temp to beginning */
|
swap = temp1 - HCA_SAMPLES_PER_SUBFRAME; /* move spectra or temp to beginning */
|
||||||
temp1a = temp2a;
|
temp1 = temp2;
|
||||||
temp2a = swap;
|
temp2 = swap;
|
||||||
|
|
||||||
count1a = count1a << 1;
|
count1 = count1 << 1;
|
||||||
count2a = count2a >> 1;
|
count2 = count2 >> 1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
unsigned int count1 = half;
|
||||||
|
unsigned int count2 = 1;
|
||||||
|
float* temp1 = ch->temp;
|
||||||
|
float* temp2 = ch->spectra;
|
||||||
|
|
||||||
temp1b = ch->temp;
|
|
||||||
temp2b = ch->spectra;
|
|
||||||
count1b = half;
|
|
||||||
count2b = 1;
|
|
||||||
for (i = 0; i < mdct_bits; i++) {
|
for (i = 0; i < mdct_bits; i++) {
|
||||||
const float* sin_table = (const float*) sin_tables_hex[i];//todo cleanup
|
const float* sin_table = (const float*) sin_tables_hex[i];//todo cleanup
|
||||||
const float* cos_table = (const float*) cos_tables_hex[i];
|
const float* cos_table = (const float*) cos_tables_hex[i];
|
||||||
float* swap;
|
float* swap;
|
||||||
float* d1 = temp2b;
|
float* d1 = &temp2[0];
|
||||||
float* d2 = &temp2b[count2b * 2 - 1];
|
float* d2 = &temp2[count2 * 2 - 1];
|
||||||
const float* s1 = &temp1b[0];
|
const float* s1 = &temp1[0];
|
||||||
const float* s2 = &temp1b[count2b];
|
const float* s2 = &temp1[count2];
|
||||||
|
|
||||||
for (j = 0; j < count1b; j++) {
|
for (j = 0; j < count1; j++) {
|
||||||
for (k = 0; k < count2b; k++) {
|
for (k = 0; k < count2; k++) {
|
||||||
float a = *(s1++);
|
float a = *(s1++);
|
||||||
float b = *(s2++);
|
float b = *(s2++);
|
||||||
float sin = *(sin_table++);
|
float sin = *(sin_table++);
|
||||||
@ -1918,38 +1918,41 @@ static void imdct_transform(stChannel* ch, int subframe) {
|
|||||||
*(d1++) = a * sin - b * cos;
|
*(d1++) = a * sin - b * cos;
|
||||||
*(d2--) = a * cos + b * sin;
|
*(d2--) = a * cos + b * sin;
|
||||||
}
|
}
|
||||||
s1 += count2b;
|
s1 += count2;
|
||||||
s2 += count2b;
|
s2 += count2;
|
||||||
d1 += count2b;
|
d1 += count2;
|
||||||
d2 += count2b * 3;
|
d2 += count2 * 3;
|
||||||
}
|
}
|
||||||
swap = (float*) temp1b;
|
swap = temp1;
|
||||||
temp1b = temp2b;
|
temp1 = temp2;
|
||||||
temp2b = swap;
|
temp2 = swap;
|
||||||
|
|
||||||
count1b = count1b >> 1;
|
count1 = count1 >> 1;
|
||||||
count2b = count2b << 1;
|
count2 = count2 << 1;
|
||||||
}
|
}
|
||||||
|
#if 0
|
||||||
/* copy dct */
|
/* copy dct */
|
||||||
/* (with the above optimization spectra is already modified, so this is redundant) */
|
/* (with the above optimization spectra is already modified, so this is redundant) */
|
||||||
for (i = 0; i < size; i++) {
|
for (i = 0; i < size; i++) {
|
||||||
ch->dct[i] = ch->spectra[i];
|
ch->dct[i] = ch->spectra[i];
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update output/imdct (lib fuses this with the above) */
|
/* update output/imdct with overlapped window (lib fuses this with the above) */
|
||||||
{
|
{
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
const float* dct = ch->spectra; //ch->dct;
|
||||||
|
const float* prev = ch->imdct_previous;
|
||||||
|
|
||||||
for (i = 0; i < half; i++) {
|
for (i = 0; i < half; i++) {
|
||||||
ch->wave[subframe][i] = hcaimdct_window_float[i] * ch->dct[i + half] + ch->imdct_previous[i];
|
ch->wave[subframe][i] = hcaimdct_window_float[i] * dct[i + half] + prev[i];
|
||||||
ch->wave[subframe][i + half] = hcaimdct_window_float[i + half] * ch->dct[size - 1 - i] - ch->imdct_previous[i + half];
|
ch->wave[subframe][i + half] = hcaimdct_window_float[i + half] * dct[size - 1 - i] - prev[i + half];
|
||||||
ch->imdct_previous[i] = hcaimdct_window_float[size - 1 - i] * ch->dct[half - i - 1];
|
ch->imdct_previous[i] = hcaimdct_window_float[size - 1 - i] * dct[half - i - 1];
|
||||||
ch->imdct_previous[i + half] = hcaimdct_window_float[half - i - 1] * ch->dct[i];
|
ch->imdct_previous[i + half] = hcaimdct_window_float[half - i - 1] * dct[i];
|
||||||
}
|
}
|
||||||
#if 0
|
#if 0
|
||||||
/* over-optimized IMDCT (for reference), barely noticeable even when decoding hundreds of files */
|
/* over-optimized IMDCT window (for reference), barely noticeable even when decoding hundreds of files */
|
||||||
const float* imdct_window = hcaimdct_window_float;
|
const float* imdct_window = hcaimdct_window_float;
|
||||||
const float* dct;
|
const float* dct;
|
||||||
float* imdct_previous;
|
float* imdct_previous;
|
||||||
|
Loading…
Reference in New Issue
Block a user