mirror of
https://github.com/vgmstream/vgmstream.git
synced 2024-12-20 10:25:53 +01:00
1284 lines
48 KiB
C
1284 lines
48 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "g7221_decoder_lib.h"
|
|
#include "g7221_decoder_aes.h"
|
|
|
|
|
|
/* Decodes Siren14 from Namco's BNSF, a mono MLT/DCT-based codec for speech/sound (low bandwidth).
|
|
* Reverse engineered for various exes with info from Polycom's reference int decoder.
|
|
* - Reference decoder and codec info: https://www.itu.int/rec/T-REC-G.722.1/en
|
|
*
|
|
* Technically the name is "ITU-T G.722.1 Annex C" (official ITU-T spec), while "Siren14"
|
|
* was Polycom's original format with slightly different frames, though Namco calls it
|
|
* "Siren14 Version 3.02 For Products" while using G.722.1's frame format.
|
|
* Siren7 (7000hz bandwidth) isn't supported, only Siren14 (14000hz).
|
|
*
|
|
* Very roughly the encoder works like this:
|
|
* - Apply a Modulated Lapped Transform (MLT) function over 640*2 samples to get spectrum
|
|
* coefficients (can be decomposed into a window, overlap and add with a DCT-IV, of samples
|
|
* from a current frame and samples from a prev frame).
|
|
* - resulting coefs are divided into 28 bands called "regions" of 500hz.
|
|
* - Each region contains 20 MLT spectrum coefs, total 28 regions * 500hz = 14000hz.
|
|
* - Bands above 14khz are ignored (overall output quality isn't high).
|
|
* - Pack amplitude envelope bits, defined as the RMS (Root-Mean-Square) of the coefs in
|
|
* the region. First region sets all bits, rest is differentially and huffman coded.
|
|
* Remaning bits are left to quantize coefs.
|
|
* - Regions are given a "category" to quantize, that define the number of quantization bits
|
|
* and other coding parameters. Results are combined into vector indices,
|
|
* and Huffman-coded (frequent vectors require less bits).
|
|
* Decoding thus unpacks amplitudes, region coefs and does inverse MLT.
|
|
*
|
|
* Namco roughly follows the reference decoder ('refdec') with some differences:
|
|
* - uses mostly int32, while refdec has int16 with exact rounding/overflow handling (no output diffs)
|
|
* - modified random number generator (minor output diffs)
|
|
* - very minor change in bit unpacking (minor output diffs)
|
|
* - modified DCT-IV optimizations, scaling and window functions (minor output diffs)
|
|
* - internally PCM16 bufs, but converts to float (sample/32768.0) afterwards if the platform needs it
|
|
* - less error control (on error decoder is supposed to repeat last coefs)
|
|
* - can't decode Siren7, and given output diffs it's not actually ITU-compliant
|
|
* - minor optimizations here and there but otherwise very similar
|
|
* This decoder generally uses Polycom's terminology, and while some parts like the bitreader could be
|
|
* reimplemented they are mostly untouched for documentation purposes.
|
|
*
|
|
* TODO: missing some validations (may segfault on bad data),
|
|
* access indexes with (idx & max) and clamp buffer reads
|
|
*/
|
|
|
|
#include "g7221_decoder_lib_data.h"
|
|
|
|
/*****************************************************************************
|
|
* IMLT
|
|
*****************************************************************************/
|
|
|
|
static int imlt_window(int16_t* new_samples, int16_t* old_samples, int16_t* out_samples) {
|
|
int i;
|
|
int sample_lo, sample_hi;
|
|
int16_t win_val_lo, win_val_hi, new_val, old_val;
|
|
const int16_t *win_ptr_lo, *win_ptr_hi;
|
|
int16_t *new_ptr, *old_ptr, *out_ptr_lo, *out_ptr_hi;
|
|
|
|
|
|
/* overlap 2nd half of prev frame's samples and 1st half of current frame's samples with
|
|
* a window function to smooth out between frames */
|
|
win_ptr_lo = imlt_samples_window + 0;
|
|
win_ptr_hi = imlt_samples_window + 640;
|
|
new_ptr = new_samples + 320;
|
|
old_ptr = old_samples + 0;
|
|
out_ptr_lo = out_samples + 0;
|
|
out_ptr_hi = out_samples + 640;
|
|
|
|
while (out_ptr_lo != out_ptr_hi) {
|
|
win_val_lo = *win_ptr_lo++;
|
|
win_val_hi = *--win_ptr_hi;
|
|
new_val = *--new_ptr;
|
|
old_val = *old_ptr++;
|
|
|
|
sample_lo = (new_val * win_val_lo + old_val * *win_ptr_hi + 32768) >> 13;
|
|
if (sample_lo > 32767)
|
|
sample_lo = 32767;
|
|
else if (sample_lo < -32768)
|
|
sample_lo = -32768;
|
|
*out_ptr_lo++ = sample_lo;
|
|
|
|
sample_hi = (new_val * win_val_hi - old_val * win_val_lo + 32768) >> 13;
|
|
if (sample_hi > 32767)
|
|
sample_hi = 32767;
|
|
else if (sample_hi < -32768)
|
|
sample_hi = -32768;
|
|
*--out_ptr_hi = sample_hi;
|
|
}
|
|
|
|
/* save the 2nd half of the new samples to use above in next frame */
|
|
old_ptr = old_samples + 0;
|
|
new_ptr = new_samples + 320;
|
|
|
|
for (i = 0; i < 320; i++) {
|
|
old_ptr[i] = new_ptr[i];
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* "dct4_x640_int" */
|
|
static int imlt_dct4(int16_t* mlt_coefs, int16_t* new_samples, int mag_shift) {
|
|
int i, j, k, n;
|
|
const uint8_t *set1_ptr;
|
|
int mod_shift, sub_shift;
|
|
|
|
|
|
/* vs refdec: very optimized, output is slightly different (louder) but it's massively
|
|
* faster (around 20% vs float refdec, int refdec was very slow to begin with).
|
|
* Can't quite clean this due to the complex math simplifications.
|
|
* Should correspond to: cos(PI*(t+0.5)*(k+0.5)/block_length) */
|
|
|
|
/* rotation butterflies? (cos/sin 640 groups) */
|
|
{
|
|
int cos_val, sin_val;
|
|
const uint16_t *cos_ptr, *sin_ptr;
|
|
int16_t mlt_val_lo, mlt_val_hi;
|
|
int16_t *mlt_ptr_lo, *mlt_ptr_hi;
|
|
|
|
mlt_ptr_lo = mlt_coefs + 0;
|
|
mlt_ptr_hi = mlt_coefs + 640;
|
|
cos_ptr = &imlt_cos_tables[0]; /* cos_table_64 */
|
|
sin_ptr = &imlt_sin_tables[0]; /* sin_table_64 */
|
|
|
|
for (i = 40; i > 0; --i) {
|
|
cos_val = *cos_ptr++;
|
|
sin_val = *sin_ptr++;
|
|
mlt_val_lo = *mlt_ptr_lo >> 1;
|
|
*mlt_ptr_lo++ = (cos_val * mlt_val_lo + 32768) >> 16;
|
|
*--mlt_ptr_hi = (sin_val * -mlt_val_lo + 32768) >> 16;
|
|
|
|
cos_val = *cos_ptr++;
|
|
sin_val = *sin_ptr++;
|
|
mlt_val_lo = *mlt_ptr_lo >> 1;
|
|
*mlt_ptr_lo++ = (cos_val * mlt_val_lo + 32768) >> 16;
|
|
*--mlt_ptr_hi = (sin_val * mlt_val_lo + 32768) >> 16;
|
|
}
|
|
|
|
for (i = 120; i > 0; --i) {
|
|
cos_val = *cos_ptr++;
|
|
sin_val = *sin_ptr++;
|
|
mlt_val_lo = *mlt_ptr_lo >> 1;
|
|
mlt_val_hi = *--mlt_ptr_hi >> 1;
|
|
*mlt_ptr_lo++ = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
*mlt_ptr_hi = (sin_val * -mlt_val_lo + cos_val * mlt_val_hi + 32768) >> 16;
|
|
|
|
cos_val = *cos_ptr++;
|
|
sin_val = *sin_ptr++;
|
|
mlt_val_lo = *mlt_ptr_lo >> 1;
|
|
mlt_val_hi = *--mlt_ptr_hi >> 1;
|
|
*mlt_ptr_lo++ = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
*mlt_ptr_hi = (sin_val * mlt_val_lo - cos_val * mlt_val_hi + 32768) >> 16;
|
|
}
|
|
}
|
|
|
|
/* sum/diff butterflies? */
|
|
{
|
|
int16_t mlt_val_lo, mlt_val_mlo, mlt_val_mhi, mlt_val_hi;
|
|
int16_t *mlt_ptr, *mlt_ptr_lo, *mlt_ptr_mlo, *mlt_ptr_mhi, *mlt_ptr_hi;
|
|
|
|
mlt_ptr = mlt_coefs + 0;
|
|
for (i = 2; i > 0; --i) {
|
|
mlt_ptr_lo = mlt_ptr + 0;
|
|
mlt_ptr_hi = mlt_ptr + 320;
|
|
mlt_ptr_mlo = mlt_ptr + 160;
|
|
mlt_ptr_mhi = mlt_ptr + 160;
|
|
for (j = 80; j > 0; --j) {
|
|
mlt_val_lo = *mlt_ptr_lo;
|
|
mlt_val_hi = *--mlt_ptr_hi;
|
|
mlt_val_mhi = *--mlt_ptr_mhi;
|
|
mlt_val_mlo = *mlt_ptr_mlo;
|
|
*mlt_ptr_lo++ = (mlt_val_hi + mlt_val_lo) >> 1;
|
|
*mlt_ptr_mlo++ = (mlt_val_lo - mlt_val_hi) >> 1;
|
|
*mlt_ptr_mhi = (mlt_val_mlo + mlt_val_mhi) >> 1;
|
|
*mlt_ptr_hi = (mlt_val_mhi - mlt_val_mlo) >> 1;
|
|
}
|
|
mlt_ptr += 320;
|
|
}
|
|
}
|
|
|
|
/* helper table used in next 3 sections */
|
|
set1_ptr = imlt_set1_table;
|
|
|
|
/* rotation butterflies? (cos/sin 160/80/40/20/10 groups) */
|
|
{
|
|
int cos_val, sin_val;
|
|
const uint16_t *cos_ptr, *sin_ptr, *cos_ptr_lo, *sin_ptr_lo;
|
|
int16_t mlt_val_lo, mlt_val_hi, mlt_val_mlo, mlt_val_mhi;
|
|
int16_t *mlt_ptr, *mlt_ptr_lo, *mlt_ptr_hi, *mlt_ptr_mlo, *mlt_ptr_mhi;
|
|
|
|
cos_ptr = &imlt_cos_tables[320+160]; /* cos_table_16 > 8 > 4 > 2 */
|
|
sin_ptr = &imlt_sin_tables[320+160]; /* sin_table_16 > 8 > 4 > 2 */
|
|
|
|
for (n = 160; n >= 20; n /= 2) {
|
|
mlt_ptr = mlt_coefs + 0;
|
|
while (mlt_ptr < mlt_coefs + 640) {
|
|
for (j = *set1_ptr; j > 0; --j) {
|
|
mlt_ptr_lo = mlt_ptr + 0;
|
|
mlt_ptr_hi = mlt_ptr + n;
|
|
mlt_ptr_mlo = mlt_ptr + (n / 2);
|
|
mlt_ptr_mhi = mlt_ptr + (n / 2);
|
|
for (k = n / 4; k > 0; --k) {
|
|
mlt_val_lo = *mlt_ptr_lo;
|
|
mlt_val_hi = *--mlt_ptr_hi;
|
|
mlt_val_mhi = *--mlt_ptr_mhi;
|
|
mlt_val_mlo = *mlt_ptr_mlo;
|
|
*mlt_ptr_lo++ = mlt_val_lo + mlt_val_hi;
|
|
*mlt_ptr_mlo++ = mlt_val_lo - mlt_val_hi;
|
|
*mlt_ptr_mhi = mlt_val_mlo + mlt_val_mhi;
|
|
*mlt_ptr_hi = mlt_val_mhi - mlt_val_mlo;
|
|
}
|
|
mlt_ptr += n;
|
|
}
|
|
set1_ptr++;
|
|
|
|
for (j = *set1_ptr; j > 0; --j) {
|
|
mlt_ptr_lo = mlt_ptr + 0;
|
|
mlt_ptr_hi = mlt_ptr + n;
|
|
cos_ptr_lo = cos_ptr + 0;
|
|
sin_ptr_lo = sin_ptr + 0;
|
|
for (k = n / 4; k > 0; --k) {
|
|
cos_val = *cos_ptr_lo++;
|
|
sin_val = *sin_ptr_lo++;
|
|
mlt_val_lo = *mlt_ptr_lo;
|
|
mlt_val_hi = *--mlt_ptr_hi;
|
|
*mlt_ptr_lo++ = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
*mlt_ptr_hi = (sin_val * -mlt_val_lo + cos_val * mlt_val_hi + 32768) >> 16;
|
|
|
|
cos_val = *cos_ptr_lo++;
|
|
sin_val = *sin_ptr_lo++;
|
|
mlt_val_lo = *mlt_ptr_lo;
|
|
mlt_val_hi = *--mlt_ptr_hi;
|
|
*mlt_ptr_lo++ = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
*mlt_ptr_hi = (sin_val * mlt_val_lo - cos_val * mlt_val_hi + 32768) >> 16;
|
|
}
|
|
mlt_ptr += n;
|
|
}
|
|
set1_ptr++;
|
|
}
|
|
|
|
/* next sub-tables */
|
|
cos_ptr += n / 2;
|
|
sin_ptr += n / 2;
|
|
}
|
|
}
|
|
|
|
/* rotation butterflies? (cos/sin 5 groups) */
|
|
{
|
|
int cos_val, sin_val;
|
|
const uint16_t *cos_ptr, *sin_ptr;
|
|
int16_t mlt_val_lo, mlt_val_hi, mlt_val_mlo, mlt_val_mhi;
|
|
int16_t *mlt_ptr;
|
|
|
|
/* n/cos-sin would continue from above but for clarity: */
|
|
cos_ptr = &imlt_cos_tables[320+160+80+40+20+10]; /* cos_table_1 */
|
|
sin_ptr = &imlt_sin_tables[320+160+80+40+20+10]; /* sin_table_1 */
|
|
|
|
{
|
|
n = 10;
|
|
mlt_ptr = mlt_coefs + 0;
|
|
while (mlt_ptr < mlt_coefs + 640) {
|
|
for (j = *set1_ptr; j > 0; --j) {
|
|
mlt_val_lo = mlt_ptr[0];
|
|
mlt_val_hi = mlt_ptr[n - 1];
|
|
mlt_val_mlo = mlt_ptr[n / 2 - 1];
|
|
mlt_val_mhi = mlt_ptr[n / 2];
|
|
mlt_ptr[0] = mlt_val_lo + mlt_val_hi;
|
|
mlt_ptr[n / 2] = mlt_val_lo - mlt_val_hi;
|
|
mlt_ptr[n / 2 - 1] = mlt_val_mhi + mlt_val_mlo;
|
|
mlt_ptr[n - 1] = mlt_val_mlo - mlt_val_mhi;
|
|
|
|
mlt_val_lo = mlt_ptr[1];
|
|
mlt_val_hi = mlt_ptr[n - 2];
|
|
mlt_val_mlo = mlt_ptr[n / 2 - 2];
|
|
mlt_val_mhi = mlt_ptr[n / 2 + 1];
|
|
mlt_ptr[1] = mlt_val_hi + mlt_val_lo;
|
|
mlt_ptr[n / 2 + 1] = mlt_val_lo - mlt_val_hi;
|
|
mlt_ptr[n / 2 - 2] = mlt_val_mhi + mlt_val_mlo;
|
|
mlt_ptr[n - 2] = mlt_val_mlo - mlt_val_mhi;
|
|
|
|
mlt_val_lo = mlt_ptr[2];
|
|
mlt_val_hi = mlt_ptr[n - 3];
|
|
mlt_ptr[2] = mlt_val_hi + mlt_val_lo;
|
|
mlt_ptr[n / 2 + 2] = mlt_val_lo - mlt_val_hi;
|
|
|
|
mlt_ptr += n;
|
|
}
|
|
|
|
cos_val = cos_ptr[0];
|
|
sin_val = sin_ptr[0];
|
|
mlt_val_lo = mlt_ptr[0];
|
|
mlt_val_hi = mlt_ptr[n - 1];
|
|
mlt_ptr[0] = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
mlt_ptr[n - 1] = (sin_val * mlt_val_lo - cos_val * mlt_val_hi + 32768) >> 16;
|
|
|
|
cos_val = cos_ptr[1];
|
|
sin_val = sin_ptr[1];
|
|
mlt_val_lo = mlt_ptr[1];
|
|
mlt_val_hi = mlt_ptr[n - 2];
|
|
mlt_ptr[1] = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
mlt_ptr[n - 2] = (sin_val * -mlt_val_lo + cos_val * mlt_val_hi + 32768) >> 16;
|
|
|
|
cos_val = cos_ptr[2];
|
|
sin_val = sin_ptr[2];
|
|
mlt_val_lo = mlt_ptr[2];
|
|
mlt_val_hi = mlt_ptr[n - 3];
|
|
mlt_ptr[2] = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
mlt_ptr[n - 3]= (sin_val * mlt_val_lo - cos_val * mlt_val_hi + 32768) >> 16;
|
|
|
|
cos_val = cos_ptr[3];
|
|
sin_val = sin_ptr[3];
|
|
mlt_val_lo = mlt_ptr[3];
|
|
mlt_val_hi = mlt_ptr[n - 4];
|
|
mlt_ptr[3] = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
mlt_ptr[n - 4] = (sin_val * -mlt_val_lo + cos_val * mlt_val_hi + 32768) >> 16;
|
|
|
|
cos_val = cos_ptr[4];
|
|
sin_val = sin_ptr[4];
|
|
mlt_val_lo = mlt_ptr[4];
|
|
mlt_val_hi = mlt_ptr[n - 5];
|
|
mlt_ptr[4] = (cos_val * mlt_val_lo + sin_val * mlt_val_hi + 32768) >> 16;
|
|
mlt_ptr[n - 5] = (sin_val * mlt_val_lo - cos_val * mlt_val_hi + 32768) >> 16;
|
|
|
|
mlt_ptr += n;
|
|
set1_ptr += 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
mod_shift = mag_shift - 1;
|
|
sub_shift = 1;
|
|
if (mod_shift >= 8)
|
|
sub_shift = 2;
|
|
mod_shift -= sub_shift;
|
|
|
|
|
|
/* dct core? */
|
|
{
|
|
const int16_t *mlt_ptr;
|
|
int16_t *new_ptr;
|
|
|
|
mlt_ptr = mlt_coefs + 0;
|
|
new_ptr = new_samples + 0;
|
|
while (1) {
|
|
for (i = *set1_ptr; i; --i) {
|
|
new_ptr[0] = (mlt_ptr[4] + mlt_ptr[3] + mlt_ptr[2] + mlt_ptr[1] + mlt_ptr[0]) >> sub_shift;
|
|
new_ptr[1] = (19261 * mlt_ptr[1] + 31164 * mlt_ptr[0] - 19261 * mlt_ptr[3] - 31164 * mlt_ptr[4]) >> (sub_shift + 15);
|
|
new_ptr[2] = (26510 * mlt_ptr[4] + 26510 * mlt_ptr[0] - 10126 * mlt_ptr[1] - 32768 * mlt_ptr[2] - 10126 * mlt_ptr[3]) >> (sub_shift + 15);
|
|
new_ptr[3] = (31164 * mlt_ptr[3] + 19261 * mlt_ptr[0] - 31164 * mlt_ptr[1] - 19261 * mlt_ptr[4]) >> (sub_shift + 15);
|
|
new_ptr[4] = (10126 * mlt_ptr[4] + 32768 * mlt_ptr[2] + 10126 * mlt_ptr[0] - 26510 * mlt_ptr[1] - 26510 * mlt_ptr[3]) >> (sub_shift + 15);
|
|
mlt_ptr += 5;
|
|
new_ptr += 5;
|
|
}
|
|
set1_ptr += 2;
|
|
|
|
if (mlt_ptr >= mlt_coefs + 640)
|
|
break;
|
|
|
|
new_ptr[0] = ( 5126 * mlt_ptr[4] + 14876 * mlt_ptr[3] + 23170 * mlt_ptr[2] + 32365 * mlt_ptr[0] + 29197 * mlt_ptr[1]) >> (sub_shift + 15);
|
|
new_ptr[1] = (-14876 * mlt_ptr[4] + -32365 * mlt_ptr[3] + 5126 * mlt_ptr[1] + 29197 * mlt_ptr[0] - 23170 * mlt_ptr[2]) >> (sub_shift + 15);
|
|
new_ptr[2] = ( 23170 * mlt_ptr[4] + 23170 * mlt_ptr[3] + -23170 * mlt_ptr[1] + 23170 * mlt_ptr[0] - 23170 * mlt_ptr[2]) >> (sub_shift + 15);
|
|
new_ptr[3] = (-29197 * mlt_ptr[4] + 5126 * mlt_ptr[3] + 23170 * mlt_ptr[2] + 14876 * mlt_ptr[0] - 32365 * mlt_ptr[1]) >> (sub_shift + 15);
|
|
new_ptr[4] = ( 32365 * mlt_ptr[4] + -29197 * mlt_ptr[3] + 23170 * mlt_ptr[2] + 5126 * mlt_ptr[0] - 14876 * mlt_ptr[1]) >> (sub_shift + 15);
|
|
mlt_ptr += 5;
|
|
new_ptr += 5;
|
|
}
|
|
}
|
|
|
|
/* swapping and sum/diffs? */
|
|
{
|
|
const uint8_t *set2_ptr;
|
|
int16_t *mlt_ptr, *new_ptr;
|
|
int16_t tmp1_val_a, tmp1_val_b;
|
|
int16_t *tmp0_ptr, *tmp1_ptr, *tmp1_ptr_lo, *tmp1_ptr_mlo, *tmp1_ptr_mhi, *tmp2_ptr;
|
|
|
|
set2_ptr = imlt_set2_table;
|
|
|
|
mlt_ptr = mlt_coefs + 0;
|
|
new_ptr = new_samples + 0;
|
|
while (new_ptr < new_samples + 640) {
|
|
for (i = *set2_ptr; i; --i) {
|
|
*mlt_ptr++ = new_ptr[0];
|
|
*mlt_ptr++ = new_ptr[5];
|
|
*mlt_ptr++ = new_ptr[1];
|
|
*mlt_ptr++ = new_ptr[6];
|
|
*mlt_ptr++ = new_ptr[2];
|
|
*mlt_ptr++ = new_ptr[7];
|
|
*mlt_ptr++ = new_ptr[3];
|
|
*mlt_ptr++ = new_ptr[8];
|
|
*mlt_ptr++ = new_ptr[4];
|
|
*mlt_ptr++ = new_ptr[9];
|
|
new_ptr += 10;
|
|
}
|
|
set2_ptr++;
|
|
|
|
*mlt_ptr++ = new_ptr[0];
|
|
*mlt_ptr++ = new_ptr[9] + new_ptr[1];
|
|
*mlt_ptr++ = new_ptr[1] - new_ptr[9];
|
|
*mlt_ptr++ = new_ptr[2] - new_ptr[8];
|
|
*mlt_ptr++ = new_ptr[8] + new_ptr[2];
|
|
*mlt_ptr++ = new_ptr[7] + new_ptr[3];
|
|
*mlt_ptr++ = new_ptr[3] - new_ptr[7];
|
|
*mlt_ptr++ = new_ptr[4] - new_ptr[6];
|
|
*mlt_ptr++ = new_ptr[6] + new_ptr[4];
|
|
*mlt_ptr++ = new_ptr[5];
|
|
new_ptr += 10;
|
|
}
|
|
|
|
/* below is some three way swapping, tmp ptrs change between mlt<>new */
|
|
tmp0_ptr = mlt_coefs + 640;
|
|
tmp1_ptr = new_samples + 640;
|
|
for (n = 20; n <= 160; n *= 2) {
|
|
tmp2_ptr = tmp0_ptr + 0;
|
|
tmp0_ptr = tmp1_ptr - 640;
|
|
tmp1_ptr = tmp2_ptr - 640;
|
|
do {
|
|
for (j = *set2_ptr; j > 0; --j) {
|
|
tmp1_ptr_mhi = tmp1_ptr + (n / 2);
|
|
for (k = n / 4; k > 0; --k) {
|
|
*tmp0_ptr++ = *tmp1_ptr++;
|
|
*tmp0_ptr++ = *tmp1_ptr_mhi++;
|
|
*tmp0_ptr++ = *tmp1_ptr++;
|
|
*tmp0_ptr++ = *tmp1_ptr_mhi++;
|
|
}
|
|
tmp1_ptr += n / 2;
|
|
}
|
|
set2_ptr++;
|
|
|
|
if (tmp1_ptr >= tmp2_ptr)
|
|
break;
|
|
|
|
tmp1_ptr_lo = tmp1_ptr + 0;
|
|
tmp1_ptr_mlo = tmp1_ptr + (n - 1);
|
|
|
|
*tmp0_ptr++ = *tmp1_ptr_lo++;
|
|
|
|
tmp1_val_a = *tmp1_ptr_lo++;
|
|
tmp1_val_b = *tmp1_ptr_mlo;
|
|
*tmp0_ptr++ = tmp1_val_b + tmp1_val_a;
|
|
*tmp0_ptr++ = tmp1_val_a - tmp1_val_b;
|
|
for (j = (n / 2 - 2) / 2; j > 0; --j) {
|
|
tmp1_val_a = *tmp1_ptr_lo++;
|
|
tmp1_val_b = *--tmp1_ptr_mlo;
|
|
*tmp0_ptr++ = tmp1_val_a - tmp1_val_b;
|
|
*tmp0_ptr++ = tmp1_val_b + tmp1_val_a;
|
|
|
|
tmp1_val_a = *tmp1_ptr_lo++;
|
|
tmp1_val_b = *--tmp1_ptr_mlo;
|
|
*tmp0_ptr++ = tmp1_val_b + tmp1_val_a;
|
|
*tmp0_ptr++ = tmp1_val_a - tmp1_val_b;
|
|
}
|
|
*tmp0_ptr++ = -*tmp1_ptr_lo;
|
|
tmp1_ptr += n;
|
|
}
|
|
while (tmp1_ptr < tmp2_ptr);
|
|
}
|
|
}
|
|
|
|
/* final modifications and post scaling? */
|
|
{
|
|
int16_t mlt_val_lo, mlt_val_mhi, mlt_val_mlo, mlt_val_hi;
|
|
const int16_t *mlt_ptr_lo, *mlt_ptr_hi, *mlt_ptr_mlo, *mlt_ptr_mhi;
|
|
int16_t *new_ptr;
|
|
|
|
if (mod_shift <= 0) {
|
|
/* negative scale (right shift) */
|
|
mod_shift = -mod_shift;
|
|
|
|
mlt_ptr_lo = mlt_coefs + 0;
|
|
mlt_ptr_mlo = mlt_coefs + 160;
|
|
mlt_ptr_mhi = mlt_coefs + 480;
|
|
mlt_ptr_hi = mlt_coefs + 640;
|
|
new_ptr = new_samples + 0;
|
|
|
|
mlt_val_lo = *mlt_ptr_lo++;
|
|
*new_ptr++ = mlt_val_lo << mod_shift;
|
|
|
|
mlt_val_mlo = *mlt_ptr_mlo++;
|
|
mlt_val_hi = *--mlt_ptr_hi;
|
|
*new_ptr++ = (mlt_val_hi + mlt_val_mlo) << mod_shift;
|
|
*new_ptr++ = (mlt_val_mlo - mlt_val_hi) << mod_shift;
|
|
|
|
for (i = 159; i > 0; --i) {
|
|
mlt_val_lo = *mlt_ptr_lo++;
|
|
mlt_val_mhi = *--mlt_ptr_mhi;
|
|
*new_ptr++ = (mlt_val_lo - mlt_val_mhi) << mod_shift;
|
|
*new_ptr++ = (mlt_val_mhi + mlt_val_lo) << mod_shift;
|
|
|
|
mlt_val_mlo = *mlt_ptr_mlo++;
|
|
mlt_val_hi = *--mlt_ptr_hi;
|
|
*new_ptr++ = (mlt_val_hi + mlt_val_mlo) << mod_shift;
|
|
*new_ptr++ = (mlt_val_mlo - mlt_val_hi) << mod_shift;
|
|
}
|
|
|
|
*new_ptr = -*mlt_ptr_mlo << mod_shift;
|
|
}
|
|
else {
|
|
/* same but positive (left shift) */
|
|
|
|
mlt_ptr_lo = mlt_coefs + 0;
|
|
mlt_ptr_mlo = mlt_coefs + 160;
|
|
mlt_ptr_mhi = mlt_coefs + 480;
|
|
mlt_ptr_hi = mlt_coefs + 640;
|
|
new_ptr = new_samples + 0;
|
|
|
|
mlt_val_lo = *mlt_ptr_lo++;
|
|
*new_ptr++ = mlt_val_lo >> mod_shift;
|
|
|
|
mlt_val_mlo = *mlt_ptr_mlo++;
|
|
mlt_val_hi = *--mlt_ptr_hi;
|
|
*new_ptr++ = (mlt_val_hi + mlt_val_mlo) >> mod_shift;
|
|
*new_ptr++ = (mlt_val_mlo - mlt_val_hi) >> mod_shift;
|
|
|
|
for (i = 159; i > 0; --i) {
|
|
mlt_val_lo = *mlt_ptr_lo++;
|
|
mlt_val_mhi = *--mlt_ptr_mhi;
|
|
*new_ptr++ = (mlt_val_lo - mlt_val_mhi) >> mod_shift;
|
|
*new_ptr++ = (mlt_val_mhi + mlt_val_lo) >> mod_shift;
|
|
|
|
mlt_val_mlo = *mlt_ptr_mlo++;
|
|
mlt_val_hi = *--mlt_ptr_hi;
|
|
*new_ptr++ = (mlt_val_hi + mlt_val_mlo) >> mod_shift;
|
|
*new_ptr++ = (mlt_val_mlo - mlt_val_hi) >> mod_shift;
|
|
}
|
|
|
|
*new_ptr = -*mlt_ptr_mlo >> mod_shift;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* "inverse_MLT" */
|
|
static int rmlt_coefs_to_samples(int mag_shift, int16_t* mlt_coefs, int16_t* old_samples, int16_t* out_samples /*, int p_samples_done*/) {
|
|
int res;
|
|
int16_t new_samples[640];
|
|
|
|
/* block transform MLT spectrum coefs to time domain PCM samples using DCT-IV (inverse) */
|
|
res = imlt_dct4(mlt_coefs, new_samples, mag_shift);
|
|
if (res < 0) return res;
|
|
|
|
/* apply IMLT overlapped window filter function (640 samples) */
|
|
res = imlt_window(new_samples, old_samples, out_samples);
|
|
if (res < 0) return res;
|
|
|
|
//*p_samples_done = 640; /* in Namco's code but actually ignored */
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* UNPACKING
|
|
*****************************************************************************/
|
|
|
|
static inline int calc_offset(const int* absolute_region_power_index, int available_bits) {
|
|
int region, cat_index;
|
|
int offset, delta;
|
|
|
|
offset = -32;
|
|
delta = 32;
|
|
do {
|
|
int test_offset = offset + delta;
|
|
int bits = 0;
|
|
|
|
/* obtain a category for each region using the test offset */
|
|
for (region = 0; region < NUMBER_OF_REGIONS; region++) {
|
|
cat_index = (test_offset - absolute_region_power_index[region]) / 2;
|
|
if (cat_index < 0)
|
|
cat_index = 0;
|
|
else if (cat_index > NUM_CATEGORIES - 1)
|
|
cat_index = NUM_CATEGORIES - 1;
|
|
|
|
/* compute the number of bits that will be used given the cat assignments */
|
|
bits += expected_bits_table[cat_index];
|
|
}
|
|
|
|
/* if (bits > available_bits - 32) then divide the offset region for the bin search */
|
|
if (bits >= available_bits - 32) {
|
|
offset = test_offset;
|
|
}
|
|
delta /= 2;
|
|
}
|
|
while (delta > 0);
|
|
|
|
return offset;
|
|
}
|
|
|
|
static inline void compute_raw_power_categories(int* power_categories, const int* absolute_region_power_index, int offset) {
|
|
int region, cat_index;
|
|
|
|
for (region = 0; region < NUMBER_OF_REGIONS; region++) {
|
|
cat_index = (offset - absolute_region_power_index[region]) / 2;
|
|
if (cat_index < 0)
|
|
cat_index = 0;
|
|
else if (cat_index > NUM_CATEGORIES - 1)
|
|
cat_index = NUM_CATEGORIES - 1;
|
|
|
|
power_categories[region] = cat_index;
|
|
}
|
|
}
|
|
|
|
static inline void comp_powercat_and_catbalance(int* power_categories, int* category_balances, const int* absolute_region_power_index, int available_bits, int offset) {
|
|
int region, ccp;
|
|
int max_rate_categories[NUMBER_OF_REGIONS];
|
|
int min_rate_categories[NUMBER_OF_REGIONS];
|
|
int temp_category_balances[2*NUM_CATEGORIZATION_CONTROL_POSSIBILITIES];
|
|
int expected_number_of_code_bits, max, min, max_rate_pointer, min_rate_pointer;
|
|
|
|
|
|
/* Namco uses power_categories directly instead of max_rate_categories, but we'll separate for clarity.
|
|
* It also loads min_rate_categories and expected_number_of_code_bits in the previous region loop */
|
|
expected_number_of_code_bits = 0;
|
|
for (region = 0; region < NUMBER_OF_REGIONS; region++) {
|
|
int power_category = power_categories[region];
|
|
max_rate_categories[region] = power_category;
|
|
min_rate_categories[region] = power_category;
|
|
expected_number_of_code_bits += expected_bits_table[power_category];
|
|
}
|
|
|
|
max = expected_number_of_code_bits;
|
|
min = expected_number_of_code_bits;
|
|
max_rate_pointer = NUM_CATEGORIZATION_CONTROL_POSSIBILITIES;
|
|
min_rate_pointer = NUM_CATEGORIZATION_CONTROL_POSSIBILITIES;
|
|
|
|
for (ccp = 0; ccp < NUM_CATEGORIZATION_CONTROL_POSSIBILITIES - 1; ccp++) {
|
|
|
|
if (max + min <= available_bits * 2) {
|
|
int raw_min = 10000;
|
|
int raw_min_index = 0;
|
|
|
|
/* Search from lowest freq regions to highest for best */
|
|
/* region to reassign to a higher bit rate category. */
|
|
for (region = 0; region < NUMBER_OF_REGIONS; region++) {
|
|
if (max_rate_categories[region] > 0) {
|
|
int tmp = (offset - absolute_region_power_index[region]) - (max_rate_categories[region] * 2);
|
|
if (tmp < raw_min) {
|
|
raw_min = tmp;
|
|
raw_min_index = region;
|
|
}
|
|
}
|
|
}
|
|
|
|
max_rate_pointer--;
|
|
temp_category_balances[max_rate_pointer] = raw_min_index;
|
|
|
|
max -= expected_bits_table[max_rate_categories[raw_min_index]];
|
|
max_rate_categories[raw_min_index]--;
|
|
max += expected_bits_table[max_rate_categories[raw_min_index]];
|
|
}
|
|
else {
|
|
int raw_max = -10000;
|
|
int raw_max_index = NUMBER_OF_REGIONS - 1;
|
|
|
|
/* Search from highest freq regions to lowest for best region to reassign to a lower bit rate category. */
|
|
for (region = NUMBER_OF_REGIONS - 1; region >= 0; region--) {
|
|
if (min_rate_categories[region] < NUM_CATEGORIES - 1) {
|
|
int tmp = (offset - absolute_region_power_index[region]) - (min_rate_categories[region] * 2);
|
|
if (tmp > raw_max) {
|
|
raw_max = tmp;
|
|
raw_max_index = region;
|
|
}
|
|
}
|
|
}
|
|
|
|
temp_category_balances[min_rate_pointer] = raw_max_index;
|
|
min_rate_pointer++;
|
|
|
|
min -= expected_bits_table[min_rate_categories[raw_max_index]];
|
|
min_rate_categories[raw_max_index]++;
|
|
min += expected_bits_table[min_rate_categories[raw_max_index]];
|
|
}
|
|
}
|
|
|
|
for (region = 0; region < NUMBER_OF_REGIONS; region++) {
|
|
power_categories[region] = max_rate_categories[region];
|
|
}
|
|
|
|
for (ccp = 0; ccp < NUM_CATEGORIZATION_CONTROL_POSSIBILITIES - 1; ccp++) {
|
|
category_balances[ccp] = temp_category_balances[max_rate_pointer + ccp];
|
|
}
|
|
}
|
|
|
|
static int categorize(int available_bits, const int* absolute_region_power_index, int* power_categories, int* category_balances) {
|
|
int offset;
|
|
|
|
/* compensate increased bit usage for higher bitrates (used?) */
|
|
if (available_bits > MAX_DCT_LENGTH) {
|
|
available_bits = 5 * (available_bits - MAX_DCT_LENGTH) / 8 + MAX_DCT_LENGTH;
|
|
}
|
|
|
|
/* calculate category stuff (originally inline'd) */
|
|
|
|
offset = calc_offset(absolute_region_power_index, available_bits);
|
|
|
|
compute_raw_power_categories(power_categories, absolute_region_power_index, offset);
|
|
|
|
comp_powercat_and_catbalance(power_categories, category_balances, absolute_region_power_index, available_bits, offset);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void index_to_array(int index, int* array_cv, int category) {
|
|
int q, p;
|
|
int max_bin_plus_one = max_bin_plus1[category];
|
|
int inverse_of_max_bin_plus_one_scaled = max_bin_plus_one_inverse_scaled[category];
|
|
|
|
/* vs refdec: unrolled, inline'd version of the inverted loop, with some ops simplified
|
|
* (depending on pre-scaled tables), since this is called many times.
|
|
* From tests it's not too noticeable though. */
|
|
|
|
p = index;
|
|
/* fills array (vector_dimension[category] - 1) times inversely */
|
|
switch (category) {
|
|
case 0:
|
|
case 1:
|
|
case 2:
|
|
q = (p * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[1] = p - (q * max_bin_plus_one);
|
|
p = q;
|
|
|
|
q = (q * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[0] = p - (q * max_bin_plus_one);
|
|
//p = q;
|
|
break;
|
|
|
|
case 3:
|
|
q = (p * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[3] = p - (q * 5); //max_bin_plus_one = 5
|
|
p = q;
|
|
|
|
q = (q * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[2] = p - (q * 5); //max_bin_plus_one = 5
|
|
p = q;
|
|
|
|
q = (q * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[1] = p - (q * 5); //max_bin_plus_one = 5
|
|
p = q;
|
|
|
|
q = (q * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[0] = p - (q * 5); //max_bin_plus_one = 5
|
|
//p = q;
|
|
break;
|
|
|
|
case 4:
|
|
array_cv[3] = p & 3;
|
|
p >>= 2;
|
|
|
|
array_cv[2] = p & 3;
|
|
p >>= 2;
|
|
|
|
array_cv[1] = p & 3;
|
|
p >>= 2;
|
|
|
|
array_cv[0] = p & 3;
|
|
/* not sure how this case is optimized */
|
|
break;
|
|
|
|
case 5:
|
|
q = (p * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[4] = p - (q * 3); //max_bin_plus_one = 3
|
|
p = q;
|
|
|
|
q = (q * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[3] = p - (q * 3); //max_bin_plus_one = 3
|
|
p = q;
|
|
|
|
q = (q * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[2] = p - (q * 3); //max_bin_plus_one = 3
|
|
p = q;
|
|
|
|
q = (q * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[1] = p - (q * 3); //max_bin_plus_one = 3
|
|
p = q;
|
|
|
|
q = (q * inverse_of_max_bin_plus_one_scaled) >> 16;
|
|
array_cv[0] = p - (q * 3); //max_bin_plus_one = 3
|
|
//p = q;
|
|
break;
|
|
|
|
case 6:
|
|
array_cv[4] = p & 1;
|
|
p >>= 1;
|
|
|
|
array_cv[3] = p & 1;
|
|
p >>= 1;
|
|
|
|
array_cv[2] = p & 1;
|
|
p >>= 1;
|
|
|
|
array_cv[1] = p & 1;
|
|
p >>= 1;
|
|
|
|
array_cv[0] = p & 1;
|
|
//p >>= 1;
|
|
/* not sure how this case is optimized */
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static int decode_vector_quantized_mlt_indices(uint32_t* data_u32, int* p_bitpos, int bit_count, uint32_t* p_random_value, int* decoder_region_standard_deviation, int* power_categories, int16_t* mlt_coefs) {
|
|
int16_t standard_deviation;
|
|
int array_cv[MAX_VECTOR_DIMENSION];
|
|
int i, v, region, category, index;
|
|
uint32_t cur_u32, bitmask;
|
|
uint32_t* ptr_u32;
|
|
|
|
/* bitreading setup */
|
|
ptr_u32 = &data_u32[(*p_bitpos >> 5)];
|
|
bitmask = 1 << (31 - (*p_bitpos & 0x1F));
|
|
cur_u32 = *ptr_u32;
|
|
ptr_u32++;
|
|
|
|
|
|
/* read MLT coefs per region, differently depending on the category config */
|
|
for (region = 0; region < NUMBER_OF_REGIONS; region++) {
|
|
standard_deviation = decoder_region_standard_deviation[region];
|
|
category = power_categories[region];
|
|
|
|
/* lower categories encode MLT coefs based on vectors incides + huffman (?) */
|
|
if (category < 7) {
|
|
const int16_t* decoder_tree_ptr = table_of_decoder_tables[category];
|
|
int16_t* decoder_mlt_ptr = &mlt_coefs[region * REGION_SIZE];
|
|
|
|
for (v = 0; v < number_of_vectors[category]; v++) {
|
|
index = 0;
|
|
|
|
do {
|
|
int bit = (bitmask & cur_u32) != 0;
|
|
bitmask >>= 1;
|
|
(*p_bitpos)++;
|
|
if (bitmask == 0) {
|
|
bitmask = 0x80000000;
|
|
cur_u32 = *ptr_u32;
|
|
ptr_u32++;
|
|
}
|
|
|
|
index = *(decoder_tree_ptr + (index*2) + bit);
|
|
}
|
|
while (index > 0);
|
|
|
|
/* ran out of bits */
|
|
if (ptr_u32 > &data_u32[bit_count >> 5]) {
|
|
category = 7; /* this category doesn't bitread and only noise fills */
|
|
|
|
/* Namco doesn't set remaining regions to category 7 like the spec, nor checks
|
|
* when reading sign bits below, but doesn't seem to cause any problems */
|
|
//for (i = region + 1; i < NUMBER_OF_REGIONS; i++) {
|
|
// power_categories[i] = 7;
|
|
//}
|
|
break;
|
|
}
|
|
|
|
index = -index;
|
|
|
|
/* convert index into array of sign bits used to access the centroid table */
|
|
index_to_array(index, array_cv, category);
|
|
|
|
/* vs refdec: sign reading slightly simplified */
|
|
|
|
for (i = 0; i < vector_dimension[category]; i++) {
|
|
int decoder_mlt_value = 0;
|
|
int negative;
|
|
|
|
/* non-zero array = results in non-zero coef and encodes bit sign */
|
|
if (array_cv[i] != 0) {
|
|
decoder_mlt_value = standard_deviation * mlt_quant_centroid[category][array_cv[i]];
|
|
decoder_mlt_value = decoder_mlt_value >> 12;
|
|
|
|
negative = (bitmask & cur_u32) != 0;
|
|
bitmask >>= 1;
|
|
(*p_bitpos)++;
|
|
if (bitmask == 0) {
|
|
bitmask = 0x80000000;
|
|
cur_u32 = *ptr_u32;
|
|
ptr_u32++;
|
|
}
|
|
|
|
if (negative == 0)
|
|
decoder_mlt_value = -decoder_mlt_value;
|
|
}
|
|
|
|
*decoder_mlt_ptr = decoder_mlt_value;
|
|
decoder_mlt_ptr++;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* higher categories don't encode all 20 MLT coefs, so rest are filled with
|
|
* noise to pretend we have something */
|
|
if (category >= 5) {
|
|
static const int noise_fill_factor[3] = {5793, 8192, 23170}; /* 0.176777, 0.25, 0.707107 */
|
|
uint32_t random_value;
|
|
|
|
int16_t* decoder_mlt_ptr = &mlt_coefs[region * REGION_SIZE];
|
|
int16_t noise_fill_pos = (standard_deviation * noise_fill_factor[category - 5]) >> 15; /* should be int16 */
|
|
int16_t noise_fill_neg = -noise_fill_pos;
|
|
|
|
/* vs refdec: updated differently (with hist state), and reupdated after 10 coefs */
|
|
*p_random_value *= 69069;
|
|
random_value = *p_random_value;
|
|
|
|
/* in some versions of Namco's decoder this is unrolled too */
|
|
|
|
if (category >= 7) {
|
|
/* all coefs are noise-filled */
|
|
for (i = 0; i < REGION_SIZE; i++) {
|
|
{
|
|
if (random_value & 1)
|
|
*decoder_mlt_ptr = noise_fill_pos;
|
|
else
|
|
*decoder_mlt_ptr = noise_fill_neg;
|
|
random_value = (random_value >> 1);
|
|
}
|
|
decoder_mlt_ptr++;
|
|
}
|
|
}
|
|
else {
|
|
/* some coefs are noise-filled */
|
|
for (i = 0; i < REGION_SIZE; i++) {
|
|
if (*decoder_mlt_ptr == 0) {
|
|
if (random_value & 1)
|
|
*decoder_mlt_ptr = noise_fill_pos;
|
|
else
|
|
*decoder_mlt_ptr = noise_fill_neg;
|
|
random_value = (random_value >> 1);
|
|
}
|
|
decoder_mlt_ptr++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* unpacks input buffer into MLT coefs */
|
|
static int unpack_frame(int bit_rate, const uint8_t* data, int frame_size, /*int* p_frame_size, */ int* p_mag_shift, int16_t* mlt_coefs, uint32_t* p_random_value, int test_errors) {
|
|
uint32_t data_u32[0x78/4 + 2];
|
|
int bitpos, expected_frame_size;
|
|
int power_categories[NUMBER_OF_REGIONS];
|
|
int category_balances[NUM_CATEGORIZATION_CONTROL_POSSIBILITIES-1];
|
|
int absolute_region_power_index[NUMBER_OF_REGIONS]; /* a.k.a. RMS_index */
|
|
int decoder_region_standard_deviation[NUMBER_OF_REGIONS];
|
|
uint16_t categorization_control;
|
|
int i;
|
|
int res;
|
|
|
|
|
|
/* setup bitreading */
|
|
{
|
|
expected_frame_size = bit_rate / 8 / 50;
|
|
if (frame_size < expected_frame_size)
|
|
return 1;
|
|
//p_frame_size = expected_frame_size; /* Namco returns this, for some reason */
|
|
|
|
/* Siren14 data is packed into U16 LE, but Namco reads and stores them in a U32 LE temp array for their bitreading */
|
|
for (i = 0; i < (expected_frame_size >> 2); i++) {
|
|
data_u32[i] = (data[0x04*i + 2] << 0) | (data[0x04*i + 3] << 8) | (data[0x04*i + 0] << 16) | (data[0x04*i + 1] << 24);
|
|
}
|
|
/* data32 also has extra ints probably against outside reads, which wasn't originally
|
|
* memset'ed but we'll do just in case (doesn't seem to matter) */
|
|
for (i = (expected_frame_size >> 2); i < 0x78/4 + 2; i++) {
|
|
data_u32[i] = 0;
|
|
}
|
|
|
|
bitpos = 0;
|
|
}
|
|
|
|
/* decode amplitude envelope scales */
|
|
{
|
|
int rms_index = 0; /* amplitudes are root-mean-square */
|
|
int region;
|
|
|
|
/* get amplitude envelope (5b) for region 0 */
|
|
for (i = 0; i < 5; i++) {
|
|
int bit = (data_u32[bitpos >> 5] >> (31 - (bitpos & 0x1F))) & 1;
|
|
bitpos++;
|
|
|
|
rms_index = (rms_index << 1) | bit;
|
|
}
|
|
absolute_region_power_index[0] = rms_index - ESF_ADJUSTMENT_TO_RMS_INDEX;
|
|
|
|
/* get amplitudes for other regions, coded differentially based on region 0 (done with a temp array in refdec) */
|
|
for (region = 1; region < NUMBER_OF_REGIONS; region++) {
|
|
int diff_index = 0;
|
|
int region_index = region > 13 ? 13 - 1 : region - 1;
|
|
|
|
do {
|
|
int bit = (data_u32[bitpos >> 5] >> (31 - (bitpos & 0x1F))) & 1;
|
|
bitpos++;
|
|
|
|
diff_index = differential_region_power_decoder_tree[region_index][diff_index][bit];
|
|
}
|
|
while (diff_index > 0);
|
|
|
|
absolute_region_power_index[region] = absolute_region_power_index[region-1] - diff_index - DRP_DIFF_MIN;
|
|
}
|
|
}
|
|
|
|
/* read categorization info bits */
|
|
{
|
|
categorization_control = 0;
|
|
for (i = 0; i < NUM_CATEGORIZATION_CONTROL_BITS; i++) {
|
|
int bit = (data_u32[bitpos >> 5] >> (31 - (bitpos & 0x1F))) & 1;
|
|
bitpos++;
|
|
|
|
categorization_control = (categorization_control << 1) | bit;
|
|
}
|
|
}
|
|
|
|
/* determine categorization config per region */
|
|
res = categorize(
|
|
8 * expected_frame_size - bitpos,
|
|
absolute_region_power_index, power_categories, category_balances);
|
|
if (res < 0) return res;
|
|
|
|
/* adjust power categories (rate_adjust_categories) */
|
|
{
|
|
for (i = 0; i < categorization_control; i++) {
|
|
int region = category_balances[i];
|
|
power_categories[region]++;
|
|
}
|
|
}
|
|
|
|
/* recover amplitude envelope deviation (done in decode_envelope in refdec) */
|
|
{
|
|
int region, region_index, max_index /*, test_index*/;
|
|
|
|
/* vs refdec: Namco *doesn't* calc test_index here, so resulting region_index
|
|
* can be +-1 vs refdec, and final samples around +-10 (usually quieter).
|
|
* Also reuses and mods absolute_region_power_index but we have decoder_region_standard_deviation for clarity */
|
|
|
|
//test_index = 0;
|
|
max_index = 0;
|
|
for (region = 0; region < NUMBER_OF_REGIONS; region++) {
|
|
region_index = absolute_region_power_index[region];
|
|
if (max_index < region_index)
|
|
max_index = region_index;
|
|
//test_index += region_standard_deviation_table[region_index + REGION_POWER_TABLE_NUM_NEGATIVES];
|
|
}
|
|
|
|
max_index += REGION_POWER_TABLE_NUM_NEGATIVES;
|
|
region_index = 9;
|
|
while ((region_index >= 0) && (/*test_index >= 8 ||*/ max_index > 28)) {
|
|
max_index -= 2;
|
|
region_index--;
|
|
//test_index /= 2;
|
|
}
|
|
|
|
for (region = 0; region < NUMBER_OF_REGIONS; region++) {
|
|
int rsd_index = absolute_region_power_index[region] + REGION_POWER_TABLE_NUM_NEGATIVES + region_index * 2;
|
|
decoder_region_standard_deviation[region] = region_standard_deviation_table[rsd_index];
|
|
}
|
|
|
|
*p_mag_shift = region_index;
|
|
}
|
|
|
|
/* decode the quantized bits into MLT coefs */
|
|
res = decode_vector_quantized_mlt_indices(
|
|
data_u32, &bitpos, 8 * expected_frame_size,
|
|
p_random_value,
|
|
decoder_region_standard_deviation, power_categories, mlt_coefs);
|
|
if (res < 0) return res;
|
|
|
|
|
|
/* test for errors (in refdec but not Namco's, useful to detect decryption) */
|
|
if (test_errors) {
|
|
int max_pad_bytes = 0x8; /* usually 0x04 and rarely ~0x08 */
|
|
int bits_left = 8 * expected_frame_size - bitpos;
|
|
int i, endpos, test_bits;
|
|
|
|
if (bits_left > 0) {
|
|
|
|
/* frame must be padded with 1s after regular data */
|
|
endpos = bitpos;
|
|
for (i = 0; i < bits_left; i++) {
|
|
int bit = (data_u32[endpos >> 5] >> (31 - (endpos & 0x1F))) & 1;
|
|
endpos++;
|
|
|
|
if (bit == 0)
|
|
return -1;
|
|
}
|
|
|
|
/* extra: test we aren't in the middle of padding (happens with bad keys, this test catches most)
|
|
* After reading the whole frame, last bit position should land near last useful
|
|
* data, a few bytes into padding, so check there aren't too many padding bits. */
|
|
endpos = bitpos;
|
|
test_bits = 8 * max_pad_bytes;
|
|
if (test_bits > bitpos)
|
|
test_bits = bitpos;
|
|
for (i = 0; i < test_bits; i++) {
|
|
int bit = (data_u32[endpos >> 5] >> (31 - (endpos & 0x1F))) & 1;
|
|
endpos--; /* from last position towards valid data */
|
|
|
|
if (bit != 1)
|
|
break;
|
|
}
|
|
|
|
if (i == test_bits)
|
|
return -8;
|
|
|
|
}
|
|
else {
|
|
/* ? */
|
|
if (categorization_control < NUM_CATEGORIZATION_CONTROL_BITS - 1 && bits_left < 0)
|
|
return -2;
|
|
}
|
|
|
|
for (i = 0; i < NUMBER_OF_REGIONS; i++) {
|
|
if ((absolute_region_power_index[i] + ESF_ADJUSTMENT_TO_RMS_INDEX > 31) ||
|
|
(absolute_region_power_index[i] + ESF_ADJUSTMENT_TO_RMS_INDEX < -8))
|
|
return -4;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
* API
|
|
*****************************************************************************/
|
|
|
|
struct g7221_handle {
|
|
/* control */
|
|
int bit_rate;
|
|
int frame_size;
|
|
int test_errors;
|
|
/* AES setup/state */
|
|
s14aes_handle* aes;
|
|
/* state */
|
|
int16_t mlt_coefs[MAX_DCT_LENGTH];
|
|
int16_t old_samples[MAX_DCT_LENGTH >> 1];
|
|
uint32_t random_value;
|
|
};
|
|
|
|
g7221_handle* g7221_init(int bytes_per_frame) {
|
|
g7221_handle* handle = NULL;
|
|
int bit_rate;
|
|
|
|
/* valid only: 0x78, 0x50 or 0x3c */
|
|
bit_rate = bytes_per_frame * 8 * 50;
|
|
if (bit_rate != 24000 && bit_rate != 32000 && bit_rate != 48000)
|
|
goto fail;
|
|
|
|
handle = calloc(1, sizeof(g7221_handle));
|
|
if (!handle) goto fail;
|
|
|
|
handle->bit_rate = bit_rate;
|
|
handle->frame_size = bytes_per_frame;
|
|
|
|
g7221_reset(handle);
|
|
|
|
return handle;
|
|
fail:
|
|
free(handle);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
int g7221_decode_frame(g7221_handle* handle, uint8_t* data, int16_t* out_samples) {
|
|
int res;
|
|
int mag_shift;
|
|
int encrypted = handle->aes != NULL;
|
|
|
|
/* first 0x10 bytes may be encrypted with AES. Original code also saves encrypted bytes,
|
|
* then re-crypts after unpacking, presumably to guard against memdumps. */
|
|
if (encrypted) {
|
|
s14aes_decrypt(handle->aes, data);
|
|
}
|
|
|
|
/* Namco's decoder is designed so that out_samples can be set in place of mlt_coefs,
|
|
* so we could avoid one extra buffer, but for clarity we'll leave as is */
|
|
|
|
/* unpack data into MLT spectrum coefs */
|
|
res = unpack_frame(handle->bit_rate, data, handle->frame_size, &mag_shift, handle->mlt_coefs, &handle->random_value, handle->test_errors);
|
|
if (res < 0) goto fail;
|
|
|
|
/* convert coefs to samples using reverse (inverse) MLT */
|
|
res = rmlt_coefs_to_samples(mag_shift, handle->mlt_coefs, handle->old_samples, out_samples);
|
|
if (res < 0) goto fail;
|
|
|
|
/* Namco also sets number of codes/samples done from unpack_frame/rmlt (ptr arg),
|
|
* but they seem unused */
|
|
|
|
return 0;
|
|
fail:
|
|
return res;
|
|
}
|
|
|
|
#if 0
|
|
int g7221_decode_empty(g7221_handle* handle, int16_t* out_samples) {
|
|
static const uint8_t empty_frame[0x3c] = {
|
|
0x1E,0x0B,0x89,0x40,0x02,0x4F,0x51,0x35, 0x10,0xA1,0xFE,0xDF,0x52,0x51,0x10,0x0B,
|
|
0xF0,0x69,0x7B,0xAE,0x18,0x17,0x00,0x52, 0x07,0x74,0xF4,0x65,0xA2,0x58,0xD8,0x3F,
|
|
0xD9,0xAA,0x65,0x35,0x2A,0x14,0xE3,0x58, 0xD7,0xC0,0xD2,0x02,0x5B,0x0E,0x2A,0x98,
|
|
0xA3,0x04,0x5E,0x51,0xE5,0xC5,0xB2,0x14, 0xBF,0x58,0xFF,0xFF
|
|
};
|
|
int res;
|
|
int mag_shift;
|
|
|
|
/* This only seems to exist in older exes. Namco's samples don't reach EOF, so this
|
|
* wouldn't need to be called. Doesn't seem to use encoder delay either. */
|
|
|
|
res = unpack_frame(24000, empty_frame, 0x3c, &mag_shift, handle->mlt_coefs, &handle->random_value);
|
|
if (res) goto fail;
|
|
|
|
/* convert coefs to samples using reverse (inverse) MLT */
|
|
res = rmlt_coefs_to_samples(mag_shift, handle->mlt_coefs, handle->old_samples, out_samples);
|
|
if (res) goto fail;
|
|
|
|
return 1;
|
|
fail:
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
void g7221_reset(g7221_handle* handle) {
|
|
|
|
/* initialize old values (others get overwritten) */
|
|
memset(&handle->old_samples, 0, sizeof(handle->old_samples));
|
|
|
|
/* initialize the random number generator */
|
|
handle->random_value = 0x10001;
|
|
|
|
/* vs refdec: different default random. Namco used a global, so maybe multiple
|
|
* bnsf playing at the same time would get slightly different results */
|
|
}
|
|
|
|
void g7221_free(g7221_handle* handle) {
|
|
if (!handle)
|
|
return;
|
|
|
|
s14aes_close(handle->aes);
|
|
free(handle);
|
|
}
|
|
|
|
int g7221_set_key(g7221_handle* handle, const uint8_t* key) {
|
|
const int key_size = 192 / 8; /* only 192 bit mode */
|
|
uint8_t temp_key[192 / 8];
|
|
const char* mod_key = "Ua#oK3P94vdxX,ft*k-mnjoO"; /* constant for all platform/games */
|
|
int i;
|
|
|
|
if (!handle)
|
|
goto fail;
|
|
|
|
/* disable, useful for testing? */
|
|
if (key == NULL) {
|
|
s14aes_close(handle->aes);
|
|
handle->aes = NULL;
|
|
handle->test_errors = 1; /* force? */
|
|
return 1;
|
|
}
|
|
|
|
/* init AES state (tables) or reuse if already exists */
|
|
if (handle->aes == NULL) {
|
|
handle->aes = s14aes_init();
|
|
if (!handle->aes) goto fail;
|
|
}
|
|
|
|
handle->test_errors = 1;
|
|
|
|
/* Base key is XORed probably against memdumps, as plain key would be part of the final AES key. However
|
|
* roundkey is still in memdumps near AES state (~0x1310 from sbox table, that starts with 0x63,0x7c,0x77,0x7b...)
|
|
* so it isn't too effective. XORing was originally done inside aes_expand_key during S14/S22 init. */
|
|
for (i = 0; i < key_size; i++) {
|
|
temp_key[i] = key[i] ^ mod_key[i];
|
|
}
|
|
|
|
/* reset new key */
|
|
s14aes_set_key(handle->aes, temp_key);
|
|
|
|
return 0;
|
|
fail:
|
|
return -1;
|
|
}
|