mirror of
https://github.com/vgmstream/vgmstream.git
synced 2024-11-24 23:10:10 +01:00
Improve tri-Ace codec joint stereo mode
This commit is contained in:
parent
2081a5b322
commit
822f4595b8
@ -46,6 +46,7 @@
|
||||
#define TAC_CODED_BANDS 27
|
||||
#define TAC_CODED_COEFS 32
|
||||
#define TAC_TOTAL_POINTS 32 /* not sure about this term */
|
||||
#define TAC_SCALE_TABLE_MAX_INDEX 511
|
||||
|
||||
|
||||
struct tac_handle_t {
|
||||
@ -66,7 +67,7 @@ struct tac_handle_t {
|
||||
int16_t codes[TAC_CHANNELS][TAC_FRAME_SAMPLES];
|
||||
|
||||
/* decoding vector state */
|
||||
REG_VF spectrum[TAC_FRAME_SAMPLES / 4]; /* temp huffman-to-coefs (could be stack) */
|
||||
REG_VF spectrum[TAC_CHANNELS][TAC_FRAME_SAMPLES / 4]; /* temp huffman-to-coefs */
|
||||
REG_VF wave[TAC_CHANNELS][TAC_FRAME_SAMPLES / 4]; /* final samples, in vector form */
|
||||
REG_VF hist[TAC_CHANNELS][TAC_FRAME_SAMPLES / 4]; /* saved between frames */
|
||||
};
|
||||
@ -121,6 +122,17 @@ static void unpack_antialias(REG_VF* spectrum) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline int16_t clamp_s16(int16_t value, int16_t min, int16_t max) {
|
||||
if (value < min)
|
||||
return min;
|
||||
else if (value > max)
|
||||
return max;
|
||||
else
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
/* converts 4 huffman codes to 4 spectrums coefs */
|
||||
//SUB_1188
|
||||
static void unpack_code4(REG_VF* spectrum, const REG_VF* spc1, const REG_VF* spc2, const REG_VF* code, const REG_VF* idx, int out_pos) {
|
||||
@ -170,15 +182,21 @@ static void unpack_code4(REG_VF* spectrum, const REG_VF* spc1, const REG_VF* spc
|
||||
STORE(_xyzw, spectrum, &out, out_pos);
|
||||
}
|
||||
|
||||
|
||||
/* Unpacks huffman codes in one band into 32 spectrum coefs, using selected scales for that band. */
|
||||
// SUB_C88
|
||||
static void unpack_band(REG_VF* spectrum, const int16_t* codes, int band_pos, int* code_pos, int out_pos) {
|
||||
const REG_VF* ST = SCALE_TABLE;
|
||||
int i;
|
||||
int16_t base_index = codes[0]; /* vector table index, max ~35 */
|
||||
int16_t band_index = codes[band_pos]; /* vector too */
|
||||
int16_t base_index = codes[0]; /* table index, max ~35 */
|
||||
int16_t band_index = codes[band_pos]; /* table too */
|
||||
REG_VF scale;
|
||||
|
||||
/* bad values should be caught by CRC check but for completeness */
|
||||
base_index = clamp_s16(base_index, 0, TAC_SCALE_TABLE_MAX_INDEX);
|
||||
band_index = clamp_s16(band_index, 0, TAC_SCALE_TABLE_MAX_INDEX-128);
|
||||
|
||||
|
||||
/* index zero = band is not coded and all of its coefs are 0 */
|
||||
if (band_index == 0) {
|
||||
for (i = 0; i < (TAC_CODED_COEFS / 4); i++) {
|
||||
@ -188,7 +206,7 @@ static void unpack_band(REG_VF* spectrum, const int16_t* codes, int band_pos, in
|
||||
}
|
||||
|
||||
/* put final band scale at .y */
|
||||
MULy (__y__, &scale, &ST[0x80 + band_index], &ST[base_index]);
|
||||
MULy (__y__, &scale, &ST[128 + band_index], &ST[base_index]);
|
||||
|
||||
/* unpack coefs */
|
||||
for (i = 0; i < 8; i++) {
|
||||
@ -196,12 +214,12 @@ static void unpack_band(REG_VF* spectrum, const int16_t* codes, int band_pos, in
|
||||
REG_VF spc1, spc2;
|
||||
|
||||
COPY (_xyzw, &code, &codes[(*code_pos)]);
|
||||
(*code_pos) += 4;
|
||||
(*code_pos) += 4;
|
||||
|
||||
/* scale coef then round down to int to get table indexes (!!!) */
|
||||
ABS (_xyzw, &tm01, &code);
|
||||
MULy (_xyzw, &tm01, &tm01, &scale);
|
||||
FMUL (_xyzw, &tm02, &tm01, 512.0);
|
||||
FMUL (_xyzw, &tm02, &tm01, 512.0); /* 512 = SCALE_TABLE max */
|
||||
ADD (_xyzw, &tm03, &tm02, &VECTOR_ONE);
|
||||
|
||||
FTOI0(_xyzw, &idx, &tm02); /* keep idx as int for later (probably could use (int)f.N too) */
|
||||
@ -215,12 +233,18 @@ static void unpack_band(REG_VF* spectrum, const int16_t* codes, int band_pos, in
|
||||
SUB (_xyzw, &spc1, &tm01, &tm02);
|
||||
SUB (_xyzw, &spc2, &tm03, &tm02);
|
||||
|
||||
/* Also just in case. In rare cases index may access 511+1 but table takes this into account */
|
||||
idx.i.x = clamp_s16(idx.i.x, 0, TAC_SCALE_TABLE_MAX_INDEX);
|
||||
idx.i.y = clamp_s16(idx.i.y, 0, TAC_SCALE_TABLE_MAX_INDEX);
|
||||
idx.i.z = clamp_s16(idx.i.z, 0, TAC_SCALE_TABLE_MAX_INDEX);
|
||||
idx.i.w = clamp_s16(idx.i.w, 0, TAC_SCALE_TABLE_MAX_INDEX);
|
||||
|
||||
unpack_code4(spectrum, &spc1, &spc2, &code, &idx, out_pos + i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Unpacks frame's huffman codes to spectrum coefs. Also done in the VU1 (uses VIFcode UNPACK V4-16
|
||||
* to copy 16b huffman codes to VU1 memory as 32b first) but simplified a bit here. */
|
||||
/* Unpacks channel's huffman codes to spectrum coefs. Also done in the VU1 (uses VIFcode UNPACK V4-16
|
||||
* to copy 16b huffman codes to VU1 memory as 32b first) but it's simplified a bit here. */
|
||||
// SUB_6E0
|
||||
static void unpack_channel(REG_VF* spectrum, const int16_t* codes) {
|
||||
int i;
|
||||
@ -349,7 +373,7 @@ static void process(REG_VF* wave, REG_VF* hist) {
|
||||
/* WTF is going on here? Yeah, no clue. Probably some multi-step FFT/DCT twiddle thing.
|
||||
* Remember all those separate ops are left as-is to allow PS2 float simulation (disabled though).
|
||||
* Tried cleaning up some more but... */
|
||||
ADDw (_x___, &tm10, &tm01, &tm00);
|
||||
ADDw (_x___, &tm10, &tm01, &tm00);
|
||||
ADDx (____w, &tm10, &tm01, &tm02);
|
||||
ADDx (____w, &tm11, &tm02, &tm03);
|
||||
ADDw (_x___, &tm12, &tm04, &tm03);
|
||||
@ -859,6 +883,18 @@ static void process(REG_VF* wave, REG_VF* hist) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Fix joint stereo files that only encode diffs in R (assumed, double check) */
|
||||
static void parse_joint_stereo(REG_VF* resultL, REG_VF* resultR) {
|
||||
int i;
|
||||
|
||||
/* Combine OG L sample + R diff. For pseudo-mono files R is all 0s
|
||||
* (R only saves 28 huffman codes, signalling no coefs per 1+27 bands) */
|
||||
for (i = 0; i < TAC_TOTAL_POINTS * 8; i++) {
|
||||
ADD (_xyzw, &resultR[i], &resultL[i], &resultR[i]);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* main decoding in the VU1 coprocessor */
|
||||
@ -866,18 +902,23 @@ static void decode_vu1(tac_handle_t* h) {
|
||||
int ch;
|
||||
|
||||
for (ch = 0; ch < TAC_CHANNELS; ch++) {
|
||||
unpack_channel(h->spectrum, h->codes[ch]);
|
||||
unpack_channel(h->spectrum[ch], h->codes[ch]);
|
||||
|
||||
transform(h->wave[ch], h->spectrum);
|
||||
transform(h->wave[ch], h->spectrum[ch]);
|
||||
|
||||
process(h->wave[ch], h->hist[ch]);
|
||||
}
|
||||
|
||||
/* Decoded data is originally stored in VUMem1 as clamped ints, though final step
|
||||
* seems may be done done externally (StFlushWriteBuffer/StMakeFinalOut?) */
|
||||
|
||||
/* this step may be outside VU1 code */
|
||||
if (h->header.joint_stereo) {
|
||||
parse_joint_stereo(h->wave[0], h->wave[1]);
|
||||
}
|
||||
}
|
||||
|
||||
/* read huffman codes for all channels */
|
||||
/* read huffman codes for all channels (max per channel 27*32 = 864 + 27 + 1 = 892) */
|
||||
static int read_codes(tac_handle_t* h, const uint8_t* ptr, uint16_t huff_flag, uint32_t huff_cfg) {
|
||||
int huff_count = 0;
|
||||
int ch;
|
||||
@ -996,14 +1037,14 @@ static uint16_t get_u16le(const uint8_t* mem) {
|
||||
|
||||
static int init_header(tac_header_t* header, const uint8_t* buf) {
|
||||
header->huffman_offset = get_u32le(buf+0x00);
|
||||
header->unknown1 = get_u32le(buf+0x04);
|
||||
header->unknown = get_u32le(buf+0x04);
|
||||
header->loop_frame = get_u16le(buf+0x08);
|
||||
header->loop_discard = get_u16le(buf+0x0A);
|
||||
header->frame_count = get_u16le(buf+0x0C);
|
||||
header->frame_discard = get_u16le(buf+0x0E);
|
||||
header->loop_offset = get_u32le(buf+0x10);
|
||||
header->file_size = get_u32le(buf+0x14);
|
||||
header->unknown2 = get_u32le(buf+0x18);
|
||||
header->joint_stereo = get_u32le(buf+0x18);
|
||||
header->empty = get_u32le(buf+0x1c);
|
||||
|
||||
/* huffman table offset should make sense */
|
||||
@ -1019,7 +1060,7 @@ static int init_header(tac_header_t* header, const uint8_t* buf) {
|
||||
if (header->loop_frame > header->frame_count || header->loop_offset > header->file_size)
|
||||
return TAC_PROCESS_HEADER_ERROR;
|
||||
/* just in case */
|
||||
if ((header->unknown2 != 0 && header->unknown2 != 1) || header->empty != 0)
|
||||
if ((header->joint_stereo != 0 && header->joint_stereo != 1) || header->empty != 0)
|
||||
return TAC_PROCESS_HEADER_ERROR;
|
||||
|
||||
return TAC_PROCESS_OK;
|
||||
@ -1206,7 +1247,7 @@ void tac_get_samples_pcm16(tac_handle_t* handle, int16_t* dst) {
|
||||
int ch, i;
|
||||
int chs = TAC_CHANNELS;
|
||||
|
||||
for (ch = 0; ch < chs; ch++) {
|
||||
for (ch = 0; ch < chs; ch++) {
|
||||
int s = 0;
|
||||
for (i = 0; i < TAC_FRAME_SAMPLES / 4; i++) {
|
||||
dst[(s+0)*chs + ch] = clamp16f(handle->wave[ch][i].f.x);
|
||||
|
@ -24,14 +24,14 @@ typedef struct tac_handle_t tac_handle_t;
|
||||
typedef struct {
|
||||
/* 0x20 header config */
|
||||
uint32_t huffman_offset; /* setup */
|
||||
uint32_t unknown1; /* ignored? may be CDVD stuff (divided/multiplied during PS2 process), not file size related */
|
||||
uint32_t unknown; /* ignored? may be CDVD stuff (divided/multiplied during PS2 process), not file size related */
|
||||
uint16_t loop_frame; /* aligned to block stard */
|
||||
uint16_t loop_discard; /* assumed */
|
||||
uint16_t frame_count; /* number of valid frames ("block end" frame not included) */
|
||||
uint16_t frame_discard; /* assumed */
|
||||
uint32_t loop_offset; /* file size if not looped */
|
||||
uint32_t file_size; /* actual file size can be a bit smaller if last block is truncated */
|
||||
uint32_t unknown2; /* usually 0 and rarely 1 (R channel has less data, joint stereo mode?) */
|
||||
uint32_t joint_stereo; /* usually 0 and rarely 1 */
|
||||
uint32_t empty; /* null? */
|
||||
} tac_header_t;
|
||||
|
||||
|
@ -3,17 +3,17 @@
|
||||
|
||||
/* VU1 register simulation, needs type conversion at times (should be optimized out by compiler). */
|
||||
typedef union {
|
||||
struct {
|
||||
float x,y,z,w;
|
||||
} f;
|
||||
struct {
|
||||
float x,y,z,w;
|
||||
} f;
|
||||
|
||||
struct {
|
||||
uint32_t x,y,z,w;
|
||||
} i;
|
||||
struct {
|
||||
uint32_t x,y,z,w;
|
||||
} i;
|
||||
|
||||
float F[4];
|
||||
uint32_t UL[4];
|
||||
int32_t SL[4];
|
||||
float F[4];
|
||||
uint32_t UL[4];
|
||||
int32_t SL[4];
|
||||
/* can access as US/SS/etc but not needed by current code */
|
||||
} _REG_VF;
|
||||
typedef _REG_VF REG_VF;
|
||||
|
@ -8,8 +8,8 @@
|
||||
* registers like the ACC, and updates zero/neg/etc flags per op (plus added here a few helper ops).
|
||||
* Main reason to use them vs doing standard +*-/ in code is allowing to simulate PS2 floats.
|
||||
* See Nisto's decoder for actual emulation. */
|
||||
|
||||
|
||||
|
||||
|
||||
/* PS2 floats are slightly different vs IEEE 754 floats:
|
||||
* - NaN and Inf (exp 255) don't exist on the PS2, meaning it has a bigger range of floats
|
||||
* - denormals (exp 0) don't exist either, and ops truncate to 0
|
||||
@ -21,7 +21,7 @@
|
||||
|
||||
static inline void UPDATE_FLOATS(uint8_t dest, REG_VF *vf) {
|
||||
#if TAC_ENABLE_PS2_FLOATS
|
||||
int i;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
int shift = 3 - i;
|
||||
@ -44,7 +44,7 @@ static inline void UPDATE_FLOATS(uint8_t dest, REG_VF *vf) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -61,7 +61,6 @@ static inline void _DIV_INTERNAL(REG_VF *fd, const REG_VF *fs, const REG_VF *ft,
|
||||
else {
|
||||
fd->UL[from] = 0x7F7FFFFF;
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
fd->F[from] = dividend / divisor;
|
||||
|
Loading…
Reference in New Issue
Block a user