diff --git a/src/coding/tac_decoder_lib.c b/src/coding/tac_decoder_lib.c index 2e0d145f..4627f645 100644 --- a/src/coding/tac_decoder_lib.c +++ b/src/coding/tac_decoder_lib.c @@ -46,6 +46,7 @@ #define TAC_CODED_BANDS 27 #define TAC_CODED_COEFS 32 #define TAC_TOTAL_POINTS 32 /* not sure about this term */ +#define TAC_SCALE_TABLE_MAX_INDEX 511 struct tac_handle_t { @@ -66,7 +67,7 @@ struct tac_handle_t { int16_t codes[TAC_CHANNELS][TAC_FRAME_SAMPLES]; /* decoding vector state */ - REG_VF spectrum[TAC_FRAME_SAMPLES / 4]; /* temp huffman-to-coefs (could be stack) */ + REG_VF spectrum[TAC_CHANNELS][TAC_FRAME_SAMPLES / 4]; /* temp huffman-to-coefs */ REG_VF wave[TAC_CHANNELS][TAC_FRAME_SAMPLES / 4]; /* final samples, in vector form */ REG_VF hist[TAC_CHANNELS][TAC_FRAME_SAMPLES / 4]; /* saved between frames */ }; @@ -121,6 +122,17 @@ static void unpack_antialias(REG_VF* spectrum) { } } + +static inline int16_t clamp_s16(int16_t value, int16_t min, int16_t max) { + if (value < min) + return min; + else if (value > max) + return max; + else + return value; +} + + /* converts 4 huffman codes to 4 spectrums coefs */ //SUB_1188 static void unpack_code4(REG_VF* spectrum, const REG_VF* spc1, const REG_VF* spc2, const REG_VF* code, const REG_VF* idx, int out_pos) { @@ -170,15 +182,21 @@ static void unpack_code4(REG_VF* spectrum, const REG_VF* spc1, const REG_VF* spc STORE(_xyzw, spectrum, &out, out_pos); } + /* Unpacks huffman codes in one band into 32 spectrum coefs, using selected scales for that band. */ // SUB_C88 static void unpack_band(REG_VF* spectrum, const int16_t* codes, int band_pos, int* code_pos, int out_pos) { const REG_VF* ST = SCALE_TABLE; int i; - int16_t base_index = codes[0]; /* vector table index, max ~35 */ - int16_t band_index = codes[band_pos]; /* vector too */ + int16_t base_index = codes[0]; /* table index, max ~35 */ + int16_t band_index = codes[band_pos]; /* table too */ REG_VF scale; + /* bad values should be caught by CRC check but for completeness */ + base_index = clamp_s16(base_index, 0, TAC_SCALE_TABLE_MAX_INDEX); + band_index = clamp_s16(band_index, 0, TAC_SCALE_TABLE_MAX_INDEX-128); + + /* index zero = band is not coded and all of its coefs are 0 */ if (band_index == 0) { for (i = 0; i < (TAC_CODED_COEFS / 4); i++) { @@ -188,7 +206,7 @@ static void unpack_band(REG_VF* spectrum, const int16_t* codes, int band_pos, in } /* put final band scale at .y */ - MULy (__y__, &scale, &ST[0x80 + band_index], &ST[base_index]); + MULy (__y__, &scale, &ST[128 + band_index], &ST[base_index]); /* unpack coefs */ for (i = 0; i < 8; i++) { @@ -196,12 +214,12 @@ static void unpack_band(REG_VF* spectrum, const int16_t* codes, int band_pos, in REG_VF spc1, spc2; COPY (_xyzw, &code, &codes[(*code_pos)]); - (*code_pos) += 4; + (*code_pos) += 4; /* scale coef then round down to int to get table indexes (!!!) */ ABS (_xyzw, &tm01, &code); MULy (_xyzw, &tm01, &tm01, &scale); - FMUL (_xyzw, &tm02, &tm01, 512.0); + FMUL (_xyzw, &tm02, &tm01, 512.0); /* 512 = SCALE_TABLE max */ ADD (_xyzw, &tm03, &tm02, &VECTOR_ONE); FTOI0(_xyzw, &idx, &tm02); /* keep idx as int for later (probably could use (int)f.N too) */ @@ -215,12 +233,18 @@ static void unpack_band(REG_VF* spectrum, const int16_t* codes, int band_pos, in SUB (_xyzw, &spc1, &tm01, &tm02); SUB (_xyzw, &spc2, &tm03, &tm02); + /* Also just in case. In rare cases index may access 511+1 but table takes this into account */ + idx.i.x = clamp_s16(idx.i.x, 0, TAC_SCALE_TABLE_MAX_INDEX); + idx.i.y = clamp_s16(idx.i.y, 0, TAC_SCALE_TABLE_MAX_INDEX); + idx.i.z = clamp_s16(idx.i.z, 0, TAC_SCALE_TABLE_MAX_INDEX); + idx.i.w = clamp_s16(idx.i.w, 0, TAC_SCALE_TABLE_MAX_INDEX); + unpack_code4(spectrum, &spc1, &spc2, &code, &idx, out_pos + i); } } -/* Unpacks frame's huffman codes to spectrum coefs. Also done in the VU1 (uses VIFcode UNPACK V4-16 - * to copy 16b huffman codes to VU1 memory as 32b first) but simplified a bit here. */ +/* Unpacks channel's huffman codes to spectrum coefs. Also done in the VU1 (uses VIFcode UNPACK V4-16 + * to copy 16b huffman codes to VU1 memory as 32b first) but it's simplified a bit here. */ // SUB_6E0 static void unpack_channel(REG_VF* spectrum, const int16_t* codes) { int i; @@ -349,7 +373,7 @@ static void process(REG_VF* wave, REG_VF* hist) { /* WTF is going on here? Yeah, no clue. Probably some multi-step FFT/DCT twiddle thing. * Remember all those separate ops are left as-is to allow PS2 float simulation (disabled though). * Tried cleaning up some more but... */ - ADDw (_x___, &tm10, &tm01, &tm00); + ADDw (_x___, &tm10, &tm01, &tm00); ADDx (____w, &tm10, &tm01, &tm02); ADDx (____w, &tm11, &tm02, &tm03); ADDw (_x___, &tm12, &tm04, &tm03); @@ -859,6 +883,18 @@ static void process(REG_VF* wave, REG_VF* hist) { } } + +/* Fix joint stereo files that only encode diffs in R (assumed, double check) */ +static void parse_joint_stereo(REG_VF* resultL, REG_VF* resultR) { + int i; + + /* Combine OG L sample + R diff. For pseudo-mono files R is all 0s + * (R only saves 28 huffman codes, signalling no coefs per 1+27 bands) */ + for (i = 0; i < TAC_TOTAL_POINTS * 8; i++) { + ADD (_xyzw, &resultR[i], &resultL[i], &resultR[i]); + } +} + /////////////////////////////////////////////////////////////////////////////// /* main decoding in the VU1 coprocessor */ @@ -866,18 +902,23 @@ static void decode_vu1(tac_handle_t* h) { int ch; for (ch = 0; ch < TAC_CHANNELS; ch++) { - unpack_channel(h->spectrum, h->codes[ch]); + unpack_channel(h->spectrum[ch], h->codes[ch]); - transform(h->wave[ch], h->spectrum); + transform(h->wave[ch], h->spectrum[ch]); process(h->wave[ch], h->hist[ch]); } /* Decoded data is originally stored in VUMem1 as clamped ints, though final step * seems may be done done externally (StFlushWriteBuffer/StMakeFinalOut?) */ + + /* this step may be outside VU1 code */ + if (h->header.joint_stereo) { + parse_joint_stereo(h->wave[0], h->wave[1]); + } } -/* read huffman codes for all channels */ +/* read huffman codes for all channels (max per channel 27*32 = 864 + 27 + 1 = 892) */ static int read_codes(tac_handle_t* h, const uint8_t* ptr, uint16_t huff_flag, uint32_t huff_cfg) { int huff_count = 0; int ch; @@ -996,14 +1037,14 @@ static uint16_t get_u16le(const uint8_t* mem) { static int init_header(tac_header_t* header, const uint8_t* buf) { header->huffman_offset = get_u32le(buf+0x00); - header->unknown1 = get_u32le(buf+0x04); + header->unknown = get_u32le(buf+0x04); header->loop_frame = get_u16le(buf+0x08); header->loop_discard = get_u16le(buf+0x0A); header->frame_count = get_u16le(buf+0x0C); header->frame_discard = get_u16le(buf+0x0E); header->loop_offset = get_u32le(buf+0x10); header->file_size = get_u32le(buf+0x14); - header->unknown2 = get_u32le(buf+0x18); + header->joint_stereo = get_u32le(buf+0x18); header->empty = get_u32le(buf+0x1c); /* huffman table offset should make sense */ @@ -1019,7 +1060,7 @@ static int init_header(tac_header_t* header, const uint8_t* buf) { if (header->loop_frame > header->frame_count || header->loop_offset > header->file_size) return TAC_PROCESS_HEADER_ERROR; /* just in case */ - if ((header->unknown2 != 0 && header->unknown2 != 1) || header->empty != 0) + if ((header->joint_stereo != 0 && header->joint_stereo != 1) || header->empty != 0) return TAC_PROCESS_HEADER_ERROR; return TAC_PROCESS_OK; @@ -1206,7 +1247,7 @@ void tac_get_samples_pcm16(tac_handle_t* handle, int16_t* dst) { int ch, i; int chs = TAC_CHANNELS; - for (ch = 0; ch < chs; ch++) { + for (ch = 0; ch < chs; ch++) { int s = 0; for (i = 0; i < TAC_FRAME_SAMPLES / 4; i++) { dst[(s+0)*chs + ch] = clamp16f(handle->wave[ch][i].f.x); diff --git a/src/coding/tac_decoder_lib.h b/src/coding/tac_decoder_lib.h index e2a743b7..5094b42f 100644 --- a/src/coding/tac_decoder_lib.h +++ b/src/coding/tac_decoder_lib.h @@ -24,14 +24,14 @@ typedef struct tac_handle_t tac_handle_t; typedef struct { /* 0x20 header config */ uint32_t huffman_offset; /* setup */ - uint32_t unknown1; /* ignored? may be CDVD stuff (divided/multiplied during PS2 process), not file size related */ + uint32_t unknown; /* ignored? may be CDVD stuff (divided/multiplied during PS2 process), not file size related */ uint16_t loop_frame; /* aligned to block stard */ uint16_t loop_discard; /* assumed */ uint16_t frame_count; /* number of valid frames ("block end" frame not included) */ uint16_t frame_discard; /* assumed */ uint32_t loop_offset; /* file size if not looped */ uint32_t file_size; /* actual file size can be a bit smaller if last block is truncated */ - uint32_t unknown2; /* usually 0 and rarely 1 (R channel has less data, joint stereo mode?) */ + uint32_t joint_stereo; /* usually 0 and rarely 1 */ uint32_t empty; /* null? */ } tac_header_t; diff --git a/src/coding/tac_decoder_lib_data.h b/src/coding/tac_decoder_lib_data.h index a53b8269..150a9bbe 100644 --- a/src/coding/tac_decoder_lib_data.h +++ b/src/coding/tac_decoder_lib_data.h @@ -3,17 +3,17 @@ /* VU1 register simulation, needs type conversion at times (should be optimized out by compiler). */ typedef union { - struct { - float x,y,z,w; - } f; + struct { + float x,y,z,w; + } f; - struct { - uint32_t x,y,z,w; - } i; + struct { + uint32_t x,y,z,w; + } i; - float F[4]; - uint32_t UL[4]; - int32_t SL[4]; + float F[4]; + uint32_t UL[4]; + int32_t SL[4]; /* can access as US/SS/etc but not needed by current code */ } _REG_VF; typedef _REG_VF REG_VF; diff --git a/src/coding/tac_decoder_lib_ops.h b/src/coding/tac_decoder_lib_ops.h index 1f3e77d5..6a265a5c 100644 --- a/src/coding/tac_decoder_lib_ops.h +++ b/src/coding/tac_decoder_lib_ops.h @@ -8,8 +8,8 @@ * registers like the ACC, and updates zero/neg/etc flags per op (plus added here a few helper ops). * Main reason to use them vs doing standard +*-/ in code is allowing to simulate PS2 floats. * See Nisto's decoder for actual emulation. */ - - + + /* PS2 floats are slightly different vs IEEE 754 floats: * - NaN and Inf (exp 255) don't exist on the PS2, meaning it has a bigger range of floats * - denormals (exp 0) don't exist either, and ops truncate to 0 @@ -21,7 +21,7 @@ static inline void UPDATE_FLOATS(uint8_t dest, REG_VF *vf) { #if TAC_ENABLE_PS2_FLOATS - int i; + int i; for (i = 0; i < 4; i++) { int shift = 3 - i; @@ -44,7 +44,7 @@ static inline void UPDATE_FLOATS(uint8_t dest, REG_VF *vf) { break; } } - } + } } #endif } @@ -61,7 +61,6 @@ static inline void _DIV_INTERNAL(REG_VF *fd, const REG_VF *fs, const REG_VF *ft, else { fd->UL[from] = 0x7F7FFFFF; } - } else { fd->F[from] = dividend / divisor;