Add XMA utils to fix encoder delay stuff

This commit is contained in:
bnnm 2018-11-18 17:00:48 +01:00
parent 2d48913b75
commit ef64889a28
2 changed files with 237 additions and 158 deletions

View File

@ -323,7 +323,6 @@ typedef struct {
/* output */ /* output */
int32_t num_samples; int32_t num_samples;
int32_t skip_samples;
int32_t loop_start_sample; int32_t loop_start_sample;
int32_t loop_end_sample; int32_t loop_end_sample;
} ms_sample_data; } ms_sample_data;
@ -334,6 +333,9 @@ void wma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_ali
void xma1_parse_fmt_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * channels, int * sample_rate, int * loop_flag, int32_t * loop_start_b, int32_t * loop_end_b, int32_t * loop_subframe, int be); void xma1_parse_fmt_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * channels, int * sample_rate, int * loop_flag, int32_t * loop_start_b, int32_t * loop_end_b, int32_t * loop_subframe, int be);
void xma2_parse_fmt_chunk_extra(STREAMFILE *streamFile, off_t chunk_offset, int * loop_flag, int32_t * out_num_samples, int32_t * out_loop_start_sample, int32_t * out_loop_end_sample, int be); void xma2_parse_fmt_chunk_extra(STREAMFILE *streamFile, off_t chunk_offset, int * loop_flag, int32_t * out_num_samples, int32_t * out_loop_start_sample, int32_t * out_loop_end_sample, int be);
void xma2_parse_xma2_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * channels, int * sample_rate, int * loop_flag, int32_t * num_samples, int32_t * loop_start_sample, int32_t * loop_end_sample); void xma2_parse_xma2_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * channels, int * sample_rate, int * loop_flag, int32_t * num_samples, int32_t * loop_start_sample, int32_t * loop_end_sample);
void xma_fix_raw_samples(VGMSTREAM *vgmstream, STREAMFILE*streamFile, off_t stream_offset, size_t stream_size, off_t chunk_offset, int fix_num_samples, int fix_loop_samples);
int riff_get_fact_skip_samples(STREAMFILE * streamFile, off_t start_offset); int riff_get_fact_skip_samples(STREAMFILE * streamFile, off_t start_offset);
size_t atrac3_bytes_to_samples(size_t bytes, int full_block_align); size_t atrac3_bytes_to_samples(size_t bytes, int full_block_align);

View File

@ -370,7 +370,7 @@ int ffmpeg_make_riff_xwma(uint8_t * buf, size_t buf_size, int codec, size_t data
put_16bitLE(buf+0x20, block_align); /* block align */ put_16bitLE(buf+0x20, block_align); /* block align */
put_16bitLE(buf+0x22, 16); /* bits per sample */ put_16bitLE(buf+0x22, 16); /* bits per sample */
put_16bitLE(buf+0x24, 0); /* extra size */ put_16bitLE(buf+0x24, 0); /* extra size */
/* here goes the "dpds" seek table, but it's optional and not needed by FFmpeg */ /* here goes the "dpds" seek table, but it's optional and not needed by FFmpeg (and also buggy) */
memcpy(buf+0x26, "data", 4); memcpy(buf+0x26, "data", 4);
put_32bitLE(buf+0x2a, data_size); /* data size */ put_32bitLE(buf+0x2a, data_size); /* data size */
@ -439,78 +439,86 @@ fail:
/* XMA PARSING */ /* XMA PARSING */
/* ******************************************** */ /* ******************************************** */
static void ms_audio_parse_header(STREAMFILE *streamFile, int xma_version, off_t offset_b, int bits_frame_size, size_t *first_frame_b, size_t *packet_skip_count, size_t *header_size_b) {
if (xma_version == 1) { /* XMA1 */
//packet_sequence = read_bitsBE_b(offset_b+0, 4, streamFile); /* numbered from 0 to N */
//unknown = read_bitsBE_b(offset_b+4, 2, streamFile); /* packet_metadata? (always 2) */
*first_frame_b = read_bitsBE_b(offset_b+6, bits_frame_size, streamFile); /* offset in bits inside the packet */
*packet_skip_count = read_bitsBE_b(offset_b+21, 11, streamFile); /* packets to skip for next packet of this stream */
*header_size_b = 32;
} else if (xma_version == 2) { /* XMA2 */
//frame_count = read_bitsBE_b(offset_b+0, 6, streamFile); /* frames that begin in this packet */
*first_frame_b = read_bitsBE_b(offset_b+6, bits_frame_size, streamFile); /* offset in bits inside this packet */
//packet_metadata = read_bitsBE_b(offset_b+21, 3, streamFile); /* packet_metadata (always 1) */
*packet_skip_count = read_bitsBE_b(offset_b+24, 8, streamFile); /* packets to skip for next packet of this stream */
*header_size_b = 32;
} else { /* WMAPRO(v3) */
//packet_sequence = read_bitsBE_b(offset_b+0, 4, streamFile); /* numbered from 0 to N */
//unknown = read_bitsBE_b(offset_b+4, 2, streamFile); /* packet_metadata? (always 2) */
*first_frame_b = read_bitsBE_b(offset_b+6, bits_frame_size, streamFile); /* offset in bits inside the packet */
*packet_skip_count = 0; /* xwma has no need to skip packets since it uses real multichannel audio */
*header_size_b = 4+2+bits_frame_size; /* variable-sized header */
}
/* XMA2 packets with XMA1 RIFF (transmogrified), remove the packet metadata flag */
if (xma_version == 1 && (*packet_skip_count & 0x700) == 0x100) {
//VGM_LOG("MS_SAMPLES: XMA1 transmogrified packet header at 0x%lx\n", (off_t)offset_b/8);
*packet_skip_count &= ~0x100;
}
/* full packet skip, no new frames start in this packet (prev frames can end here)
* standardized to some value */
if (*packet_skip_count == 0x7FF) { /* XMA1, 11b */
VGM_LOG("MS_SAMPLES: XMA1 full packet_skip at 0x%"PRIx64"\n", (off64_t)offset_b/8);
*packet_skip_count = 0x800;
}
else if (*packet_skip_count == 0xFF) { /* XMA2, 8b*/
VGM_LOG("MS_SAMPLES: XMA2 full packet_skip at 0x%"PRIx64"\n", (off64_t)offset_b/8);
*packet_skip_count = 0x800;
}
/* unusual but not impossible, as the encoder can interleave packets in any way */
VGM_ASSERT((*packet_skip_count > 10 && *packet_skip_count < 0x800),
"MS_SAMPLES: found big packet skip %i at 0x%"PRIx64"\n", *packet_skip_count, (off64_t)offset_b/8);
}
/** /**
* Find total and loop samples of Microsoft audio formats (WMAPRO/XMA1/XMA2) by reading frame headers. * Find total and loop samples of Microsoft audio formats (WMAPRO/XMA1/XMA2) by reading frame headers.
* *
* The stream is made of packets, each containing N small frames of X samples. Frames are further divided into subframes. * The stream is made of packets, each containing N small frames of X samples. Frames are further divided into subframes.
* XMA1/XMA2 can divided into streams for multichannel (1/2ch ... 1/2ch). From the file start, packet 1..N is owned by * XMA1/XMA2 can divided into streams for multichannel (1/2ch ... 1/2ch). From the file start, packet 1..N is owned by
* stream 1..N. Then must follow "packet_skip" value to find the stream next packet, as they are arbitrarily interleaved. * stream 1..N. Then must follow "packet_skip" value to find the stream next packet, as they are arbitrarily interleaved.
* We only need to follow the first stream, as all must contain the same number of samples.
*
* XMA1/XMA2/WMAPRO data only differs in the packet headers. * XMA1/XMA2/WMAPRO data only differs in the packet headers.
*/ */
static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int start_packet, int channels_per_packet, int bytes_per_packet, int samples_per_frame, int samples_per_subframe, int bits_frame_size) { static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int channels_per_packet, int bytes_per_packet, int samples_per_frame, int samples_per_subframe, int bits_frame_size) {
int frames = 0, samples = 0, loop_start_frame = 0, loop_end_frame = 0, start_skip = 0, end_skip = 0; int frames = 0, samples = 0, loop_start_frame = 0, loop_end_frame = 0;
size_t first_frame_b, packet_skip_count = 0, frame_size_b, packet_size_b, header_size_b; size_t first_frame_b, packet_skip_count, header_size_b, frame_size_b;
off_t offset_b, packet_offset_b, frame_offset_b; off_t offset_b, packet_offset_b, frame_offset_b;
size_t packet_size = bytes_per_packet; size_t packet_size = bytes_per_packet;
size_t packet_size_b = packet_size * 8;
off_t offset = msd->data_offset; off_t offset = msd->data_offset;
off_t max_offset = msd->data_offset + msd->data_size; off_t max_offset = msd->data_offset + msd->data_size;
off_t stream_offset_b = msd->data_offset * 8; off_t stream_offset_b = msd->data_offset * 8;
offset += start_packet * packet_size;
packet_size_b = packet_size * 8;
/* read packets */ /* read packets */
while (offset < max_offset) { while (offset < max_offset) {
offset_b = offset * 8; /* global offset in bits */ offset_b = offset * 8; /* global offset in bits */
offset += packet_size; /* global offset in bytes */ offset += packet_size; /* global offset in bytes */
/* packet header */ /* packet header */
if (msd->xma_version == 1) { /* XMA1 */ ms_audio_parse_header(streamFile, msd->xma_version, offset_b, bits_frame_size, &first_frame_b, &packet_skip_count, &header_size_b);
//packet_sequence = read_bitsBE_b(offset_b+0, 4, streamFile); /* numbered from 0 to N */ if (packet_skip_count > 0x7FF) {
//unknown = read_bitsBE_b(offset_b+4, 2, streamFile); /* packet_metadata? (always 2) */ continue; /* full skip */
first_frame_b = read_bitsBE_b(offset_b+6, bits_frame_size, streamFile); /* offset in bits inside the packet */
packet_skip_count = read_bitsBE_b(offset_b+21, 11, streamFile); /* packets to skip for next packet of this stream */
header_size_b = 32;
} else if (msd->xma_version == 2) { /* XMA2 */
//frame_count = read_bitsBE_b(offset_b+0, 6, streamFile); /* frames that begin in this packet */
first_frame_b = read_bitsBE_b(offset_b+6, bits_frame_size, streamFile); /* offset in bits inside this packet */
//packet_metadata = read_bitsBE_b(offset_b+21, 3, streamFile); /* packet_metadata (always 1) */
packet_skip_count = read_bitsBE_b(offset_b+24, 8, streamFile); /* packets to skip for next packet of this stream */
header_size_b = 32;
} else { /* WMAPRO(v3) */
//packet_sequence = read_bitsBE_b(offset_b+0, 4, streamFile); /* numbered from 0 to N */
//unknown = read_bitsBE_b(offset_b+4, 2, streamFile); /* packet_metadata? (always 2) */
first_frame_b = read_bitsBE_b(offset_b+6, bits_frame_size, streamFile); /* offset in bits inside the packet */
packet_skip_count = 0; /* xwma has no need to skip packets since it uses real multichannel audio */
header_size_b = 4+2+bits_frame_size; /* variable-sized header */
} }
/* XMA2 packets with XMA1 RIFF (transmogrified), remove the packet metadata flag */
if (msd->xma_version == 1 && (packet_skip_count & 0x700) == 0x100) {
//VGM_LOG("MS_SAMPLES: XMA1 transmogrified packet header at 0x%lx\n", (off_t)offset_b/8);
packet_skip_count &= ~0x100;
}
/* full packet skip, no new frames start in this packet (prev frames can end here) */
if (packet_skip_count == 0x7FF) { /* XMA1, 11b */
VGM_LOG("MS_SAMPLES: XMA1 full packet_skip %i at 0x%"PRIx64"\n", packet_skip_count, (off64_t)offset_b/8);
packet_skip_count = 0;
continue;
}
else if (packet_skip_count == 0xFF) { /* XMA2, 8b*/
VGM_LOG("MS_SAMPLES: XMA2 full packet_skip %i at 0x%"PRIx64"\n", packet_skip_count, (off64_t)offset_b/8);
packet_skip_count = 0;
continue;
}
offset += packet_size * (packet_skip_count); /* skip packets not owned by the first stream, since we only need samples from it */
/* unusual but not impossible, as the encoder can interleave packets in any way */
VGM_ASSERT(packet_skip_count > 10, "MS_SAMPLES: found big packet skip %i at 0x%"PRIx64"\n", packet_skip_count, (off64_t)offset_b/8);
packet_offset_b = header_size_b + first_frame_b; packet_offset_b = header_size_b + first_frame_b;
/* skip packets not owned by the first stream for next time */
offset += packet_size * (packet_skip_count);
/* read packet frames */ /* read packet frames */
@ -526,7 +534,6 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
/* frame header */ /* frame header */
frame_size_b = read_bitsBE_b(frame_offset_b, bits_frame_size, streamFile); frame_size_b = read_bitsBE_b(frame_offset_b, bits_frame_size, streamFile);
frame_offset_b += bits_frame_size; frame_offset_b += bits_frame_size;
//;VGM_LOG("MS_SAMPLES: frame_offset=0x%lx (0b%lx), frame_size=0x%x (0b%x)\n", (off_t)frame_offset_b/8,(off_t)frame_offset_b, frame_size_b/8, frame_size_b);
/* stop when packet padding starts (0x00 for XMA1 or 0xFF in XMA2) */ /* stop when packet padding starts (0x00 for XMA1 or 0xFF in XMA2) */
if (frame_size_b == 0 || frame_size_b == (0xffffffff >> (32 - bits_frame_size))) { if (frame_size_b == 0 || frame_size_b == (0xffffffff >> (32 - bits_frame_size))) {
@ -535,12 +542,77 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
packet_offset_b += frame_size_b; /* including header */ packet_offset_b += frame_size_b; /* including header */
/* find skips (info from FFmpeg) */ samples += samples_per_frame;
if (channels_per_packet && (msd->xma_version == 1 || msd->xma_version == 2)) { frames++;
int flag;
int tilehdr_size = 15; //todo incorrect but usable for XMA, fix for WMAPro (complex calcs, see ffmpeg decode_tilehdr)
frame_offset_b += tilehdr_size; /* last bit in frame = more frames flag, end packet to avoid reading garbage in some cases
* (last frame spilling to other packets also has this flag, though it's ignored here) */
if (packet_offset_b < packet_size_b && !read_bitsBE_b(offset_b + packet_offset_b - 1, 1, streamFile)) {
break;
}
}
}
/* result */
msd->num_samples = samples;
if (msd->loop_flag && loop_end_frame > loop_start_frame) {
msd->loop_start_sample = loop_start_frame * samples_per_frame + msd->loop_start_subframe * samples_per_subframe;
msd->loop_end_sample = loop_end_frame * samples_per_frame + (msd->loop_end_subframe) * samples_per_subframe;
}
/* the above can't properly read skips for WMAPro ATM, but should fixed to 1 frame anyway */
if (msd->xma_version == 0) {
msd->num_samples -= samples_per_frame; /* FFmpeg does skip this */
#if 0
msd->num_samples += (samples_per_frame / 2); /* but doesn't add extra samples */
#endif
}
}
/* simlar to the above but only gets skips */
static void ms_audio_get_skips(STREAMFILE *streamFile, int xma_version, off_t data_offset, int channels_per_packet, int bytes_per_packet, int samples_per_frame, int bits_frame_size, int *out_start_skip, int *out_end_skip) {
int start_skip = 0, end_skip = 0;
size_t first_frame_b, packet_skip_count, header_size_b, frame_size_b;
off_t offset_b, packet_offset_b, frame_offset_b;
size_t packet_size = bytes_per_packet;
size_t packet_size_b = packet_size * 8;
off_t offset = data_offset;
/* read packet */
{
offset_b = offset * 8; /* global offset in bits */
offset += packet_size; /* global offset in bytes */
/* packet header */
ms_audio_parse_header(streamFile, 2, offset_b, bits_frame_size, &first_frame_b, &packet_skip_count, &header_size_b);
if (packet_skip_count > 0x7FF) {
return; /* full skip */
}
packet_offset_b = header_size_b + first_frame_b;
/* read packet frames */
while (packet_offset_b < packet_size_b) {
frame_offset_b = offset_b + packet_offset_b; /* in bits for aligment stuff */
/* frame header */
frame_size_b = read_bitsBE_b(frame_offset_b, bits_frame_size, streamFile);
frame_offset_b += bits_frame_size;
/* stop when packet padding starts (0x00 for XMA1 or 0xFF in XMA2) */
if (frame_size_b == 0 || frame_size_b == (0xffffffff >> (32 - bits_frame_size))) {
break;
}
packet_offset_b += frame_size_b; /* including header */
/* find skips (info from FFmpeg) */
if (channels_per_packet && (xma_version == 1 || xma_version == 2)) {
int flag;
int len_tilehdr_size = 15; //todo incorrect but usable for XMA, fix for WMAPro (complex, see ffmpeg decode_tilehdr)
frame_offset_b += len_tilehdr_size;
/* ignore "postproc transform" */ /* ignore "postproc transform" */
if (channels_per_packet > 1) { if (channels_per_packet > 1) {
@ -555,7 +627,7 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
} }
} }
/* get start/end skips to get the proper number of samples */ /* get start/end skips to get the proper number of samples (both can be 0) */
flag = read_bitsBE_b(frame_offset_b, 1, streamFile); flag = read_bitsBE_b(frame_offset_b, 1, streamFile);
frame_offset_b += 1; frame_offset_b += 1;
if (flag) { if (flag) {
@ -564,8 +636,7 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
frame_offset_b += 1; frame_offset_b += 1;
if (flag) { if (flag) {
int new_skip = read_bitsBE_b(frame_offset_b, 10, streamFile); int new_skip = read_bitsBE_b(frame_offset_b, 10, streamFile);
VGM_LOG("MS_SAMPLES: start_skip %i at 0x%"PRIx64" (bit 0x%"PRIx64")\n", new_skip, (off64_t)frame_offset_b/8, (off64_t)frame_offset_b); //;VGM_LOG("MS_SAMPLES: start_skip %i at 0x%"PRIx64" (bit 0x%"PRIx64")\n", new_skip, (off64_t)frame_offset_b/8, (off64_t)frame_offset_b);
VGM_ASSERT(start_skip, "MS_SAMPLES: more than one start_skip (%i)\n", new_skip); //ignore, happens due to incorrect tilehdr_size
frame_offset_b += 10; frame_offset_b += 10;
if (new_skip > samples_per_frame) /* from xmaencode */ if (new_skip > samples_per_frame) /* from xmaencode */
@ -580,8 +651,7 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
frame_offset_b += 1; frame_offset_b += 1;
if (flag) { if (flag) {
int new_skip = read_bitsBE_b(frame_offset_b, 10, streamFile); int new_skip = read_bitsBE_b(frame_offset_b, 10, streamFile);
VGM_LOG("MS_SAMPLES: end_skip %i at 0x%"PRIx64" (bit 0x%"PRIx64")\n", new_skip, (off64_t)frame_offset_b/8, (off64_t)frame_offset_b); //;VGM_LOG("MS_SAMPLES: end_skip %i at 0x%"PRIx64" (bit 0x%"PRIx64")\n", new_skip, (off64_t)frame_offset_b/8, (off64_t)frame_offset_b);
VGM_ASSERT(end_skip, "MS_SAMPLES: more than one end_skip (%i)\n", new_skip);//ignore, happens due to incorrect tilehdr_size
frame_offset_b += 10; frame_offset_b += 10;
if (new_skip > samples_per_frame) /* from xmaencode */ if (new_skip > samples_per_frame) /* from xmaencode */
@ -591,56 +661,15 @@ static void ms_audio_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, i
} }
} }
} }
samples += samples_per_frame;
frames++;
/* last bit in frame = more frames flag, end packet to avoid reading garbage in some cases
* (last frame spilling to other packets also has this flag, though it's ignored here) */
if (packet_offset_b < packet_size_b && !read_bitsBE_b(offset_b + packet_offset_b - 1, 1, streamFile)) {
break;
}
} }
} }
/* output results */
msd->num_samples = samples; if (out_start_skip) *out_start_skip = start_skip;
msd->skip_samples = start_skip; //todo remove once FFmpeg decodes correctly if (out_end_skip) *out_end_skip = end_skip;
if (msd->loop_flag && loop_end_frame > loop_start_frame) {
msd->loop_start_sample = loop_start_frame * samples_per_frame + msd->loop_start_subframe * samples_per_subframe;
msd->loop_end_sample = loop_end_frame * samples_per_frame + (msd->loop_end_subframe) * samples_per_subframe;
}
//todo apply once FFmpeg decode is ok
// for XMA must internal skip 64 samples + apply skips + output extra 128 IMDCT samples) and remove skip_samples output
#if 0
if (msd->xma_version == 1 || msd->xma_version == 2) {
msd->num_samples += 128; /* final extra IMDCT samples */
msd->num_samples -= start_skip; /* can be less but fixed to 512 in practice */
msd->num_samples -= end_skip;
/* from xma2encode tests this looks correct (probably XMA loops considering IMDCT delay)
* a full loop wav > xma makes start=384, then xma > wav writes "smpl" with start=0 */
if (msd->loop_flag) {
msd->loop_start_sample += 128;
msd->loop_start_sample -= start_skip;
msd->loop_end_sample += 128;
msd->loop_end_sample -= start_skip;
}
}
#endif
/* the above can't properly read skips for WMAPro ATM, but should fixed to 1 frame anyway */
if (msd->xma_version == 0) {
msd->num_samples -= samples_per_frame; /* FFmpeg does skip this */
#if 0
msd->num_samples += (samples_per_frame / 2); /* but doesn't add extra samples */
#endif
}
} }
static int wma_get_samples_per_frame(int version, int sample_rate, uint32_t decode_flags) { static int wma_get_samples_per_frame(int version, int sample_rate, uint32_t decode_flags) {
int frame_len_bits; int frame_len_bits;
@ -668,33 +697,40 @@ static int wma_get_samples_per_frame(int version, int sample_rate, uint32_t deco
return 1 << frame_len_bits; return 1 << frame_len_bits;
} }
void xma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile) { static int xma_get_channels_per_stream(STREAMFILE* streamFile, off_t chunk_offset, int channels) {
const int bytes_per_packet = 2048;
const int samples_per_frame = 512;
const int samples_per_subframe = 128;
int start_stream = 0; int start_stream = 0;
int channels_per_stream = 0; int channels_per_stream = 0;
/* get from stream config (needed to find skips) */ /* get from stream config (needed to find skips) */
if (msd->chunk_offset) { if (chunk_offset) {
int format = read_16bitLE(msd->chunk_offset,streamFile); int format = read_16bitLE(chunk_offset,streamFile);
if (format == 0x0165 || format == 0x6501) { /* XMA1 */ if (format == 0x0165 || format == 0x6501) { /* XMA1 */
channels_per_stream = read_8bit(msd->chunk_offset + 0x0C + 0x14*start_stream + 0x11,streamFile); channels_per_stream = read_8bit(chunk_offset + 0x0C + 0x14*start_stream + 0x11,streamFile);
} else if (format == 0x0166 || format == 0x6601) { /* new XMA2 */ } else if (format == 0x0166 || format == 0x6601) { /* new XMA2 */
channels_per_stream = msd->channels > 1 ? 2 : 1; channels_per_stream = channels > 1 ? 2 : 1;
} else { /* old XMA2 */ } else { /* old XMA2 */
int version = read_8bit(msd->chunk_offset,streamFile); int version = read_8bit(chunk_offset,streamFile);
channels_per_stream = read_8bit(msd->chunk_offset + 0x20 + (version==3 ? 0x00 : 0x08) + 0x4*start_stream + 0x00,streamFile); channels_per_stream = read_8bit(chunk_offset + 0x20 + (version==3 ? 0x00 : 0x08) + 0x4*start_stream + 0x00,streamFile);
} }
} }
else if (msd->channels) { else if (channels) {
channels_per_stream = msd->channels > 1 ? 2 : 1; channels_per_stream = channels == 1 ? 1 : 2; /* default for XMA without RIFF chunks, most common */
} }
if (channels_per_stream > 2) if (channels_per_stream > 2)
channels_per_stream = 0; channels_per_stream = 0;
ms_audio_get_samples(msd, streamFile, start_stream, channels_per_stream, bytes_per_packet, samples_per_frame, samples_per_subframe, 15); return channels_per_stream;
}
void xma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile) {
const int bytes_per_packet = 2048;
const int samples_per_frame = 512;
const int samples_per_subframe = 128;
const int bits_frame_size = 15;
int channels_per_stream = xma_get_channels_per_stream(streamFile, msd->chunk_offset, msd->channels);
ms_audio_get_samples(msd, streamFile, channels_per_stream, bytes_per_packet, samples_per_frame, samples_per_subframe, bits_frame_size);
} }
void wmapro_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_align, int sample_rate, uint32_t decode_flags) { void wmapro_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_align, int sample_rate, uint32_t decode_flags) {
@ -703,7 +739,6 @@ void wmapro_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_
int samples_per_frame = 0; int samples_per_frame = 0;
int samples_per_subframe = 0; int samples_per_subframe = 0;
int bits_frame_size = 0; int bits_frame_size = 0;
int start_packet = 0;
int channels_per_stream = msd->channels; int channels_per_stream = msd->channels;
if (!(decode_flags & 0x40)) { if (!(decode_flags & 0x40)) {
@ -716,7 +751,7 @@ void wmapro_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_
samples_per_subframe = 0; /* not needed as WMAPro can't use loop subframes (complex subframe lengths) */ samples_per_subframe = 0; /* not needed as WMAPro can't use loop subframes (complex subframe lengths) */
msd->xma_version = 0; /* signal it's not XMA */ msd->xma_version = 0; /* signal it's not XMA */
ms_audio_get_samples(msd, streamFile, start_packet, channels_per_stream, bytes_per_packet, samples_per_frame, samples_per_subframe, bits_frame_size); ms_audio_get_samples(msd, streamFile, channels_per_stream, bytes_per_packet, samples_per_frame, samples_per_subframe, bits_frame_size);
} }
void wma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_align, int sample_rate, uint32_t decode_flags) { void wma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_align, int sample_rate, uint32_t decode_flags) {
@ -761,6 +796,81 @@ void wma_get_samples(ms_sample_data * msd, STREAMFILE *streamFile, int block_ali
} }
/* XMA hell for precise looping and gapless support, fixes raw sample values from headers
* that don't count XMA's final subframe/encoder delay/encoder padding, and FFmpeg stuff.
* Configurable since different headers vary for maximum annoyance. */
void xma_fix_raw_samples(VGMSTREAM *vgmstream, STREAMFILE*streamFile, off_t stream_offset, size_t stream_size, off_t chunk_offset, int fix_num_samples, int fix_loop_samples) {
const int bytes_per_packet = 2048;
const int samples_per_frame = 512;
const int samples_per_subframe = 128;
const int bits_frame_size = 15;
int xma_version = 2; /* works ok even for XMA1 */
int channels_per_stream = xma_get_channels_per_stream(streamFile, chunk_offset, vgmstream->channels);
off_t first_packet = stream_offset;
off_t last_packet = stream_offset + stream_size - bytes_per_packet;
int32_t start_skip = 0, end_skip = 0;
if (stream_offset + stream_size > get_streamfile_size(streamFile)) {
VGM_LOG("XMA SKIPS: ignoring bad stream offset+size vs real size\n");
return;
}
/* find delay/padding values in the bitstream (should be safe even w/ multistreams
* as every stream repeats them). Theoretically every packet could contain skips,
* doesn't happen in practice though. */
ms_audio_get_skips(streamFile, xma_version, first_packet, channels_per_stream, bytes_per_packet, samples_per_frame, bits_frame_size, &start_skip, NULL);
ms_audio_get_skips(streamFile, xma_version, last_packet, channels_per_stream, bytes_per_packet, samples_per_frame, bits_frame_size, NULL, &end_skip);
//;VGM_LOG("XMA SKIPS: apply start=%i, end=%i\n", start_skip, end_skip);
VGM_ASSERT(start_skip < samples_per_frame, "XMA SKIPS: small start skip\n");
if (end_skip == 512) { /* most likely a read bug */
VGM_LOG("XMA SKIPS: ignoring big end_skip\n");
end_skip = 0;
}
/* apply XMA extra samples */
if (fix_num_samples) {
vgmstream->num_samples += samples_per_subframe; /* final extra IMDCT samples */
vgmstream->num_samples -= start_skip; /* first samples skipped at the beginning */
vgmstream->num_samples -= end_skip; /* last samples discarded at the end */
}
/* from xma2encode tests this is correct (probably encodes/decodes loops considering all skips), ex.-
* full loop wav to xma makes start=384 (0 + ~512 delay - 128 padding), then xma to wav has "smpl" start=0 */
if (fix_loop_samples && vgmstream->loop_flag) {
vgmstream->loop_start_sample += samples_per_subframe;
vgmstream->loop_start_sample -= start_skip;
vgmstream->loop_end_sample += samples_per_subframe;
vgmstream->loop_end_sample -= start_skip;
/* since loops are adjusted this shouldn't happen (often loop_end == num_samples after applying all) */
if (vgmstream->loop_end_sample > vgmstream->num_samples &&
vgmstream->loop_end_sample - end_skip <= vgmstream->loop_end_sample) {
VGM_LOG("XMA SAMPLES: adjusted loop end\n");
vgmstream->loop_end_sample -= end_skip;
}
}
#ifdef VGM_USE_FFMPEG
/* also fix FFmpeg, since we now know exact skips */
{
ffmpeg_codec_data* data = vgmstream->codec_data;
/* FFmpeg doesn't XMA apply encoder delay ATM so here we fix it manually.
* XMA delay is part if the bitstream and while theoretically it could be any
* value (and is honored by xmaencoder), basically it's always 512.
*
* Somehow also needs to skip 64 extra samples (looks like another FFmpeg bug
* where XMA outputs half a subframe samples late, WMAPRO isn't affected),
* which sometimes makes FFmpeg complain (=reads after end) but doesn't seem audible. */
if (data->skipSamples == 0) {
ffmpeg_set_skip_samples(data, start_skip+64);
}
}
#endif
}
/* ******************************************** */ /* ******************************************** */
/* HEADER PARSING */ /* HEADER PARSING */
@ -808,6 +918,7 @@ void xma2_parse_fmt_chunk_extra(STREAMFILE *streamFile, off_t chunk_offset, int
loop_start_sample = read_32bit(chunk_offset+0x28,streamFile); loop_start_sample = read_32bit(chunk_offset+0x28,streamFile);
loop_end_sample = loop_start_sample + read_32bit(chunk_offset+0x2C,streamFile); loop_end_sample = loop_start_sample + read_32bit(chunk_offset+0x2C,streamFile);
loop_flag = (uint8_t)read_8bit(chunk_offset+0x30,streamFile) != 0; loop_flag = (uint8_t)read_8bit(chunk_offset+0x30,streamFile) != 0;
/* may need loop end +1, though some header doesn't need it (ex.- Sonic and Sega All Stars Racing .str) */
/* flag rarely set, use loop_end as marker */ /* flag rarely set, use loop_end as marker */
if (!loop_flag) { if (!loop_flag) {
@ -820,24 +931,7 @@ void xma2_parse_fmt_chunk_extra(STREAMFILE *streamFile, off_t chunk_offset, int
} }
} }
#if 0 //todo apply once FFmpeg decode is ok /* samples are "raw" values, must be fixed externally (see xma_fix_raw_samples) */
/* apply extra output + skips (see ms_audio_get_samples, approximate as find out with first and last frames) */
{
int start_skip = 512;
int end_skip = 0;
num_samples += 128;
num_samples -= start_skip;
num_samples -= end_skip;
if (loop_flag) {
loop_start_sample += 128;
loop_start_sample -= start_skip;
loop_end_sample += 128;
loop_end_sample -= start_skip;
}
}
#endif
if(out_num_samples) *out_num_samples = num_samples; if(out_num_samples) *out_num_samples = num_samples;
if(out_loop_start_sample) *out_loop_start_sample = loop_start_sample; if(out_loop_start_sample) *out_loop_start_sample = loop_start_sample;
if(out_loop_end_sample) *out_loop_end_sample = loop_end_sample; if(out_loop_end_sample) *out_loop_end_sample = loop_end_sample;
@ -862,6 +956,7 @@ void xma2_parse_xma2_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * out
loop_end_sample = read_32bit(chunk_offset+0x08,streamFile); loop_end_sample = read_32bit(chunk_offset+0x08,streamFile);
loop_flag = (uint8_t)read_8bit(chunk_offset+0x03,streamFile) > 0 || loop_end_sample; /* rarely not set, encoder default */ loop_flag = (uint8_t)read_8bit(chunk_offset+0x03,streamFile) > 0 || loop_end_sample; /* rarely not set, encoder default */
sample_rate = read_32bit(chunk_offset+0x0c,streamFile); sample_rate = read_32bit(chunk_offset+0x0c,streamFile);
/* may need loop end +1 */
offset = xma2_chunk_version == 3 ? 0x14 : 0x1C; offset = xma2_chunk_version == 3 ? 0x14 : 0x1C;
num_samples = read_32bit(chunk_offset+offset+0x00,streamFile); num_samples = read_32bit(chunk_offset+offset+0x00,streamFile);
@ -874,25 +969,7 @@ void xma2_parse_xma2_chunk(STREAMFILE *streamFile, off_t chunk_offset, int * out
channels += read_8bit(chunk_offset+offset+i*0x04,streamFile); channels += read_8bit(chunk_offset+offset+i*0x04,streamFile);
} }
#if 0 //todo apply once FFmpeg decode is ok /* samples are "raw" values, must be fixed externally (see xma_fix_raw_samples) */
/* apply extra output + skips (see ms_audio_get_samples, approximate as find out with first and last frames) */
{
int start_skip = 512;
int end_skip = 0;
num_samples += 128;
num_samples -= start_skip;
num_samples -= end_skip;
if (loop_flag) {
loop_start_sample += 128;
loop_start_sample -= start_skip;
loop_end_sample += 128;
loop_end_sample -= start_skip;
}
}
#endif
if(out_channels) *out_channels = channels; if(out_channels) *out_channels = channels;
if(out_sample_rate) *out_sample_rate = sample_rate; if(out_sample_rate) *out_sample_rate = sample_rate;
if(out_num_samples) *out_num_samples = num_samples; if(out_num_samples) *out_num_samples = num_samples;