diff --git a/src/coding/coding.h b/src/coding/coding.h index 02e1142f..400f2eb8 100644 --- a/src/coding/coding.h +++ b/src/coding/coding.h @@ -109,8 +109,11 @@ void decode_at3plus(VGMSTREAM *vgmstream, #endif #ifdef VGM_USE_FFMPEG -void decode_ffmpeg(VGMSTREAM *stream, - sample * outbuf, int32_t samples_to_do, int channels); +void decode_ffmpeg(VGMSTREAM *stream, sample * outbuf, int32_t samples_to_do, int channels); + +void reset_ffmpeg(VGMSTREAM *vgmstream); + +void seek_ffmpeg(VGMSTREAM *vgmstream, int32_t num_sample); #endif void decode_acm(ACMStream * acm, sample * outbuf, diff --git a/src/coding/ffmpeg_decoder.c b/src/coding/ffmpeg_decoder.c index a6f0a5d6..4abc52e0 100644 --- a/src/coding/ffmpeg_decoder.c +++ b/src/coding/ffmpeg_decoder.c @@ -59,18 +59,16 @@ static void convert_audio(sample *outbuf, const uint8_t *inbuf, int sampleCount, } } -void decode_ffmpeg(VGMSTREAM *vgmstream, - sample * outbuf, int32_t samples_to_do, int channels) { +void decode_ffmpeg(VGMSTREAM *vgmstream, sample * outbuf, int32_t samples_to_do, int channels) { ffmpeg_codec_data *data = (ffmpeg_codec_data *) vgmstream->codec_data; + int bytesPerSample; + int bytesPerFrame; int frameSize; - int dataSize; int bytesToRead; int bytesRead; - int errcode; - uint8_t *targetBuf; AVFormatContext *formatCtx; @@ -78,25 +76,24 @@ void decode_ffmpeg(VGMSTREAM *vgmstream, AVPacket *lastReadPacket; AVFrame *lastDecodedFrame; - int streamIndex; - int bytesConsumedFromDecodedFrame; int readNextPacket; int endOfStream; int endOfAudio; - int toConsume; - int framesReadNow; - if ((data->totalFrames && data->framesRead >= data->totalFrames) || data->endOfStream || data->endOfAudio) { + + /* ignore decode attempts at EOF */ + if (data->endOfStream || data->endOfAudio) { memset(outbuf, 0, samples_to_do * channels * sizeof(sample)); return; } - frameSize = data->channels * (data->bitsPerSample / 8); - dataSize = 0; + bytesPerSample = data->bitsPerSample / 8; + bytesPerFrame = channels * bytesPerSample; + 
frameSize = data->channels * bytesPerSample; bytesToRead = samples_to_do * frameSize; bytesRead = 0; @@ -109,8 +106,6 @@ void decode_ffmpeg(VGMSTREAM *vgmstream, lastReadPacket = data->lastReadPacket; lastDecodedFrame = data->lastDecodedFrame; - streamIndex = data->streamIndex; - bytesConsumedFromDecodedFrame = data->bytesConsumedFromDecodedFrame; readNextPacket = data->readNextPacket; @@ -120,15 +115,18 @@ void decode_ffmpeg(VGMSTREAM *vgmstream, /* keep reading and decoding packets until the requested number of samples (in bytes) */ while (bytesRead < bytesToRead) { int planeSize; - int planar = av_sample_fmt_is_planar(codecCtx->sample_fmt); - dataSize = av_samples_get_buffer_size(&planeSize, codecCtx->channels, - lastDecodedFrame->nb_samples, - codecCtx->sample_fmt, 1); - + int planar; + int dataSize; + int toConsume; + int errcode; + + + /* size of previous frame */ + dataSize = av_samples_get_buffer_size(&planeSize, codecCtx->channels, lastDecodedFrame->nb_samples, codecCtx->sample_fmt, 1); if (dataSize < 0) dataSize = 0; - /* read packet */ + /* read new frame + packets when requested */ while (readNextPacket && !endOfAudio) { if (!endOfStream) { av_packet_unref(lastReadPacket); @@ -139,10 +137,11 @@ void decode_ffmpeg(VGMSTREAM *vgmstream, if (formatCtx->pb && formatCtx->pb->error) break; } - if (lastReadPacket->stream_index != streamIndex) - continue; /* ignore non audio streams */ + if (lastReadPacket->stream_index != data->streamIndex) + continue; /* ignore non-selected streams */ } + /* send compressed packet to decoder (NULL at EOF to "drain") */ if ((errcode = avcodec_send_packet(codecCtx, endOfStream ? 
NULL : lastReadPacket)) < 0) { if (errcode != AVERROR(EAGAIN)) { goto end; @@ -152,13 +151,14 @@ void decode_ffmpeg(VGMSTREAM *vgmstream, readNextPacket = 0; } - /* decode packet */ + /* decode packets into frame (checking if we have bytes to consume from previous frame) */ if (dataSize <= bytesConsumedFromDecodedFrame) { if (endOfStream && endOfAudio) break; bytesConsumedFromDecodedFrame = 0; + /* receive uncompressed data from decoder */ if ((errcode = avcodec_receive_frame(codecCtx, lastDecodedFrame)) < 0) { if (errcode == AVERROR_EOF) { endOfAudio = 1; @@ -173,46 +173,45 @@ void decode_ffmpeg(VGMSTREAM *vgmstream, } } + /* size of current frame */ dataSize = av_samples_get_buffer_size(&planeSize, codecCtx->channels, lastDecodedFrame->nb_samples, codecCtx->sample_fmt, 1); - if (dataSize < 0) dataSize = 0; } toConsume = FFMIN((dataSize - bytesConsumedFromDecodedFrame), (bytesToRead - bytesRead)); - /* discard packet if needed (fully or partially) */ + /* discard decoded frame if needed (fully or partially) */ if (data->samplesToDiscard) { - int samplesToConsume; - int bytesPerFrame = ((data->bitsPerSample / 8) * channels); + int samplesDataSize = dataSize / bytesPerFrame; - /* discard all if there are more samples to do than the packet's samples */ - if (data->samplesToDiscard >= dataSize / bytesPerFrame) { - samplesToConsume = dataSize / bytesPerFrame; - } - else { - samplesToConsume = toConsume / bytesPerFrame; - } + if (data->samplesToDiscard >= samplesDataSize) { + /* discard all of the frame's samples and continue to the next */ - if (data->samplesToDiscard >= samplesToConsume) { /* full discard: skip to next */ - data->samplesToDiscard -= samplesToConsume; bytesConsumedFromDecodedFrame = dataSize; + data->samplesToDiscard -= samplesDataSize; + continue; } - else { /* partial discard: copy below */ - bytesConsumedFromDecodedFrame += data->samplesToDiscard * bytesPerFrame; - toConsume -= data->samplesToDiscard * bytesPerFrame; + else { + /* discard part of 
the frame and copy the rest below */ + int bytesToDiscard = data->samplesToDiscard * bytesPerFrame; + int dataSizeLeft = dataSize - bytesToDiscard; + + bytesConsumedFromDecodedFrame += bytesToDiscard; data->samplesToDiscard = 0; + if (toConsume > dataSizeLeft) + toConsume = dataSizeLeft; /* consume at most dataSize left */ } } - /* copy packet to buffer (mux channels if needed) */ + /* copy decoded frame to buffer (mux channels if needed) */ + planar = av_sample_fmt_is_planar(codecCtx->sample_fmt); if (!planar || channels == 1) { memmove(targetBuf + bytesRead, (lastDecodedFrame->data[0] + bytesConsumedFromDecodedFrame), toConsume); } else { uint8_t * out = (uint8_t *) targetBuf + bytesRead; - int bytesPerSample = data->bitsPerSample / 8; int bytesConsumedPerPlane = bytesConsumedFromDecodedFrame / channels; int toConsumePerPlane = toConsume / channels; int s, ch; @@ -231,11 +230,6 @@ void decode_ffmpeg(VGMSTREAM *vgmstream, end: framesReadNow = bytesRead / frameSize; - if (data->totalFrames && (data->framesRead + framesReadNow > data->totalFrames)) { - framesReadNow = (int)(data->totalFrames - data->framesRead); - } - - data->framesRead += framesReadNow; // Convert the audio convert_audio(outbuf, data->sampleBuffer, framesReadNow * channels, data->bitsPerSample, data->floatingPoint); @@ -247,4 +241,70 @@ end: data->endOfAudio = endOfAudio; } + +void reset_ffmpeg(VGMSTREAM *vgmstream) { + ffmpeg_codec_data *data = (ffmpeg_codec_data *) vgmstream->codec_data; + + if (data->formatCtx) { + avformat_seek_file(data->formatCtx, data->streamIndex, 0, 0, 0, AVSEEK_FLAG_ANY); + } + if (data->codecCtx) { + avcodec_flush_buffers(data->codecCtx); + } + data->readNextPacket = 1; + data->bytesConsumedFromDecodedFrame = INT_MAX; + data->endOfStream = 0; + data->endOfAudio = 0; + data->samplesToDiscard = 0; +} + + +void seek_ffmpeg(VGMSTREAM *vgmstream, int32_t num_sample) { + ffmpeg_codec_data *data = (ffmpeg_codec_data *) vgmstream->codec_data; + int64_t ts; + +#ifndef 
VGM_USE_FFMPEG_ACCURATE_LOOPING + /* Seek to loop start by timestamp (closest frame) + adjust skipping some samples */ + /* FFmpeg seeks by ts by design (since not all containers can accurately skip to a frame). */ + /* TODO: this seems to be off by +-1 frames in some cases */ + ts = num_sample; + if (ts >= data->sampleRate * 2) { + data->samplesToDiscard = data->sampleRate * 2; + ts -= data->samplesToDiscard; + } + else { + data->samplesToDiscard = (int)ts; + ts = 0; + } + + /* todo fix this properly */ + if (data->totalFrames) { + ts = (int)ts * (data->formatCtx->streams[data->streamIndex]->duration) / data->totalFrames; /* samples -> stream time_base units, since the seek below passes a stream index */ + } else { + data->samplesToDiscard = num_sample; + ts = 0; + } + + avformat_seek_file(data->formatCtx, data->streamIndex, ts - 1000, ts, ts, AVSEEK_FLAG_ANY); + avcodec_flush_buffers(data->codecCtx); +#endif /* ifndef VGM_USE_FFMPEG_ACCURATE_LOOPING */ + +#ifdef VGM_USE_FFMPEG_ACCURATE_LOOPING + /* Start from 0 and discard samples until loop_start for accurate looping (slower but not too noticeable) */ + /* We could also seek by offset (AVSEEK_FLAG_BYTE) to the frame closest to the loop then discard + * some samples, which is fast but would need calculations per format / when frame size is not constant */ + data->samplesToDiscard = num_sample; + ts = 0; + + avformat_seek_file(data->formatCtx, data->streamIndex, ts, ts, ts, AVSEEK_FLAG_ANY); + avcodec_flush_buffers(data->codecCtx); +#endif /* ifdef VGM_USE_FFMPEG_ACCURATE_LOOPING */ + + data->readNextPacket = 1; + data->bytesConsumedFromDecodedFrame = INT_MAX; + data->endOfStream = 0; + data->endOfAudio = 0; + +} + #endif diff --git a/src/meta/ffmpeg.c b/src/meta/ffmpeg.c index 53ece35b..e77c0c60 100644 --- a/src/meta/ffmpeg.c +++ b/src/meta/ffmpeg.c @@ -4,7 +4,12 @@ #ifdef VGM_USE_FFMPEG +/* internal sizes, can be any value */ #define FFMPEG_DEFAULT_BLOCK_SIZE 2048 +#define FFMPEG_DEFAULT_IO_BUFFER_SIZE (128 * 1024) + +static int init_seek(ffmpeg_codec_data * data); + static volatile int g_ffmpeg_initialized = 0; @@ -54,10
+59,19 @@ VGMSTREAM * init_vgmstream_ffmpeg_offset(STREAMFILE *streamFile, uint64_t start, vgmstream->layout_type = layout_none; vgmstream->meta_type = meta_FFmpeg; + /* this may happen for some streams */ + if (vgmstream->num_samples <= 0) + goto fail; + + return vgmstream; fail: free_ffmpeg(data); + if (vgmstream) { + vgmstream->codec_data = NULL; + close_vgmstream(vgmstream); + } return NULL; } @@ -70,10 +84,9 @@ static int ffmpeg_read(void *opaque, uint8_t *buf, int buf_size) { ffmpeg_codec_data *data = (ffmpeg_codec_data *) opaque; uint64_t offset = data->offset; - int max_to_copy; + int max_to_copy = 0; int ret; if (data->header_insert_block) { - max_to_copy = 0; if (offset < data->header_size) { max_to_copy = (int)(data->header_size - offset); if (max_to_copy > buf_size) { @@ -122,7 +135,11 @@ static int64_t ffmpeg_seek(void *opaque, int64_t offset, int whence) return data->size + data->header_size; } whence &= ~(AVSEEK_SIZE | AVSEEK_FORCE); + /* false offsets, on reads data->start will be added */ switch (whence) { + case SEEK_SET: + break; + case SEEK_CUR: offset += data->offset; break; @@ -159,6 +176,7 @@ ffmpeg_codec_data * init_ffmpeg_faux_riff(STREAMFILE *streamFile, int64_t fmt_of int errcode, i; int streamIndex; + AVStream *stream; AVCodecParameters *codecPar; AVRational tb; @@ -216,10 +234,10 @@ ffmpeg_codec_data * init_ffmpeg_faux_riff(STREAMFILE *streamFile, int64_t fmt_of /* setup IO, attempt to autodetect format and gather some info */ - data->buffer = av_malloc(128 * 1024); + data->buffer = av_malloc(FFMPEG_DEFAULT_IO_BUFFER_SIZE); if (!data->buffer) goto fail; - data->ioCtx = avio_alloc_context(data->buffer, 128 * 1024, 0, data, ffmpeg_read, ffmpeg_write, ffmpeg_seek); + data->ioCtx = avio_alloc_context(data->buffer, FFMPEG_DEFAULT_IO_BUFFER_SIZE, 0, data, ffmpeg_read, ffmpeg_write, ffmpeg_seek); if (!data->ioCtx) goto fail; data->formatCtx = avformat_alloc_context(); @@ -236,16 +254,19 @@ ffmpeg_codec_data * init_ffmpeg_faux_riff(STREAMFILE 
*streamFile, int64_t fmt_of streamIndex = -1; for (i = 0; i < data->formatCtx->nb_streams; ++i) { - codecPar = data->formatCtx->streams[i]->codecpar; - if (codecPar->codec_type == AVMEDIA_TYPE_AUDIO) { - streamIndex = i; - break; + stream = data->formatCtx->streams[i]; + if (streamIndex < 0 && stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { + codecPar = stream->codecpar; /* keep the selected stream's parameters: the loop no longer breaks here */ + streamIndex = i; /* select first audio stream found */ + } else { + stream->discard = AVDISCARD_ALL; /* disable demuxing unneeded streams */ } } if (streamIndex < 0) goto fail; data->streamIndex = streamIndex; + stream = data->formatCtx->streams[streamIndex]; /* prepare codec and frame/packet buffers */ @@ -254,7 +275,7 @@ ffmpeg_codec_data * init_ffmpeg_faux_riff(STREAMFILE *streamFile, int64_t fmt_of if ((errcode = avcodec_parameters_to_context(data->codecCtx, codecPar)) < 0) goto fail; - av_codec_set_pkt_timebase(data->codecCtx, data->formatCtx->streams[streamIndex]->time_base); + av_codec_set_pkt_timebase(data->codecCtx, stream->time_base); data->codec = avcodec_find_decoder(data->codecCtx->codec_id); if (!data->codec) goto fail; @@ -311,16 +332,14 @@ ffmpeg_codec_data * init_ffmpeg_faux_riff(STREAMFILE *streamFile, int64_t fmt_of } data->bitrate = (int)(data->codecCtx->bit_rate); - data->framesRead = 0; data->endOfStream = 0; data->endOfAudio = 0; /* try to guess frames/samples (duration isn't always set) */ tb.num = 1; tb.den = data->codecCtx->sample_rate; - data->totalFrames = av_rescale_q(data->formatCtx->streams[streamIndex]->duration, data->formatCtx->streams[streamIndex]->time_base, tb); + data->totalFrames = av_rescale_q(stream->duration, stream->time_base, tb); if (data->totalFrames < 0) - data->totalFrames = 0; - + data->totalFrames = 0; /* caller must consider this */ /* setup decode buffer */ data->samplesPerBlock = FFMPEG_DEFAULT_BLOCK_SIZE; @@ -328,6 +347,12 @@ ffmpeg_codec_data * init_ffmpeg_faux_riff(STREAMFILE *streamFile, int64_t fmt_of if (!data->sampleBuffer) goto fail; + +
/* setup decent seeking for faulty formats */ + errcode = init_seek(data); + if (errcode < 0) goto fail; + + return data; fail: @@ -337,6 +362,82 @@ fail: } +/** + * Special patching for FFmpeg's buggy seek code. + * + * To seek with avformat_seek_file/av_seek_frame, FFmpeg's demuxers can implement read_seek2 (newest API) + * or read_seek (older API), with various search modes. If none are available it will use seek_frame_generic, + * which manually reads frame by frame until the selected timestamp. However, the prev frame will be consumed + * (so after seeking to 0 next av_read_frame will actually give the second frame and so on). + * + * Fortunately seek_frame_generic can use an index to find the correct position. This function reads the + * first frame/packet and sets up index to timestamp 0. This ensures faulty demuxers will seek to 0 correctly. + * Some formats may not seek to 0 even with this, though. + */ +static int init_seek(ffmpeg_codec_data * data) { + int ret, ts_index, found_first = 0; + int64_t ts = 0; + int64_t pos; /* offset */ + int size; /* coded size */ + int distance = 0; /* always? */ + + AVStream * stream; + AVPacket * pkt; + + stream = data->formatCtx->streams[data->streamIndex]; + pkt = data->lastReadPacket; + + /* read_seek shouldn't need this index, but direct access to FFmpeg's internals is no good */ + /* if (data->formatCtx->iformat->read_seek || data->formatCtx->iformat->read_seek2) + return 0; */ + + /* some formats already have a proper index (e.g. 
M4A) */ + ts_index = av_index_search_timestamp(stream, ts, AVSEEK_FLAG_ANY); + if (ts_index>=0) + goto test_seek; + + + /* find the first + second packets to get pos/size */ + while (1) { + av_packet_unref(pkt); + ret = av_read_frame(data->formatCtx, pkt); + if (ret < 0) + goto fail; + if (pkt->stream_index != data->streamIndex) + continue; /* ignore non-selected streams */ + + if (!found_first) { /* first found */ + found_first = 1; + pos = pkt->pos; + continue; + } else { /* second found */ + size = pkt->pos - pos; /* coded, pkt->size is decoded size */ + break; + } + } + + /* add index 0 */ + ret = av_add_index_entry(stream, pos, ts, size, distance, AVINDEX_KEYFRAME); + if ( ret < 0 ) + return ret; + + +test_seek: + /* seek to 0 test / move back to beginning, since we just consumed packets */ + ret = avformat_seek_file(data->formatCtx, data->streamIndex, ts, ts, ts, AVSEEK_FLAG_ANY); + if ( ret < 0 ) + return ret; /* we can't even reset_vgmstream the file */ + + avcodec_flush_buffers(data->codecCtx); + + return 0; + + +fail: + return -1; +} + + void free_ffmpeg(ffmpeg_codec_data *data) { if (data->lastReadPacket) { av_packet_unref(data->lastReadPacket); diff --git a/src/vgmstream.c b/src/vgmstream.c index ef063f2f..56e1d42e 100644 --- a/src/vgmstream.c +++ b/src/vgmstream.c @@ -515,20 +515,7 @@ void reset_vgmstream(VGMSTREAM * vgmstream) { #ifdef VGM_USE_FFMPEG if (vgmstream->coding_type==coding_FFmpeg) { - ffmpeg_codec_data *data = (ffmpeg_codec_data *) vgmstream->codec_data; - - if (data->formatCtx) { - avformat_seek_file(data->formatCtx, -1, 0, 0, 0, AVSEEK_FLAG_ANY); - } - if (data->codecCtx) { - avcodec_flush_buffers(data->codecCtx); - } - data->readNextPacket = 1; - data->bytesConsumedFromDecodedFrame = INT_MAX; - data->framesRead = 0; - data->endOfStream = 0; - data->endOfAudio = 0; - data->samplesToDiscard = 0; + reset_ffmpeg(vgmstream); } #endif @@ -1789,53 +1776,7 @@ int vgmstream_do_loop(VGMSTREAM * vgmstream) { } #ifdef VGM_USE_FFMPEG if 
(vgmstream->coding_type==coding_FFmpeg) { - ffmpeg_codec_data *data = (ffmpeg_codec_data *)(vgmstream->codec_data); - int64_t ts; - -#ifndef VGM_USE_FFMPEG_ACCURATE_LOOPING - /* Seek to loop start by timestamp (closest frame) + adjust skipping some samples */ - /* FFmpeg seeks by ts by design (since not all containers can accurately skip to a frame). */ - /* TODO: this seems to be off by +-1 frames in some cases */ - ts = vgmstream->loop_start_sample; - if (ts >= data->sampleRate * 2) { - data->samplesToDiscard = data->sampleRate * 2; - ts -= data->samplesToDiscard; - } - else { - data->samplesToDiscard = (int)ts; - ts = 0; - } - - /* todo fix this properly */ - if (data->totalFrames) { - data->framesRead = (int)ts; - ts = data->framesRead * (data->formatCtx->duration) / data->totalFrames; - } else { - data->samplesToDiscard = vgmstream->loop_start_sample; - data->framesRead = 0; - ts = 0; - } - - avformat_seek_file(data->formatCtx, -1, ts - 1000, ts, ts, AVSEEK_FLAG_ANY); - avcodec_flush_buffers(data->codecCtx); -#endif /* ifndef VGM_USE_FFMPEG_ACCURATE_LOOPING */ - -#ifdef VGM_USE_FFMPEG_ACCURATE_LOOPING - /* Start from 0 and discard samples until loop_start for accurate looping (slower but not too noticeable) */ - /* We could also seek by offset (AVSEEK_FLAG_BYTE) to the frame closest to the loop then discard - * some samples, which is fast but would need calculations per format / when frame size is not constant */ - data->samplesToDiscard = vgmstream->loop_start_sample; - data->framesRead = 0; - ts = 0; - - avformat_seek_file(data->formatCtx, -1, ts, ts, ts, AVSEEK_FLAG_ANY); - avcodec_flush_buffers(data->codecCtx); -#endif /* ifdef VGM_USE_FFMPEG_ACCURATE_LOOPING */ - - data->readNextPacket = 1; - data->bytesConsumedFromDecodedFrame = INT_MAX; - data->endOfStream = 0; - data->endOfAudio = 0; + seek_ffmpeg(vgmstream, vgmstream->loop_start_sample); } #endif /* VGM_USE_FFMPEG */ #if defined(VGM_USE_MP4V2) && defined(VGM_USE_FDKAAC) diff --git a/src/vgmstream.h 
b/src/vgmstream.h index 1a71487a..483105fd 100644 --- a/src/vgmstream.h +++ b/src/vgmstream.h @@ -870,7 +870,6 @@ typedef struct { int floatingPoint; int sampleRate; int64_t totalFrames; // sample count, or 0 if unknown - int64_t framesRead; int bitrate; // Intermediate buffer