From 5d349b60ecd1ff269881391db1eb6e543bc5f9c8 Mon Sep 17 00:00:00 2001 From: arch1t3cht Date: Tue, 4 Nov 2025 14:11:34 +0100 Subject: [PATCH 1/3] Use RAII wrapper for AVPackets This is in preparation for DecodeNextFrame returning an AVPacket, at which point a RAII wrapper may be helpful to better track ownership semantics. This uses a simple std::unique_ptr alias that handles av_packet_alloc and av_packet_free, while av_packet_ref and av_packet_unref are still handled manually. --- src/core/audiosource.cpp | 34 ++++++++++++---------------- src/core/audiosource.h | 2 +- src/core/indexing.cpp | 48 ++++++++++++++++++---------------------- src/core/indexing.h | 6 ++--- src/core/utils.h | 12 ++++++++++ src/core/videosource.cpp | 47 +++++++++++++++++---------------------- src/core/videosource.h | 4 ++-- 7 files changed, 73 insertions(+), 80 deletions(-) diff --git a/src/core/audiosource.cpp b/src/core/audiosource.cpp index dbc55cda02..31d589dc39 100644 --- a/src/core/audiosource.cpp +++ b/src/core/audiosource.cpp @@ -301,12 +301,9 @@ FFMS_AudioSource::AudioBlock *FFMS_AudioSource::CacheBlock(CacheIterator &pos) { } int FFMS_AudioSource::DecodeNextBlock(CacheIterator *pos) { - AVPacket *Packet = av_packet_alloc(); - if (!Packet) - throw FFMS_Exception(FFMS_ERROR_PARSER, FFMS_ERROR_ALLOCATION_FAILED, - "Could not allocate packet."); - if (!ReadPacket(Packet)) { - av_packet_free(&Packet); + SmartAVPacket Packet; + + if (!ReadPacket(*Packet)) { throw FFMS_Exception(FFMS_ERROR_PARSER, FFMS_ERROR_UNKNOWN, "ReadPacket unexpectedly failed to read a packet"); } @@ -315,7 +312,7 @@ int FFMS_AudioSource::DecodeNextBlock(CacheIterator *pos) { CurrentSample = CurrentFrame->SampleStart; // Value code intentionally ignored, combined with the checks when indexing this mostly gives the expected behavior - avcodec_send_packet(CodecContext, Packet); + avcodec_send_packet(CodecContext, Packet.get()); int NumberOfSamples = 0; AudioBlock *CachedBlock = nullptr; @@ -331,12 +328,11 @@ int FFMS_AudioSource::DecodeNextBlock(CacheIterator *pos) { } break; } else if (Ret == AVERROR(EAGAIN)) { - if (!ReadPacket(Packet)) { - av_packet_free(&Packet); + if (!ReadPacket(*Packet)) { throw FFMS_Exception(FFMS_ERROR_PARSER, FFMS_ERROR_UNKNOWN, "ReadPacket unexpectedly failed to read a packet"); } - avcodec_send_packet(CodecContext, Packet); + avcodec_send_packet(CodecContext, Packet.get()); } else if (Ret == AVERROR_EOF) { break; } else { @@ -344,8 +340,6 @@ int FFMS_AudioSource::DecodeNextBlock(CacheIterator *pos) { } } - av_packet_free(&Packet); - // Zero sample packets aren't included in the index if (!NumberOfSamples) return NumberOfSamples; @@ -553,24 +547,24 @@ void FFMS_AudioSource::Seek() { } } -bool FFMS_AudioSource::ReadPacket(AVPacket *Packet) { - while (av_read_frame(FormatContext, Packet) >= 0) { - if (Packet->stream_index == TrackNumber) { +bool FFMS_AudioSource::ReadPacket(AVPacket &Packet) { + while (av_read_frame(FormatContext, &Packet) >= 0) { + if (Packet.stream_index == TrackNumber) { // Required because not all audio packets, especially in ogg, have a pts. Use the previous valid packet's pts instead. - if (Packet->pts == AV_NOPTS_VALUE) - Packet->pts = LastValidTS; + if (Packet.pts == AV_NOPTS_VALUE) + Packet.pts = LastValidTS; else - LastValidTS = Packet->pts; + LastValidTS = Packet.pts; // This only happens if a really shitty demuxer seeks to a packet without pts *hrm* ogg *hrm* so read until a valid pts is reached - int64_t PacketTS = Frames.HasTS ? Packet->pts : Packet->pos; + int64_t PacketTS = Frames.HasTS ? Packet.pts : Packet.pos; if (PacketTS != AV_NOPTS_VALUE) { while (PacketNumber > 0 && FrameTS(PacketNumber) > PacketTS) --PacketNumber; while (FrameTS(PacketNumber) < PacketTS) ++PacketNumber; return true; } } - av_packet_unref(Packet); + av_packet_unref(&Packet); } return false; } diff --git a/src/core/audiosource.h b/src/core/audiosource.h index a8efc474bd..6bd6e871b5 100644 --- a/src/core/audiosource.h +++ b/src/core/audiosource.h @@ -101,7 +101,7 @@ struct FFMS_AudioSource { // Called after seeking void Seek(); // Read the next packet from the file - bool ReadPacket(AVPacket *); + bool ReadPacket(AVPacket &); // Close and reopen the source file to seek back to the beginning. Only // needs to do anything for formats that can't seek to the beginning otherwise. diff --git a/src/core/indexing.cpp b/src/core/indexing.cpp index c13b8ee599..83d3f30ed8 100644 --- a/src/core/indexing.cpp +++ b/src/core/indexing.cpp @@ -305,10 +305,10 @@ FFMS_Indexer::FFMS_Indexer(const char *Filename, const FFMS_KeyValuePair *Demuxe } } -uint32_t FFMS_Indexer::IndexAudioPacket(int Track, AVPacket *Packet, SharedAVContext &Context, FFMS_Index &TrackIndices) { +uint32_t FFMS_Indexer::IndexAudioPacket(int Track, const AVPacket &Packet, SharedAVContext &Context, FFMS_Index &TrackIndices) { AVCodecContext *CodecContext = Context.CodecContext; int64_t StartSample = Context.CurrentSample; - int Ret = avcodec_send_packet(CodecContext, Packet); + int Ret = avcodec_send_packet(CodecContext, &Packet); if (Ret != 0) { if (ErrorHandling == FFMS_IEH_ABORT) { throw FFMS_Exception(FFMS_ERROR_CODEC, FFMS_ERROR_DECODING, "Audio decoding error"); @@ -364,7 +364,7 @@ void FFMS_Indexer::CheckAudioProperties(int Track, AVCodecContext *Context) { } } -void FFMS_Indexer::ParseVideoPacket(SharedAVContext &VideoContext, AVPacket *pkt, int *RepeatPict, +void FFMS_Indexer::ParseVideoPacket(SharedAVContext &VideoContext, const AVPacket &pkt, int *RepeatPict, int *FrameType, bool *Invisible, bool *SecondField, enum AVPictureStructure *LastPicStruct) { if (VideoContext.Parser) { uint8_t *OB; @@ -373,8 +373,8 @@ void FFMS_Indexer::ParseVideoPacket(SharedAVContext &VideoContext, AVPacket *pkt av_parser_parse2(VideoContext.Parser, VideoContext.CodecContext, &OB, &OBSize, - pkt->data, pkt->size, - pkt->pts, pkt->dts, pkt->pos); + pkt.data, pkt.size, + pkt.pts, pkt.dts, pkt.pos); // H.264 (PAFF) and HEVC may have one field per packet, so we need to track // when we have a full or half frame available, and mark one of them as @@ -394,15 +394,15 @@ void FFMS_Indexer::ParseVideoPacket(SharedAVContext &VideoContext, AVPacket *pkt *RepeatPict = VideoContext.Parser->repeat_pict; *FrameType = VideoContext.Parser->pict_type; - *Invisible = (VideoContext.Parser->repeat_pict < 0 || (pkt->flags & AV_PKT_FLAG_DISCARD)); + *Invisible = (VideoContext.Parser->repeat_pict < 0 || (pkt.flags & AV_PKT_FLAG_DISCARD)); } else { - *Invisible = !!(pkt->flags & AV_PKT_FLAG_DISCARD); + *Invisible = !!(pkt.flags & AV_PKT_FLAG_DISCARD); } if (VideoContext.CodecContext->codec_id == AV_CODEC_ID_VP8) - ParseVP8(pkt->data[0], Invisible, FrameType); + ParseVP8(pkt.data[0], Invisible, FrameType); else if (VideoContext.CodecContext->codec_id == AV_CODEC_ID_VP9) - ParseVP9(pkt->data[0], FrameType); + ParseVP9(pkt.data[0], FrameType); } void FFMS_Indexer::Free() { @@ -502,34 +502,30 @@ FFMS_Index *FFMS_Indexer::DoIndexing() { } } - AVPacket *Packet = av_packet_alloc(); - if (!Packet) - throw FFMS_Exception(FFMS_ERROR_CODEC, FFMS_ERROR_ALLOCATION_FAILED, - "Could not allocate packet."); + SmartAVPacket Packet; std::vector LastValidTS(FormatContext->nb_streams, AV_NOPTS_VALUE); int64_t filesize = avio_size(FormatContext->pb); enum AVPictureStructure LastPicStruct = AV_PICTURE_STRUCTURE_UNKNOWN; int ret; - while ((ret = av_read_frame(FormatContext, Packet)) >= 0) { + while ((ret = av_read_frame(FormatContext, Packet.get())) >= 0) { // Update progress // FormatContext->pb can apparently be NULL when opening images. if (IC && FormatContext->pb) { if ((*IC)(FormatContext->pb->pos, filesize, ICPrivate)) { - av_packet_free(&Packet); throw FFMS_Exception(FFMS_ERROR_CANCELLED, FFMS_ERROR_USER, "Cancelled by user"); } } if (!IndexMask.count(Packet->stream_index)) { - av_packet_unref(Packet); + av_packet_unref(Packet.get()); continue; } int Track = Packet->stream_index; FFMS_Track &TrackInfo = (*TrackIndices)[Track]; bool KeyFrame = !!(Packet->flags & AV_PKT_FLAG_KEY); - ReadTS(Packet, LastValidTS[Track], (*TrackIndices)[Track].UseDTS); + ReadTS(*Packet, LastValidTS[Track], (*TrackIndices)[Track].UseDTS); if (FormatContext->streams[Track]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { int64_t PTS = TrackInfo.UseDTS ? Packet->dts : Packet->pts; @@ -545,7 +541,6 @@ FFMS_Index *FFMS_Indexer::DoIndexing() { bool HasAltRefs = (FormatContext->streams[Track]->codecpar->codec_id == AV_CODEC_ID_VP8 || FormatContext->streams[Track]->codecpar->codec_id == AV_CODEC_ID_VP9); if (Packet->duration == 0 && !HasAltRefs) { - av_packet_free(&Packet); throw FFMS_Exception(FFMS_ERROR_INDEXING, FFMS_ERROR_PARSER, "Invalid packet pts, dts, and duration"); } @@ -562,7 +557,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() { int FrameType = 0; bool Invisible = false; bool SecondField = false; - ParseVideoPacket(AVContexts[Track], Packet, &RepeatPict, &FrameType, &Invisible, &SecondField, &LastPicStruct); + ParseVideoPacket(AVContexts[Track], *Packet, &RepeatPict, &FrameType, &Invisible, &SecondField, &LastPicStruct); TrackInfo.AddVideoFrame(PTS, Packet->dts, RepeatPict, KeyFrame, FrameType, Packet->pos, Invisible, SecondField); @@ -574,7 +569,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() { TrackInfo.HasTS = true; int64_t StartSample = AVContexts[Track].CurrentSample; - uint32_t SampleCount = IndexAudioPacket(Track, Packet, AVContexts[Track], *TrackIndices); + uint32_t SampleCount = IndexAudioPacket(Track, *Packet, AVContexts[Track], *TrackIndices); TrackInfo.SampleRate = AVContexts[Track].CodecContext->sample_rate; TrackInfo.AddAudioFrame(LastValidTS[Track], Packet->dts, @@ -584,9 +579,8 @@ FFMS_Index *FFMS_Indexer::DoIndexing() { if (!(Packet->flags & AV_PKT_FLAG_DISCARD)) TrackInfo.LastDuration = Packet->duration; - av_packet_unref(Packet); + av_packet_unref(Packet.get()); } - av_packet_free(&Packet); if (IsIOError(ret)) throw FFMS_Exception(FFMS_ERROR_INDEXING, FFMS_ERROR_FILE_READ, "Indexing failed: " + AVErrorToString(ret)); @@ -595,11 +589,11 @@ FFMS_Index *FFMS_Indexer::DoIndexing() { return TrackIndices.release(); } -void FFMS_Indexer::ReadTS(const AVPacket *Packet, int64_t &TS, bool &UseDTS) { - if (!UseDTS && Packet->pts != AV_NOPTS_VALUE) - TS = Packet->pts; +void FFMS_Indexer::ReadTS(const AVPacket &Packet, int64_t &TS, bool &UseDTS) { + if (!UseDTS && Packet.pts != AV_NOPTS_VALUE) + TS = Packet.pts; if (TS == AV_NOPTS_VALUE) UseDTS = true; - if (UseDTS && Packet->dts != AV_NOPTS_VALUE) - TS = Packet->dts; + if (UseDTS && Packet.dts != AV_NOPTS_VALUE) + TS = Packet.dts; } diff --git a/src/core/indexing.h b/src/core/indexing.h index a108648028..4b0360c8c8 100644 --- a/src/core/indexing.h +++ b/src/core/indexing.h @@ -82,10 +82,10 @@ struct FFMS_Indexer { int64_t Filesize; uint8_t Digest[20]; - void ReadTS(const AVPacket *Packet, int64_t &TS, bool &UseDTS); + void ReadTS(const AVPacket &Packet, int64_t &TS, bool &UseDTS); void CheckAudioProperties(int Track, AVCodecContext *Context); - uint32_t IndexAudioPacket(int Track, AVPacket *Packet, SharedAVContext &Context, FFMS_Index &TrackIndices); - void ParseVideoPacket(SharedAVContext &VideoContext, AVPacket *pkt, int *RepeatPict, int *FrameType, bool *Invisible, bool *SecondField, enum AVPictureStructure *LastPicStruct); + uint32_t IndexAudioPacket(int Track, const AVPacket &Packet, SharedAVContext &Context, FFMS_Index &TrackIndices); + void ParseVideoPacket(SharedAVContext &VideoContext, const AVPacket &pkt, int *RepeatPict, int *FrameType, bool *Invisible, bool *SecondField, enum AVPictureStructure *LastPicStruct); void Free(); public: FFMS_Indexer(const char *Filename, const FFMS_KeyValuePair *DemuxerOptions, int NumOptions); diff --git a/src/core/utils.h b/src/core/utils.h index 14492d9798..39bd41dfdf 100644 --- a/src/core/utils.h +++ b/src/core/utils.h @@ -63,6 +63,18 @@ void FillAP(FFMS_AudioProperties &AP, AVCodecContext *CTX, FFMS_Track &Frames); void LAVFOpenFile(const char *SourceFile, AVFormatContext *&FormatContext, int Track, const std::map &LAVFOpts); +// RAII wrapper for AVPacket * that handles av_packet_alloc and av_packet_free. +// av_packet_ref and av_packet_unref still need to be handled by user code. +class SmartAVPacket : public std::unique_ptr { +public: + SmartAVPacket() : std::unique_ptr(av_packet_alloc(), [](AVPacket *packet) { av_packet_free(&packet); }) { + if (!*this) { + throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED, + "Could not allocate packet."); + } + } +}; + namespace optdetail { template T get_av_opt(void *v, const char *name) { diff --git a/src/core/videosource.cpp b/src/core/videosource.cpp index 75a91f820a..394a3beb96 100644 --- a/src/core/videosource.cpp +++ b/src/core/videosource.cpp @@ -252,11 +252,10 @@ FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, in DecodeFrame = av_frame_alloc(); LastDecodedFrame = av_frame_alloc(); - StashedPacket = av_packet_alloc(); - if (!DecodeFrame || !LastDecodedFrame || !StashedPacket) + if (!DecodeFrame || !LastDecodedFrame) throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED, - "Could not allocate dummy frame / stashed packet."); + "Could not allocate dummy frame."); // Dummy allocations so the unallocated case doesn't have to be handled later if (av_image_alloc(SWSFrameData, SWSFrameLinesize, 16, 16, AV_PIX_FMT_GRAY8, 4) < 0) @@ -742,15 +741,15 @@ void FFMS_VideoSource::CopyEye(AVStereo3DView view) { } } -bool FFMS_VideoSource::DecodePacket(AVPacket *Packet) { +bool FFMS_VideoSource::DecodePacket(const AVPacket &Packet) { std::swap(DecodeFrame, LastDecodedFrame); ResendPacket = false; - int PacketNum = Frames.FrameFromPTS(Frames.UseDTS ? Packet->dts : Packet->pts, true); - bool PacketHidden = !!(Packet->flags & AV_PKT_FLAG_DISCARD) || (PacketNum != -1 && Frames[PacketNum].MarkedHidden); + int PacketNum = Frames.FrameFromPTS(Frames.UseDTS ? Packet.dts : Packet.pts, true); + bool PacketHidden = !!(Packet.flags & AV_PKT_FLAG_DISCARD) || (PacketNum != -1 && Frames[PacketNum].MarkedHidden); bool SecondField = PacketNum != -1 && Frames[PacketNum].SecondField; - int Ret = avcodec_send_packet(CodecContext, Packet); + int Ret = avcodec_send_packet(CodecContext, &Packet); if (Ret == AVERROR(EAGAIN)) { // Send queue is full, so stash packet to resend on the next call. ResendPacket = true; @@ -818,7 +817,7 @@ int FFMS_VideoSource::Seek(int n) { // We always assume seeking is possible if the first seek succeeds avcodec_flush_buffers(CodecContext); ResendPacket = false; - av_packet_unref(StashedPacket); + av_packet_unref(StashedPacket.get()); // When it's 0 we always know what the next frame is (or more exactly should be) if (n == 0) @@ -839,7 +838,6 @@ void FFMS_VideoSource::Free() { av_freep(&RightEyeFrameData[0]); av_frame_free(&DecodeFrame); av_frame_free(&LastDecodedFrame); - av_packet_free(&StashedPacket); } void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) { @@ -848,25 +846,22 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) { if (HasPendingDelayedFrames()) return; - AVPacket *Packet = av_packet_alloc(); - if (!Packet) - throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED, - "Could not allocate packet."); + SmartAVPacket Packet; int ret; if (ResendPacket) { // If we have a packet previously stashed due to a full input queue, // send it again. ret = 0; - av_packet_ref(Packet, StashedPacket); - av_packet_unref(StashedPacket); + av_packet_ref(Packet.get(), StashedPacket.get()); + av_packet_unref(StashedPacket.get()); } else { - ret = av_read_frame(FormatContext, Packet); + ret = av_read_frame(FormatContext, Packet.get()); } while (ret >= 0) { if (Packet->stream_index != VideoTrack) { - av_packet_unref(Packet); - ret = av_read_frame(FormatContext, Packet); + av_packet_unref(Packet.get()); + ret = av_read_frame(FormatContext, Packet.get()); continue; } @@ -876,21 +871,20 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) { if (Pos < 0) Pos = Packet->pos; - bool FrameFinished = DecodePacket(Packet); + bool FrameFinished = DecodePacket(*Packet); if (ResendPacket) - av_packet_ref(StashedPacket, Packet); - av_packet_unref(Packet); + av_packet_ref(StashedPacket.get(), Packet.get()); + av_packet_unref(Packet.get()); if (FrameFinished) { - av_packet_free(&Packet); return; } if (ResendPacket) { ret = 0; - av_packet_ref(Packet, StashedPacket); - av_packet_unref(StashedPacket); + av_packet_ref(Packet.get(), StashedPacket.get()); + av_packet_unref(StashedPacket.get()); } else { - ret = av_read_frame(FormatContext, Packet); + ret = av_read_frame(FormatContext, Packet.get()); } } if (IsIOError(ret)) @@ -898,8 +892,7 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) { "Failed to read packet: " + AVErrorToString(ret)); // Flush final frames - DecodePacket(Packet); - av_packet_free(&Packet); + DecodePacket(*Packet); } bool FFMS_VideoSource::SeekTo(int n, int SeekOffset) { diff --git a/src/core/videosource.h b/src/core/videosource.h index 6b58a06db1..8d40b3bf83 100644 --- a/src/core/videosource.h +++ b/src/core/videosource.h @@ -99,7 +99,7 @@ struct FFMS_VideoSource { uint8_t *RightEyeFrameData[4] = {}; int RightEyeLinesize[4] = {}; - AVPacket *StashedPacket = nullptr; + SmartAVPacket StashedPacket; bool ResendPacket = false; void DetectInputFormat(); @@ -129,7 +129,7 @@ struct FFMS_VideoSource { void ReAdjustOutputFormat(AVFrame *Frame); FFMS_Frame *OutputFrame(AVFrame *Frame); void SetVideoProperties(); - bool DecodePacket(AVPacket *Packet); + bool DecodePacket(const AVPacket &Packet); void DecodeNextFrame(int64_t &PTS, int64_t &Pos); bool SeekTo(int n, int SeekOffset); int Seek(int n); From 8c8274ea8e7520085934058d0c89a408db332ddb Mon Sep 17 00:00:00 2001 From: arch1t3cht Date: Tue, 4 Nov 2025 17:32:17 +0100 Subject: [PATCH 2/3] Return a full AVPacket in DecodeNextFrame Instead of writing the pts and pos to output args. This does slightly change behavior: Previously, DecodeNextFrame would return - the pos of the first packet read, and - the first *nonnegative* pts (or dts if Frames.UseDTS is true) of a packet read. Hence, AStartTime and Pos could potentially belong to different packets if the first packet read had a negative PTS, like in an mp4 whose start is cut off by an edit list. In such cases, returning the first nonnegative PTS is the necessary with the current logic in GetFrame (which excludes hidden frames in the FrameFromPTS call), so keep this logic for now. This will change in the next commit, but this way this intermediate commit won't break any test files. --- src/core/videosource.cpp | 33 ++++++++++++++++----------------- src/core/videosource.h | 4 +++- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/core/videosource.cpp b/src/core/videosource.cpp index 394a3beb96..d26b223571 100644 --- a/src/core/videosource.cpp +++ b/src/core/videosource.cpp @@ -328,8 +328,7 @@ FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, in SeekByPos = !strcmp(FormatContext->iformat->name, "mpeg") || !strcmp(FormatContext->iformat->name, "mpegts") || !strcmp(FormatContext->iformat->name, "mpegtsraw"); // Always try to decode a frame to make sure all required parameters are known - int64_t DummyPTS = 0, DummyPos = 0; - DecodeNextFrame(DummyPTS, DummyPos); + DecodeNextFrame(); //VP.image_type = VideoInfo::IT_TFF; VP.FPSDenominator = FormatContext->streams[VideoTrack]->time_base.num; @@ -840,13 +839,11 @@ void FFMS_VideoSource::Free() { av_frame_free(&LastDecodedFrame); } -void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) { - AStartTime = -1; +SmartAVPacket FFMS_VideoSource::DecodeNextFrame() { + SmartAVPacket Packet, FirstPacket; if (HasPendingDelayedFrames()) - return; - - SmartAVPacket Packet; + return FirstPacket; int ret; if (ResendPacket) { @@ -865,18 +862,17 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) { continue; } - if (AStartTime < 0) - AStartTime = Frames.UseDTS ? Packet->dts : Packet->pts; - - if (Pos < 0) - Pos = Packet->pos; + if (!FirstPacket->data || (Frames.UseDTS ? FirstPacket->dts : FirstPacket->pts) < 0) { + av_packet_unref(FirstPacket.get()); + av_packet_ref(FirstPacket.get(), Packet.get()); + } bool FrameFinished = DecodePacket(*Packet); if (ResendPacket) av_packet_ref(StashedPacket.get(), Packet.get()); av_packet_unref(Packet.get()); if (FrameFinished) { - return; + return FirstPacket; } if (ResendPacket) { @@ -893,6 +889,7 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) { // Flush final frames DecodePacket(*Packet); + return FirstPacket; } bool FFMS_VideoSource::SeekTo(int n, int SeekOffset) { @@ -961,21 +958,23 @@ FFMS_Frame *FFMS_VideoSource::GetFrame(int n) { WasSkipped = false; } - int64_t StartTime = AV_NOPTS_VALUE, FilePos = -1; + SmartAVPacket FirstPacket; bool Skipped = (((unsigned) CurrentFrame < Frames.size()) && Frames[CurrentFrame].Skipped()); if (HasSeeked || !Skipped) { if (WasSkipped) WasSkipped = false; else - DecodeNextFrame(StartTime, FilePos); + FirstPacket = DecodeNextFrame(); } if (!HasSeeked) continue; + int64_t StartTime = FirstPacket->data == nullptr ? AV_NOPTS_VALUE : (Frames.UseDTS ? FirstPacket->dts : FirstPacket->pts); + if (StartTime == AV_NOPTS_VALUE && !Frames.HasTS) { - if (FilePos >= 0) { - CurrentFrame = Frames.FrameFromPos(FilePos); + if (FirstPacket->data) { + CurrentFrame = Frames.FrameFromPos(FirstPacket->pos); if (CurrentFrame >= 0) continue; } diff --git a/src/core/videosource.h b/src/core/videosource.h index 8d40b3bf83..99fcf11cd2 100644 --- a/src/core/videosource.h +++ b/src/core/videosource.h @@ -130,7 +130,9 @@ struct FFMS_VideoSource { FFMS_Frame *OutputFrame(AVFrame *Frame); void SetVideoProperties(); bool DecodePacket(const AVPacket &Packet); - void DecodeNextFrame(int64_t &PTS, int64_t &Pos); + + // Returns the first packet read that has nonzero PTS/DTS (depending on Frames.UseDTS) + SmartAVPacket DecodeNextFrame(); bool SeekTo(int n, int SeekOffset); int Seek(int n); void Free(); From 9c943eb6d5b7e8442f307f2cb12b550f8c345ed9 Mon Sep 17 00:00:00 2001 From: arch1t3cht Date: Tue, 23 Dec 2025 00:56:18 +0100 Subject: [PATCH 3/3] Rework packet finding functions After seeking, GetFrame() tries to find out where the demuxer ended up after the seek by trying to recognize the first packet that comes out of the demuxer afterwards by its PTS (or DTS if Frames.UseDTS is true), or by its Pos if that fails. Similarly, DecodePacket() needs to recognize the packet to see if it's a second field of some interlaced frame. When the same PTS can appear multiple times in the same file (e.g. in files with edit lists), this can cause issues. This commit replaces the FrameFromPTS / FrameFromPos functions and the fallback logic in GetFrame() by a single FindPacket function that takes an entire AVPacket. FindPacket() tries to find the unique packet in the index whose TS/Pos/Flags match the given packet. If no such packet exists, it tries a fallback sequence of comparing fewer fields. The current fallback sequence is mostly chosen based on intuition - for well-behaved files simply checking all fields should work fine. If examples come up where this fallback sequence becomes relevant, it can be adjusted. --- src/core/track.cpp | 86 ++++++++++++++++++++++++++++++++++------ src/core/track.h | 4 +- src/core/videosource.cpp | 24 ++--------- src/core/videosource.h | 2 +- 4 files changed, 79 insertions(+), 37 deletions(-) diff --git a/src/core/track.cpp b/src/core/track.cpp index 604e1493df..b75704f1f6 100644 --- a/src/core/track.cpp +++ b/src/core/track.cpp @@ -25,6 +25,7 @@ #include "indexing.h" #include +#include #include #include @@ -157,23 +158,82 @@ static bool PTSComparison(FrameInfo FI1, FrameInfo FI2) { return FI1.PTS < FI2.PTS; } -int FFMS_Track::FrameFromPTS(int64_t PTS, bool AllowHidden) const { +enum class AVPacketProp { + TS, // can be PTS or DTS depending on UseDTS + Pos, + Hidden, + Key, +}; + +const std::array, 5> FindPacketCheckSequence = {{ + {AVPacketProp::TS, AVPacketProp::Pos, AVPacketProp::Hidden, AVPacketProp::Key}, + {AVPacketProp::TS, AVPacketProp::Pos, AVPacketProp::Hidden}, + {AVPacketProp::TS, AVPacketProp::Pos}, + {AVPacketProp::TS}, + {AVPacketProp::Pos}, +}}; + +// Attempts to find the given AVPacket in the track's list of FrameInfos, +// returning the FrameInfo's index if one was found and -1 if not. +// +// The finding logic begins by trying to find a *unique* FrameInfo whose +// PTS, Pos, and flags match the given packet. +// If no such packet exists, it tries a fallback sequence of comparing fewer fields. +// +// The current fallback sequence is mostly chosen based on intuition - for +// well-behaved files simply checking all fields should work fine. +// If examples come up where this fallback sequence becomes relevant, it can +// be adjusted. +int FFMS_Track::FindPacket(const AVPacket &packet) const { FrameInfo F; - F.PTS = PTS; + F.PTS = UseDTS ? packet.dts : packet.pts; + + auto SameTSBegin = std::lower_bound(begin(), end(), F, PTSComparison);; + auto SameTSEnd = SameTSBegin; + while (SameTSEnd != end() && SameTSEnd->PTS == F.PTS) + SameTSEnd++; + + for (auto const& checks : FindPacketCheckSequence) { + // If the check sequence includes the timestamp, we can begin with a binary search + // since the FrameInfos are sorted by their timestamp, and then do a linear search in the found interval. + // If the check sequence does not include the timestamp, we have to do a full linear search. + auto Begin = begin(); + auto End = end(); + + if (checks[0] == AVPacketProp::TS) { + Begin = SameTSBegin; + End = SameTSEnd; + } - auto Pos = std::lower_bound(begin(), end(), F, PTSComparison); - while (Pos != end() && (!AllowHidden && Pos->Skipped()) && Pos->PTS == PTS) - Pos++; + int found = 0; + int result = -1; + + for (auto it = Begin; it < End; it++) { + bool match = std::all_of(checks.cbegin(), checks.cend(), [&](AVPacketProp check) { + switch (check) { + case AVPacketProp::TS: + return it->PTS == F.PTS; + case AVPacketProp::Pos: + return it->FilePos == packet.pos; + case AVPacketProp::Hidden: + return it->MarkedHidden == (packet.flags & AV_PKT_FLAG_DISCARD); + case AVPacketProp::Key: + return it->KeyFrame == (packet.flags & AV_PKT_FLAG_KEY); + } + return false; + }); + + if (match) { + found++; + result = std::distance(begin(), it); + } + } - if (Pos == end() || Pos->PTS != PTS) - return -1; - return std::distance(begin(), Pos); -} + if (found == 1) { + return result; + } + } -int FFMS_Track::FrameFromPos(int64_t Pos, bool AllowHidden) const { - for (size_t i = 0; i < size(); i++) - if (Data->Frames[i].FilePos == Pos && (AllowHidden || !Data->Frames[i].Skipped())) - return static_cast(i); return -1; } diff --git a/src/core/track.h b/src/core/track.h index dde1321881..1a26399384 100644 --- a/src/core/track.h +++ b/src/core/track.h @@ -27,6 +27,7 @@ #include #include +struct AVPacket; class ZipFile; struct FrameInfo { @@ -82,8 +83,7 @@ struct FFMS_Track { void FillAudioGaps(); int FindClosestVideoKeyFrame(int Frame) const; - int FrameFromPTS(int64_t PTS, bool AllowHidden = false) const; - int FrameFromPos(int64_t Pos, bool AllowHidden = false) const; + int FindPacket(const AVPacket &packet) const; int ClosestFrameFromPTS(int64_t PTS) const; int RealFrameNumber(int Frame) const; int VisibleFrameCount() const; diff --git a/src/core/videosource.cpp b/src/core/videosource.cpp index d26b223571..6116d04435 100644 --- a/src/core/videosource.cpp +++ b/src/core/videosource.cpp @@ -744,7 +744,7 @@ bool FFMS_VideoSource::DecodePacket(const AVPacket &Packet) { std::swap(DecodeFrame, LastDecodedFrame); ResendPacket = false; - int PacketNum = Frames.FrameFromPTS(Frames.UseDTS ? Packet.dts : Packet.pts, true); + int PacketNum = Frames.FindPacket(Packet); bool PacketHidden = !!(Packet.flags & AV_PKT_FLAG_DISCARD) || (PacketNum != -1 && Frames[PacketNum].MarkedHidden); bool SecondField = PacketNum != -1 && Frames[PacketNum].SecondField; @@ -862,10 +862,8 @@ SmartAVPacket FFMS_VideoSource::DecodeNextFrame() { continue; } - if (!FirstPacket->data || (Frames.UseDTS ? FirstPacket->dts : FirstPacket->pts) < 0) { - av_packet_unref(FirstPacket.get()); + if (!FirstPacket->data) av_packet_ref(FirstPacket.get(), Packet.get()); - } bool FrameFinished = DecodePacket(*Packet); if (ResendPacket) @@ -971,23 +969,7 @@ FFMS_Frame *FFMS_VideoSource::GetFrame(int n) { continue; int64_t StartTime = FirstPacket->data == nullptr ? AV_NOPTS_VALUE : (Frames.UseDTS ? FirstPacket->dts : FirstPacket->pts); - - if (StartTime == AV_NOPTS_VALUE && !Frames.HasTS) { - if (FirstPacket->data) { - CurrentFrame = Frames.FrameFromPos(FirstPacket->pos); - if (CurrentFrame >= 0) - continue; - } - // If the track doesn't have timestamps or file positions then - // just trust that we got to the right place, since we have no - // way to tell where we are - else { - CurrentFrame = n; - continue; - } - } - - CurrentFrame = Frames.FrameFromPTS(StartTime); + CurrentFrame = Frames.FindPacket(*FirstPacket); // Is the seek destination time known? Does it belong to a frame? if (CurrentFrame < 0) { diff --git a/src/core/videosource.h b/src/core/videosource.h index 99fcf11cd2..2da221d3e2 100644 --- a/src/core/videosource.h +++ b/src/core/videosource.h @@ -131,7 +131,7 @@ struct FFMS_VideoSource { void SetVideoProperties(); bool DecodePacket(const AVPacket &Packet); - // Returns the first packet read that has nonzero PTS/DTS (depending on Frames.UseDTS) + // Returns the first packet read SmartAVPacket DecodeNextFrame(); bool SeekTo(int n, int SeekOffset); int Seek(int n);