From 5d349b60ecd1ff269881391db1eb6e543bc5f9c8 Mon Sep 17 00:00:00 2001
From: arch1t3cht <arch1t3cht@gmail.com>
Date: Tue, 4 Nov 2025 14:11:34 +0100
Subject: [PATCH 1/3] Use RAII wrapper for AVPackets

This is in preparation for DecodeNextFrame returning an AVPacket,
at which point a RAII wrapper may be helpful to better track ownership
semantics.

This uses a simple std::unique_ptr alias that handles av_packet_alloc
and av_packet_free, while av_packet_ref and av_packet_unref are still
handled manually.
---
 src/core/audiosource.cpp | 34 ++++++++++++----------------
 src/core/audiosource.h   |  2 +-
 src/core/indexing.cpp    | 48 ++++++++++++++++++----------------------
 src/core/indexing.h      |  6 ++---
 src/core/utils.h         | 12 ++++++++++
 src/core/videosource.cpp | 47 +++++++++++++++++----------------------
 src/core/videosource.h   |  4 ++--
 7 files changed, 73 insertions(+), 80 deletions(-)

diff --git a/src/core/audiosource.cpp b/src/core/audiosource.cpp
index dbc55cda02..31d589dc39 100644
--- a/src/core/audiosource.cpp
+++ b/src/core/audiosource.cpp
@@ -301,12 +301,9 @@ FFMS_AudioSource::AudioBlock *FFMS_AudioSource::CacheBlock(CacheIterator &pos) {
 }
 
 int FFMS_AudioSource::DecodeNextBlock(CacheIterator *pos) {
-    AVPacket *Packet = av_packet_alloc();
-    if (!Packet)
-        throw FFMS_Exception(FFMS_ERROR_PARSER, FFMS_ERROR_ALLOCATION_FAILED,
-            "Could not allocate packet.");
-    if (!ReadPacket(Packet)) {
-        av_packet_free(&Packet);
+    SmartAVPacket Packet;
+
+    if (!ReadPacket(*Packet)) {
         throw FFMS_Exception(FFMS_ERROR_PARSER, FFMS_ERROR_UNKNOWN,
             "ReadPacket unexpectedly failed to read a packet");
     }
@@ -315,7 +312,7 @@ int FFMS_AudioSource::DecodeNextBlock(CacheIterator *pos) {
     CurrentSample = CurrentFrame->SampleStart;
 
     // Value code intentionally ignored, combined with the checks when indexing this mostly gives the expected behavior
-    avcodec_send_packet(CodecContext, Packet);
+    avcodec_send_packet(CodecContext, Packet.get());
 
     int NumberOfSamples = 0;
     AudioBlock *CachedBlock = nullptr;
@@ -331,12 +328,11 @@ int FFMS_AudioSource::DecodeNextBlock(CacheIterator *pos) {
             }
             break;
         } else if (Ret == AVERROR(EAGAIN)) {
-            if (!ReadPacket(Packet)) {
-                av_packet_free(&Packet);
+            if (!ReadPacket(*Packet)) {
                 throw FFMS_Exception(FFMS_ERROR_PARSER, FFMS_ERROR_UNKNOWN,
                     "ReadPacket unexpectedly failed to read a packet");
             }
-            avcodec_send_packet(CodecContext, Packet);
+            avcodec_send_packet(CodecContext, Packet.get());
         } else if (Ret == AVERROR_EOF) {
             break;
         } else {
@@ -344,8 +340,6 @@ int FFMS_AudioSource::DecodeNextBlock(CacheIterator *pos) {
         }
     }
 
-    av_packet_free(&Packet);
-
     // Zero sample packets aren't included in the index
     if (!NumberOfSamples)
         return NumberOfSamples;
@@ -553,24 +547,24 @@ void FFMS_AudioSource::Seek() {
     }
 }
 
-bool FFMS_AudioSource::ReadPacket(AVPacket *Packet) {
-    while (av_read_frame(FormatContext, Packet) >= 0) {
-        if (Packet->stream_index == TrackNumber) {
+bool FFMS_AudioSource::ReadPacket(AVPacket &Packet) {
+    while (av_read_frame(FormatContext, &Packet) >= 0) {
+        if (Packet.stream_index == TrackNumber) {
             // Required because not all audio packets, especially in ogg, have a pts. Use the previous valid packet's pts instead.
-            if (Packet->pts == AV_NOPTS_VALUE)
-                Packet->pts = LastValidTS;
+            if (Packet.pts == AV_NOPTS_VALUE)
+                Packet.pts = LastValidTS;
             else
-                LastValidTS = Packet->pts;
+                LastValidTS = Packet.pts;
 
             // This only happens if a really shitty demuxer seeks to a packet without pts *hrm* ogg *hrm* so read until a valid pts is reached
-            int64_t PacketTS = Frames.HasTS ? Packet->pts : Packet->pos;
+            int64_t PacketTS = Frames.HasTS ? Packet.pts : Packet.pos;
             if (PacketTS != AV_NOPTS_VALUE) {
                 while (PacketNumber > 0 && FrameTS(PacketNumber) > PacketTS) --PacketNumber;
                 while (FrameTS(PacketNumber) < PacketTS) ++PacketNumber;
                 return true;
             }
         }
-        av_packet_unref(Packet);
+        av_packet_unref(&Packet);
     }
     return false;
 }
diff --git a/src/core/audiosource.h b/src/core/audiosource.h
index a8efc474bd..6bd6e871b5 100644
--- a/src/core/audiosource.h
+++ b/src/core/audiosource.h
@@ -101,7 +101,7 @@ struct FFMS_AudioSource {
     // Called after seeking
     void Seek();
     // Read the next packet from the file
-    bool ReadPacket(AVPacket *);
+    bool ReadPacket(AVPacket &);
 
     // Close and reopen the source file to seek back to the beginning. Only
     // needs to do anything for formats that can't seek to the beginning otherwise.
diff --git a/src/core/indexing.cpp b/src/core/indexing.cpp
index c13b8ee599..83d3f30ed8 100644
--- a/src/core/indexing.cpp
+++ b/src/core/indexing.cpp
@@ -305,10 +305,10 @@ FFMS_Indexer::FFMS_Indexer(const char *Filename, const FFMS_KeyValuePair *Demuxe
     }
 }
 
-uint32_t FFMS_Indexer::IndexAudioPacket(int Track, AVPacket *Packet, SharedAVContext &Context, FFMS_Index &TrackIndices) {
+uint32_t FFMS_Indexer::IndexAudioPacket(int Track, const AVPacket &Packet, SharedAVContext &Context, FFMS_Index &TrackIndices) {
     AVCodecContext *CodecContext = Context.CodecContext;
     int64_t StartSample = Context.CurrentSample;
-    int Ret = avcodec_send_packet(CodecContext, Packet);
+    int Ret = avcodec_send_packet(CodecContext, &Packet);
     if (Ret != 0) {
         if (ErrorHandling == FFMS_IEH_ABORT) {
             throw FFMS_Exception(FFMS_ERROR_CODEC, FFMS_ERROR_DECODING, "Audio decoding error");
@@ -364,7 +364,7 @@ void FFMS_Indexer::CheckAudioProperties(int Track, AVCodecContext *Context) {
     }
 }
 
-void FFMS_Indexer::ParseVideoPacket(SharedAVContext &VideoContext, AVPacket *pkt, int *RepeatPict,
+void FFMS_Indexer::ParseVideoPacket(SharedAVContext &VideoContext, const AVPacket &pkt, int *RepeatPict,
                                     int *FrameType, bool *Invisible, bool *SecondField, enum AVPictureStructure *LastPicStruct) {
     if (VideoContext.Parser) {
         uint8_t *OB;
@@ -373,8 +373,8 @@ void FFMS_Indexer::ParseVideoPacket(SharedAVContext &VideoContext, AVPacket *pkt
         av_parser_parse2(VideoContext.Parser,
             VideoContext.CodecContext,
             &OB, &OBSize,
-            pkt->data, pkt->size,
-            pkt->pts, pkt->dts, pkt->pos);
+            pkt.data, pkt.size,
+            pkt.pts, pkt.dts, pkt.pos);
 
         // H.264 (PAFF) and HEVC may have one field per packet, so we need to track
         // when we have a full or half frame available, and mark one of them as
@@ -394,15 +394,15 @@ void FFMS_Indexer::ParseVideoPacket(SharedAVContext &VideoContext, AVPacket *pkt
 
         *RepeatPict = VideoContext.Parser->repeat_pict;
         *FrameType = VideoContext.Parser->pict_type;
-        *Invisible = (VideoContext.Parser->repeat_pict < 0 || (pkt->flags & AV_PKT_FLAG_DISCARD));
+        *Invisible = (VideoContext.Parser->repeat_pict < 0 || (pkt.flags & AV_PKT_FLAG_DISCARD));
     } else {
-        *Invisible = !!(pkt->flags & AV_PKT_FLAG_DISCARD);
+        *Invisible = !!(pkt.flags & AV_PKT_FLAG_DISCARD);
     }
 
     if (VideoContext.CodecContext->codec_id == AV_CODEC_ID_VP8)
-        ParseVP8(pkt->data[0], Invisible, FrameType);
+        ParseVP8(pkt.data[0], Invisible, FrameType);
     else if (VideoContext.CodecContext->codec_id == AV_CODEC_ID_VP9)
-        ParseVP9(pkt->data[0], FrameType);
+        ParseVP9(pkt.data[0], FrameType);
 }
 
 void FFMS_Indexer::Free() {
@@ -502,34 +502,30 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
         }
     }
 
-    AVPacket *Packet = av_packet_alloc();
-    if (!Packet)
-        throw FFMS_Exception(FFMS_ERROR_CODEC, FFMS_ERROR_ALLOCATION_FAILED,
-            "Could not allocate packet.");
+    SmartAVPacket Packet;
     std::vector<int64_t> LastValidTS(FormatContext->nb_streams, AV_NOPTS_VALUE);
 
     int64_t filesize = avio_size(FormatContext->pb);
     enum AVPictureStructure LastPicStruct = AV_PICTURE_STRUCTURE_UNKNOWN;
     int ret;
-    while ((ret = av_read_frame(FormatContext, Packet)) >= 0) {
+    while ((ret = av_read_frame(FormatContext, Packet.get())) >= 0) {
         // Update progress
         // FormatContext->pb can apparently be NULL when opening images.
         if (IC && FormatContext->pb) {
             if ((*IC)(FormatContext->pb->pos, filesize, ICPrivate)) {
-                av_packet_free(&Packet);
                 throw FFMS_Exception(FFMS_ERROR_CANCELLED, FFMS_ERROR_USER,
                     "Cancelled by user");
             }
         }
         if (!IndexMask.count(Packet->stream_index)) {
-            av_packet_unref(Packet);
+            av_packet_unref(Packet.get());
             continue;
         }
 
         int Track = Packet->stream_index;
         FFMS_Track &TrackInfo = (*TrackIndices)[Track];
         bool KeyFrame = !!(Packet->flags & AV_PKT_FLAG_KEY);
-        ReadTS(Packet, LastValidTS[Track], (*TrackIndices)[Track].UseDTS);
+        ReadTS(*Packet, LastValidTS[Track], (*TrackIndices)[Track].UseDTS);
 
         if (FormatContext->streams[Track]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
             int64_t PTS = TrackInfo.UseDTS ? Packet->dts : Packet->pts;
@@ -545,7 +541,6 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
                 bool HasAltRefs = (FormatContext->streams[Track]->codecpar->codec_id == AV_CODEC_ID_VP8 ||
                                    FormatContext->streams[Track]->codecpar->codec_id == AV_CODEC_ID_VP9);
                 if (Packet->duration == 0 && !HasAltRefs) {
-                    av_packet_free(&Packet);
                     throw FFMS_Exception(FFMS_ERROR_INDEXING, FFMS_ERROR_PARSER,
                         "Invalid packet pts, dts, and duration");
                 }
@@ -562,7 +557,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
             int FrameType = 0;
             bool Invisible = false;
             bool SecondField = false;
-            ParseVideoPacket(AVContexts[Track], Packet, &RepeatPict, &FrameType, &Invisible, &SecondField, &LastPicStruct);
+            ParseVideoPacket(AVContexts[Track], *Packet, &RepeatPict, &FrameType, &Invisible, &SecondField, &LastPicStruct);
 
             TrackInfo.AddVideoFrame(PTS, Packet->dts, RepeatPict, KeyFrame,
                 FrameType, Packet->pos, Invisible, SecondField);
@@ -574,7 +569,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
                 TrackInfo.HasTS = true;
 
             int64_t StartSample = AVContexts[Track].CurrentSample;
-            uint32_t SampleCount = IndexAudioPacket(Track, Packet, AVContexts[Track], *TrackIndices);
+            uint32_t SampleCount = IndexAudioPacket(Track, *Packet, AVContexts[Track], *TrackIndices);
             TrackInfo.SampleRate = AVContexts[Track].CodecContext->sample_rate;
 
             TrackInfo.AddAudioFrame(LastValidTS[Track], Packet->dts,
@@ -584,9 +579,8 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
         if (!(Packet->flags & AV_PKT_FLAG_DISCARD))
             TrackInfo.LastDuration = Packet->duration;
 
-        av_packet_unref(Packet);
+        av_packet_unref(Packet.get());
     }
-    av_packet_free(&Packet);
     if (IsIOError(ret))
         throw FFMS_Exception(FFMS_ERROR_INDEXING, FFMS_ERROR_FILE_READ,
             "Indexing failed: " + AVErrorToString(ret));
@@ -595,11 +589,11 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
     return TrackIndices.release();
 }
 
-void FFMS_Indexer::ReadTS(const AVPacket *Packet, int64_t &TS, bool &UseDTS) {
-    if (!UseDTS && Packet->pts != AV_NOPTS_VALUE)
-        TS = Packet->pts;
+void FFMS_Indexer::ReadTS(const AVPacket &Packet, int64_t &TS, bool &UseDTS) {
+    if (!UseDTS && Packet.pts != AV_NOPTS_VALUE)
+        TS = Packet.pts;
     if (TS == AV_NOPTS_VALUE)
         UseDTS = true;
-    if (UseDTS && Packet->dts != AV_NOPTS_VALUE)
-        TS = Packet->dts;
+    if (UseDTS && Packet.dts != AV_NOPTS_VALUE)
+        TS = Packet.dts;
 }
diff --git a/src/core/indexing.h b/src/core/indexing.h
index a108648028..4b0360c8c8 100644
--- a/src/core/indexing.h
+++ b/src/core/indexing.h
@@ -82,10 +82,10 @@ struct FFMS_Indexer {
     int64_t Filesize;
     uint8_t Digest[20];
 
-    void ReadTS(const AVPacket *Packet, int64_t &TS, bool &UseDTS);
+    void ReadTS(const AVPacket &Packet, int64_t &TS, bool &UseDTS);
     void CheckAudioProperties(int Track, AVCodecContext *Context);
-    uint32_t IndexAudioPacket(int Track, AVPacket *Packet, SharedAVContext &Context, FFMS_Index &TrackIndices);
-    void ParseVideoPacket(SharedAVContext &VideoContext, AVPacket *pkt, int *RepeatPict, int *FrameType, bool *Invisible, bool *SecondField, enum AVPictureStructure *LastPicStruct);
+    uint32_t IndexAudioPacket(int Track, const AVPacket &Packet, SharedAVContext &Context, FFMS_Index &TrackIndices);
+    void ParseVideoPacket(SharedAVContext &VideoContext, const AVPacket &pkt, int *RepeatPict, int *FrameType, bool *Invisible, bool *SecondField, enum AVPictureStructure *LastPicStruct);
     void Free();
 public:
     FFMS_Indexer(const char *Filename, const FFMS_KeyValuePair *DemuxerOptions, int NumOptions);
diff --git a/src/core/utils.h b/src/core/utils.h
index 14492d9798..39bd41dfdf 100644
--- a/src/core/utils.h
+++ b/src/core/utils.h
@@ -63,6 +63,18 @@ void FillAP(FFMS_AudioProperties &AP, AVCodecContext *CTX, FFMS_Track &Frames);
 
 void LAVFOpenFile(const char *SourceFile, AVFormatContext *&FormatContext, int Track, const std::map<std::string, std::string> &LAVFOpts);
 
+// RAII wrapper for AVPacket * that handles av_packet_alloc and av_packet_free.
+// av_packet_ref and av_packet_unref still need to be handled by user code.
+class SmartAVPacket : public std::unique_ptr<AVPacket, void(*)(AVPacket *)> {
+public:
+    SmartAVPacket() : std::unique_ptr<AVPacket, void(*)(AVPacket *)>(av_packet_alloc(), [](AVPacket *packet) { av_packet_free(&packet); }) {
+        if (!*this) {
+            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
+                "Could not allocate packet.");
+        }
+    }
+};
+
 namespace optdetail {
     template<typename T>
     T get_av_opt(void *v, const char *name) {
diff --git a/src/core/videosource.cpp b/src/core/videosource.cpp
index 75a91f820a..394a3beb96 100644
--- a/src/core/videosource.cpp
+++ b/src/core/videosource.cpp
@@ -252,11 +252,10 @@ FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, in
 
         DecodeFrame = av_frame_alloc();
         LastDecodedFrame = av_frame_alloc();
-        StashedPacket = av_packet_alloc();
 
-        if (!DecodeFrame || !LastDecodedFrame || !StashedPacket)
+        if (!DecodeFrame || !LastDecodedFrame)
             throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
-                "Could not allocate dummy frame / stashed packet.");
+                "Could not allocate dummy frame.");
 
         // Dummy allocations so the unallocated case doesn't have to be handled later
         if (av_image_alloc(SWSFrameData, SWSFrameLinesize, 16, 16, AV_PIX_FMT_GRAY8, 4) < 0)
@@ -742,15 +741,15 @@ void FFMS_VideoSource::CopyEye(AVStereo3DView view) {
     }
 }
 
-bool FFMS_VideoSource::DecodePacket(AVPacket *Packet) {
+bool FFMS_VideoSource::DecodePacket(const AVPacket &Packet) {
     std::swap(DecodeFrame, LastDecodedFrame);
     ResendPacket = false;
 
-    int PacketNum = Frames.FrameFromPTS(Frames.UseDTS ? Packet->dts : Packet->pts, true);
-    bool PacketHidden = !!(Packet->flags & AV_PKT_FLAG_DISCARD) || (PacketNum != -1 && Frames[PacketNum].MarkedHidden);
+    int PacketNum = Frames.FrameFromPTS(Frames.UseDTS ? Packet.dts : Packet.pts, true);
+    bool PacketHidden = !!(Packet.flags & AV_PKT_FLAG_DISCARD) || (PacketNum != -1 && Frames[PacketNum].MarkedHidden);
     bool SecondField = PacketNum != -1 && Frames[PacketNum].SecondField;
 
-    int Ret = avcodec_send_packet(CodecContext, Packet);
+    int Ret = avcodec_send_packet(CodecContext, &Packet);
     if (Ret == AVERROR(EAGAIN)) {
         // Send queue is full, so stash packet to resend on the next call.
         ResendPacket = true;
@@ -818,7 +817,7 @@ int FFMS_VideoSource::Seek(int n) {
     // We always assume seeking is possible if the first seek succeeds
     avcodec_flush_buffers(CodecContext);
     ResendPacket = false;
-    av_packet_unref(StashedPacket);
+    av_packet_unref(StashedPacket.get());
 
     // When it's 0 we always know what the next frame is (or more exactly should be)
     if (n == 0)
@@ -839,7 +838,6 @@ void FFMS_VideoSource::Free() {
     av_freep(&RightEyeFrameData[0]);
     av_frame_free(&DecodeFrame);
     av_frame_free(&LastDecodedFrame);
-    av_packet_free(&StashedPacket);
 }
 
 void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
@@ -848,25 +846,22 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
     if (HasPendingDelayedFrames())
         return;
 
-    AVPacket *Packet = av_packet_alloc();
-    if (!Packet)
-        throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
-            "Could not allocate packet.");
+    SmartAVPacket Packet;
 
     int ret;
     if (ResendPacket) {
         // If we have a packet previously stashed due to a full input queue,
         // send it again.
         ret = 0;
-        av_packet_ref(Packet, StashedPacket);
-        av_packet_unref(StashedPacket);
+        av_packet_ref(Packet.get(), StashedPacket.get());
+        av_packet_unref(StashedPacket.get());
     } else {
-        ret = av_read_frame(FormatContext, Packet);
+        ret = av_read_frame(FormatContext, Packet.get());
     }
     while (ret >= 0) {
         if (Packet->stream_index != VideoTrack) {
-            av_packet_unref(Packet);
-            ret = av_read_frame(FormatContext, Packet);
+            av_packet_unref(Packet.get());
+            ret = av_read_frame(FormatContext, Packet.get());
             continue;
         }
 
@@ -876,21 +871,20 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
         if (Pos < 0)
             Pos = Packet->pos;
 
-        bool FrameFinished = DecodePacket(Packet);
+        bool FrameFinished = DecodePacket(*Packet);
         if (ResendPacket)
-            av_packet_ref(StashedPacket, Packet);
-        av_packet_unref(Packet);
+            av_packet_ref(StashedPacket.get(), Packet.get());
+        av_packet_unref(Packet.get());
         if (FrameFinished) {
-            av_packet_free(&Packet);
             return;
         }
 
         if (ResendPacket) {
             ret = 0;
-            av_packet_ref(Packet, StashedPacket);
-            av_packet_unref(StashedPacket);
+            av_packet_ref(Packet.get(), StashedPacket.get());
+            av_packet_unref(StashedPacket.get());
         } else {
-            ret = av_read_frame(FormatContext, Packet);
+            ret = av_read_frame(FormatContext, Packet.get());
         }
     }
     if (IsIOError(ret))
@@ -898,8 +892,7 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
             "Failed to read packet: " + AVErrorToString(ret));
 
     // Flush final frames
-    DecodePacket(Packet);
-    av_packet_free(&Packet);
+    DecodePacket(*Packet);
 }
 
 bool FFMS_VideoSource::SeekTo(int n, int SeekOffset) {
diff --git a/src/core/videosource.h b/src/core/videosource.h
index 6b58a06db1..8d40b3bf83 100644
--- a/src/core/videosource.h
+++ b/src/core/videosource.h
@@ -99,7 +99,7 @@ struct FFMS_VideoSource {
     uint8_t *RightEyeFrameData[4] = {};
     int RightEyeLinesize[4] = {};
 
-    AVPacket *StashedPacket = nullptr;
+    SmartAVPacket StashedPacket;
     bool ResendPacket = false;
 
     void DetectInputFormat();
@@ -129,7 +129,7 @@ struct FFMS_VideoSource {
     void ReAdjustOutputFormat(AVFrame *Frame);
     FFMS_Frame *OutputFrame(AVFrame *Frame);
     void SetVideoProperties();
-    bool DecodePacket(AVPacket *Packet);
+    bool DecodePacket(const AVPacket &Packet);
     void DecodeNextFrame(int64_t &PTS, int64_t &Pos);
     bool SeekTo(int n, int SeekOffset);
     int Seek(int n);

From 8c8274ea8e7520085934058d0c89a408db332ddb Mon Sep 17 00:00:00 2001
From: arch1t3cht <arch1t3cht@gmail.com>
Date: Tue, 4 Nov 2025 17:32:17 +0100
Subject: [PATCH 2/3] Return a full AVPacket in DecodeNextFrame

Instead of writing the pts and pos to output args.
This does slightly change behavior: Previously, DecodeNextFrame would
return
- the pos of the first packet read, and
- the first *nonnegative* pts (or dts if Frames.UseDTS is true) of a
  packet read.
Hence, AStartTime and Pos could potentially belong to different packets
if the first packet read had a negative PTS, like in an mp4 whose start
is cut off by an edit list.
In such cases, returning the first nonnegative PTS is the necessary with
the current logic in GetFrame (which excludes hidden frames in the
FrameFromPTS call), so keep this logic for now. This will change in the
next commit, but this way this intermediate commit won't break any test
files.
---
 src/core/videosource.cpp | 33 ++++++++++++++++-----------------
 src/core/videosource.h   |  4 +++-
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/core/videosource.cpp b/src/core/videosource.cpp
index 394a3beb96..d26b223571 100644
--- a/src/core/videosource.cpp
+++ b/src/core/videosource.cpp
@@ -328,8 +328,7 @@ FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, in
         SeekByPos = !strcmp(FormatContext->iformat->name, "mpeg") || !strcmp(FormatContext->iformat->name, "mpegts") || !strcmp(FormatContext->iformat->name, "mpegtsraw");
 
         // Always try to decode a frame to make sure all required parameters are known
-        int64_t DummyPTS = 0, DummyPos = 0;
-        DecodeNextFrame(DummyPTS, DummyPos);
+        DecodeNextFrame();
 
         //VP.image_type = VideoInfo::IT_TFF;
         VP.FPSDenominator = FormatContext->streams[VideoTrack]->time_base.num;
@@ -840,13 +839,11 @@ void FFMS_VideoSource::Free() {
     av_frame_free(&LastDecodedFrame);
 }
 
-void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
-    AStartTime = -1;
+SmartAVPacket FFMS_VideoSource::DecodeNextFrame() {
+    SmartAVPacket Packet, FirstPacket;
 
     if (HasPendingDelayedFrames())
-        return;
-
-    SmartAVPacket Packet;
+        return FirstPacket;
 
     int ret;
     if (ResendPacket) {
@@ -865,18 +862,17 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
             continue;
         }
 
-        if (AStartTime < 0)
-            AStartTime = Frames.UseDTS ? Packet->dts : Packet->pts;
-
-        if (Pos < 0)
-            Pos = Packet->pos;
+        if (!FirstPacket->data || (Frames.UseDTS ? FirstPacket->dts : FirstPacket->pts) < 0) {
+            av_packet_unref(FirstPacket.get());
+            av_packet_ref(FirstPacket.get(), Packet.get());
+        }
 
         bool FrameFinished = DecodePacket(*Packet);
         if (ResendPacket)
             av_packet_ref(StashedPacket.get(), Packet.get());
         av_packet_unref(Packet.get());
         if (FrameFinished) {
-            return;
+            return FirstPacket;
         }
 
         if (ResendPacket) {
@@ -893,6 +889,7 @@ void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
 
     // Flush final frames
     DecodePacket(*Packet);
+    return FirstPacket;
 }
 
 bool FFMS_VideoSource::SeekTo(int n, int SeekOffset) {
@@ -961,21 +958,23 @@ FFMS_Frame *FFMS_VideoSource::GetFrame(int n) {
             WasSkipped = false;
         }
 
-        int64_t StartTime = AV_NOPTS_VALUE, FilePos = -1;
+        SmartAVPacket FirstPacket;
         bool Skipped = (((unsigned) CurrentFrame < Frames.size()) && Frames[CurrentFrame].Skipped());
         if (HasSeeked || !Skipped) {
             if (WasSkipped)
                 WasSkipped = false;
             else
-                DecodeNextFrame(StartTime, FilePos);
+                FirstPacket = DecodeNextFrame();
         }
 
         if (!HasSeeked)
             continue;
 
+        int64_t StartTime = FirstPacket->data == nullptr ? AV_NOPTS_VALUE : (Frames.UseDTS ? FirstPacket->dts : FirstPacket->pts);
+
         if (StartTime == AV_NOPTS_VALUE && !Frames.HasTS) {
-            if (FilePos >= 0) {
-                CurrentFrame = Frames.FrameFromPos(FilePos);
+            if (FirstPacket->data) {
+                CurrentFrame = Frames.FrameFromPos(FirstPacket->pos);
                 if (CurrentFrame >= 0)
                     continue;
             }
diff --git a/src/core/videosource.h b/src/core/videosource.h
index 8d40b3bf83..99fcf11cd2 100644
--- a/src/core/videosource.h
+++ b/src/core/videosource.h
@@ -130,7 +130,9 @@ struct FFMS_VideoSource {
     FFMS_Frame *OutputFrame(AVFrame *Frame);
     void SetVideoProperties();
     bool DecodePacket(const AVPacket &Packet);
-    void DecodeNextFrame(int64_t &PTS, int64_t &Pos);
+
+    // Returns the first packet read that has nonzero PTS/DTS (depending on Frames.UseDTS)
+    SmartAVPacket DecodeNextFrame();
     bool SeekTo(int n, int SeekOffset);
     int Seek(int n);
     void Free();

From 9c943eb6d5b7e8442f307f2cb12b550f8c345ed9 Mon Sep 17 00:00:00 2001
From: arch1t3cht <arch1t3cht@gmail.com>
Date: Tue, 23 Dec 2025 00:56:18 +0100
Subject: [PATCH 3/3] Rework packet finding functions

After seeking, GetFrame() tries to find out where the demuxer ended up
after the seek by trying to recognize the first packet that comes out of
the demuxer afterwards by its PTS (or DTS if Frames.UseDTS is true), or
by its Pos if that fails.
Similarly, DecodePacket() needs to recognize the packet to see if it's a
second field of some interlaced frame.
When the same PTS can appear multiple times in the same file (e.g. in
files with edit lists), this can cause issues.

This commit replaces the FrameFromPTS / FrameFromPos functions and the
fallback logic in GetFrame() by a single FindPacket function that takes
an entire AVPacket.
FindPacket() tries to find the unique packet in the index whose
TS/Pos/Flags match the given packet. If no such packet exists,
it tries a fallback sequence of comparing fewer fields.

The current fallback sequence is mostly chosen based on intuition - for
well-behaved files simply checking all fields should work fine. If
examples come up where this fallback sequence becomes relevant, it can
be adjusted.
---
 src/core/track.cpp       | 86 ++++++++++++++++++++++++++++++++++------
 src/core/track.h         |  4 +-
 src/core/videosource.cpp | 24 ++---------
 src/core/videosource.h   |  2 +-
 4 files changed, 79 insertions(+), 37 deletions(-)

diff --git a/src/core/track.cpp b/src/core/track.cpp
index 604e1493df..b75704f1f6 100644
--- a/src/core/track.cpp
+++ b/src/core/track.cpp
@@ -25,6 +25,7 @@
 #include "indexing.h"
 
 #include <algorithm>
+#include <array>
 #include <cmath>
 #include <cstdlib>
 
@@ -157,23 +158,82 @@ static bool PTSComparison(FrameInfo FI1, FrameInfo FI2) {
     return FI1.PTS < FI2.PTS;
 }
 
-int FFMS_Track::FrameFromPTS(int64_t PTS, bool AllowHidden) const {
+enum class AVPacketProp {
+    TS,     // can be PTS or DTS depending on UseDTS
+    Pos,
+    Hidden,
+    Key,
+};
+
+const std::array<std::vector<AVPacketProp>, 5> FindPacketCheckSequence = {{
+    {AVPacketProp::TS, AVPacketProp::Pos, AVPacketProp::Hidden, AVPacketProp::Key},
+    {AVPacketProp::TS, AVPacketProp::Pos, AVPacketProp::Hidden},
+    {AVPacketProp::TS, AVPacketProp::Pos},
+    {AVPacketProp::TS},
+    {AVPacketProp::Pos},
+}};
+
+// Attempts to find the given AVPacket in the track's list of FrameInfos,
+// returning the FrameInfo's index if one was found and -1 if not.
+//
+// The finding logic begins by trying to find a *unique* FrameInfo whose
+// PTS, Pos, and flags match the given packet.
+// If no such packet exists, it tries a fallback sequence of comparing fewer fields.
+//
+// The current fallback sequence is mostly chosen based on intuition - for
+// well-behaved files simply checking all fields should work fine.
+// If examples come up where this fallback sequence becomes relevant, it can
+// be adjusted.
+int FFMS_Track::FindPacket(const AVPacket &packet) const {
     FrameInfo F;
-    F.PTS = PTS;
+    F.PTS = UseDTS ? packet.dts : packet.pts;
+
+    auto SameTSBegin = std::lower_bound(begin(), end(), F, PTSComparison);;
+    auto SameTSEnd = SameTSBegin;
+    while (SameTSEnd != end() && SameTSEnd->PTS == F.PTS)
+        SameTSEnd++;
+
+    for (auto const& checks : FindPacketCheckSequence) {
+        // If the check sequence includes the timestamp, we can begin with a binary search
+        // since the FrameInfos are sorted by their timestamp, and then do a linear search in the found interval.
+        // If the check sequence does not include the timestamp, we have to do a full linear search.
+        auto Begin = begin();
+        auto End = end();
+
+        if (checks[0] == AVPacketProp::TS) {
+            Begin = SameTSBegin;
+            End = SameTSEnd;
+        }
 
-    auto Pos = std::lower_bound(begin(), end(), F, PTSComparison);
-    while (Pos != end() && (!AllowHidden && Pos->Skipped()) && Pos->PTS == PTS)
-        Pos++;
+        int found = 0;
+        int result = -1;
+
+        for (auto it = Begin; it < End; it++) {
+            bool match = std::all_of(checks.cbegin(), checks.cend(), [&](AVPacketProp check) {
+                    switch (check) {
+                        case AVPacketProp::TS:
+                            return it->PTS == F.PTS;
+                        case AVPacketProp::Pos:
+                            return it->FilePos == packet.pos;
+                        case AVPacketProp::Hidden:
+                            return it->MarkedHidden == (packet.flags & AV_PKT_FLAG_DISCARD);
+                        case AVPacketProp::Key:
+                            return it->KeyFrame == (packet.flags & AV_PKT_FLAG_KEY);
+                    }
+                    return false;
+                });
+
+            if (match) {
+                found++;
+                result = std::distance(begin(), it);
+            }
+        }
 
-    if (Pos == end() || Pos->PTS != PTS)
-        return -1;
-    return std::distance(begin(), Pos);
-}
+        if (found == 1) {
+            return result;
+        }
+    }
 
-int FFMS_Track::FrameFromPos(int64_t Pos, bool AllowHidden) const {
-    for (size_t i = 0; i < size(); i++)
-        if (Data->Frames[i].FilePos == Pos && (AllowHidden || !Data->Frames[i].Skipped()))
-            return static_cast<int>(i);
     return -1;
 }
 
diff --git a/src/core/track.h b/src/core/track.h
index dde1321881..1a26399384 100644
--- a/src/core/track.h
+++ b/src/core/track.h
@@ -27,6 +27,7 @@
 #include <vector>
 #include <memory>
 
+struct AVPacket;
 class ZipFile;
 
 struct FrameInfo {
@@ -82,8 +83,7 @@ struct FFMS_Track {
     void FillAudioGaps();
 
     int FindClosestVideoKeyFrame(int Frame) const;
-    int FrameFromPTS(int64_t PTS, bool AllowHidden = false) const;
-    int FrameFromPos(int64_t Pos, bool AllowHidden = false) const;
+    int FindPacket(const AVPacket &packet) const;
     int ClosestFrameFromPTS(int64_t PTS) const;
     int RealFrameNumber(int Frame) const;
     int VisibleFrameCount() const;
diff --git a/src/core/videosource.cpp b/src/core/videosource.cpp
index d26b223571..6116d04435 100644
--- a/src/core/videosource.cpp
+++ b/src/core/videosource.cpp
@@ -744,7 +744,7 @@ bool FFMS_VideoSource::DecodePacket(const AVPacket &Packet) {
     std::swap(DecodeFrame, LastDecodedFrame);
     ResendPacket = false;
 
-    int PacketNum = Frames.FrameFromPTS(Frames.UseDTS ? Packet.dts : Packet.pts, true);
+    int PacketNum = Frames.FindPacket(Packet);
     bool PacketHidden = !!(Packet.flags & AV_PKT_FLAG_DISCARD) || (PacketNum != -1 && Frames[PacketNum].MarkedHidden);
     bool SecondField = PacketNum != -1 && Frames[PacketNum].SecondField;
 
@@ -862,10 +862,8 @@ SmartAVPacket FFMS_VideoSource::DecodeNextFrame() {
             continue;
         }
 
-        if (!FirstPacket->data || (Frames.UseDTS ? FirstPacket->dts : FirstPacket->pts) < 0) {
-            av_packet_unref(FirstPacket.get());
+        if (!FirstPacket->data)
             av_packet_ref(FirstPacket.get(), Packet.get());
-        }
 
         bool FrameFinished = DecodePacket(*Packet);
         if (ResendPacket)
@@ -971,23 +969,7 @@ FFMS_Frame *FFMS_VideoSource::GetFrame(int n) {
             continue;
 
         int64_t StartTime = FirstPacket->data == nullptr ? AV_NOPTS_VALUE : (Frames.UseDTS ? FirstPacket->dts : FirstPacket->pts);
-
-        if (StartTime == AV_NOPTS_VALUE && !Frames.HasTS) {
-            if (FirstPacket->data) {
-                CurrentFrame = Frames.FrameFromPos(FirstPacket->pos);
-                if (CurrentFrame >= 0)
-                    continue;
-            }
-            // If the track doesn't have timestamps or file positions then
-            // just trust that we got to the right place, since we have no
-            // way to tell where we are
-            else {
-                CurrentFrame = n;
-                continue;
-            }
-        }
-
-        CurrentFrame = Frames.FrameFromPTS(StartTime);
+        CurrentFrame = Frames.FindPacket(*FirstPacket);
 
         // Is the seek destination time known? Does it belong to a frame?
         if (CurrentFrame < 0) {
diff --git a/src/core/videosource.h b/src/core/videosource.h
index 99fcf11cd2..2da221d3e2 100644
--- a/src/core/videosource.h
+++ b/src/core/videosource.h
@@ -131,7 +131,7 @@ struct FFMS_VideoSource {
     void SetVideoProperties();
     bool DecodePacket(const AVPacket &Packet);
 
-    // Returns the first packet read that has nonzero PTS/DTS (depending on Frames.UseDTS)
+    // Returns the first packet read
     SmartAVPacket DecodeNextFrame();
     bool SeekTo(int n, int SeekOffset);
     int Seek(int n);