From c970780c56356f01f5a4d44a8839f4e597e1af46 Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Fri, 12 Jun 2020 09:54:08 +0200
Subject: [PATCH 01/10] Prevent unnecessary copying of video frames

---
 .../Compositor/CompositorInterface.cpp        | 18 +++-
 .../Compositor/CompositorInterface.h          |  3 +-
 .../Compositor/VideoEncoder.cpp               | 83 +++++++++++--------
 .../Compositor/VideoEncoder.h                 | 39 +++++----
 .../UnityPlugin/UnityCompositorInterface.cpp  |  6 +-
 5 files changed, 92 insertions(+), 57 deletions(-)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
index f9c558341..08d1e056e 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
@@ -386,10 +386,20 @@ void CompositorInterface::StopRecording()
     activeVideoEncoder = nullptr;
 }
 
-void CompositorInterface::RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames)
+std::unique_ptr<VideoEncoder::VideoInput> CompositorInterface::GetAvailableRecordFrame()
+{
+    if (activeVideoEncoder == nullptr)
+    {
+        OutputDebugString(L"GetAvailableRecordFrame dropped, no active encoder\n");
+        return nullptr;
+    }
+    return activeVideoEncoder->GetAvailableVideoFrame();
+}
+
+void CompositorInterface::RecordFrameAsync(std::unique_ptr<VideoEncoder::VideoInput> frame, int numFrames)
 {
 #if _DEBUG
-	std::wstring debugString = L"RecordFrameAsync called, frameTime:" + std::to_wstring(frameTime) + L", numFrames:" + std::to_wstring(numFrames) + L"\n";
+	std::wstring debugString = L"RecordFrameAsync called, frameTime:" + std::to_wstring(frame->timestamp) + L", numFrames:" + std::to_wstring(numFrames) + L"\n";
 	OutputDebugString(debugString.data());
 #endif
 
@@ -407,8 +417,8 @@ void CompositorInterface::RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime,
 	// The encoder will update sample times internally based on the first seen sample time when recording.
 	// The encoder, however, does assume that audio and video samples will be based on the same source time.
 	// Providing audio and video samples with different starting times will cause issues in the generated video file.
-	LONGLONG sampleTime = frameTime;
-    activeVideoEncoder->QueueVideoFrame(videoFrame, sampleTime, numFrames * frameProvider->GetDurationHNS());
+    frame->duration = numFrames * frameProvider->GetDurationHNS();
+    activeVideoEncoder->QueueVideoFrame(std::move(frame));
 }
 
 void CompositorInterface::RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audioTime, int audioSize)
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
index e3e155f23..b349b1267 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.h
@@ -80,7 +80,8 @@ class CompositorInterface
     DLLEXPORT void StopRecording();
     
 	// frameTime is in hundred nano seconds
-	DLLEXPORT void RecordFrameAsync(BYTE* videoFrame, LONGLONG frameTime, int numFrames);
+    DLLEXPORT std::unique_ptr<VideoEncoder::VideoInput> GetAvailableRecordFrame();
+	DLLEXPORT void RecordFrameAsync(std::unique_ptr<VideoEncoder::VideoInput>, int numFrames);
 
 	// audioTime is in hundrend nano seconds
     DLLEXPORT void RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audioTime, int audioSize);
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index 5f9dcf694..267f540af 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -6,6 +6,8 @@
 
 #include "codecapi.h"
 
+#define NUM_VIDEO_BUFFERS 10
+
 VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride, UINT fps,
     UINT32 audioSampleRate, UINT32 audioChannels, UINT32 audioBPS, UINT32 videoBitrate, UINT32 videoMpegLevel) :
     frameWidth(frameWidth),
@@ -21,10 +23,15 @@ VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride,
     isRecording(false)
 {
 #if HARDWARE_ENCODE_VIDEO
-  inputFormat = MFVideoFormat_NV12;
+    inputFormat = MFVideoFormat_NV12;
 #else
-  inputFormat = MFVideoFormat_RGB32;
+    inputFormat = MFVideoFormat_RGB32;
 #endif
+
+    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
+    {
+        videoInputPool.push(std::make_unique<VideoInput>(frameHeight * frameStride));
+    }
 }
 
 VideoEncoder::~VideoEncoder()
@@ -284,12 +291,12 @@ void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp)
 #endif
 }
 
-void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duration)
+void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
 {
     std::shared_lock<std::shared_mutex> lock(videoStateLock);
 #if _DEBUG
 	{
-		std::wstring debugString = L"Writing Video, Timestamp:" + std::to_wstring(timestamp) + L"\n";
+		std::wstring debugString = L"Writing Video, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
 		OutputDebugString(debugString.data());
 	}
 #endif
@@ -302,54 +309,45 @@ void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duratio
 
 	if (startTime == INVALID_TIMESTAMP)
 	{
-		startTime = timestamp;
+		startTime = frame->timestamp;
 #if _DEBUG 
-		std::wstring debugString = L"Start time set from video, Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+		std::wstring debugString = L"Start time set from video, Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
 		OutputDebugString(debugString.data());
 #endif
 	}
-    else if (timestamp < startTime)
+    else if (frame->timestamp < startTime)
     {
 #if _DEBUG 
-		std::wstring debugString = L"Video not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+		std::wstring debugString = L"Video not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
 		OutputDebugString(debugString.data());
 #endif
         return;
     }
 
-    if (timestamp == prevVideoTime)
+    if (frame->timestamp == prevVideoTime)
     {
 #if _DEBUG 
-		std::wstring debugString = L"Video not recorded, Timestamp equals prevVideoTime. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(prevVideoTime) + L"\n";
+		std::wstring debugString = L"Video not recorded, Timestamp equals prevVideoTime. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(prevVideoTime) + L"\n";
 		OutputDebugString(debugString.data());
 #endif
         return;
     }
     
-    LONGLONG sampleTimeNow = timestamp;
+    LONGLONG sampleTimeNow = frame->timestamp;
     LONGLONG sampleTimeStart = startTime;
 
     LONGLONG sampleTime = sampleTimeNow - sampleTimeStart;
 
     if (prevVideoTime != INVALID_TIMESTAMP)
     {
-        duration = sampleTime - prevVideoTime;
+        frame->duration = sampleTime - prevVideoTime;
 #if _DEBUG 
-		std::wstring debugString = L"Updated write video duration:" + std::to_wstring(duration) + L", SampleTime:" + std::to_wstring(sampleTime) + L", PrevVideoTime:" + std::to_wstring(prevVideoTime) + L"\n";
+		std::wstring debugString = L"Updated write video duration:" + std::to_wstring(frame->duration) + L", SampleTime:" + std::to_wstring(sampleTime) + L", PrevVideoTime:" + std::to_wstring(prevVideoTime) + L"\n";
 		OutputDebugString(debugString.data());
 #endif
     }
 
-    // Copy frame to a temporary buffer and process on a background thread.
-#if HARDWARE_ENCODE_VIDEO
-    BYTE* tmpVideoBuffer = new BYTE[(int)(FRAME_BPP_NV12 * frameHeight * frameWidth)];
-    memcpy(tmpVideoBuffer, buffer, (int)(FRAME_BPP_NV12 * frameHeight * frameWidth));
-#else
-    BYTE* tmpVideoBuffer = new BYTE[frameHeight * frameStride];
-    memcpy(tmpVideoBuffer, buffer, frameHeight * frameStride);
-#endif
-
-    concurrency::create_task([=]()
+    std::async(std::launch::async, [=, frame{ std::move(frame) }]() mutable
     {
         std::shared_lock<std::shared_mutex> lock(videoStateLock);
 
@@ -357,7 +355,6 @@ void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duratio
         if (sinkWriter == NULL || !isRecording)
         {
             OutputDebugString(L"Must start recording before writing video frames.\n");
-            delete[] tmpVideoBuffer;
             return;
         }
 
@@ -387,7 +384,7 @@ void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duratio
             hr = MFCopyImage(
                 pData,                      // Destination buffer.
                 cbWidth,                    // Destination stride.
-                tmpVideoBuffer,
+                frame->buffer,
                 cbWidth,                    // Source stride.
                 cbWidth,                    // Image width in bytes.
                 imageHeight                 // Image height in pixels.
@@ -401,7 +398,7 @@ void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duratio
 
 #if _DEBUG
 		{
-			std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
+			std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(frame->duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
 			OutputDebugString(debugString.data());
 		}
 #endif
@@ -414,14 +411,18 @@ void VideoEncoder::WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duratio
         if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(pVideoBuffer); }
 
         if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleTime(sampleTime); } //100-nanosecond units
-        if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(duration); } //100-nanosecond units
+        if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(frame->duration); } //100-nanosecond units
 
         // Send the sample to the Sink Writer.
         if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(videoStreamIndex, pVideoSample); }
 
         SafeRelease(pVideoSample);
         SafeRelease(pVideoBuffer);
-        delete[] tmpVideoBuffer;
+
+        {
+            std::shared_lock<std::shared_mutex>(videoInputPoolLock);
+            videoInputPool.push(std::move(frame));
+        }
 
         if (FAILED(hr))
         {
@@ -508,17 +509,32 @@ void VideoEncoder::StopRecording()
     SafeRelease(sinkWriter);
 }
 
-void VideoEncoder::QueueVideoFrame(byte* buffer, LONGLONG timestamp, LONGLONG duration)
+std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
+{
+    std::shared_lock<std::shared_mutex> lock(videoInputPoolLock);
+    if (videoInputPool.empty())
+    {
+        return std::make_unique<VideoInput>(frameStride * frameHeight);
+    }
+    else
+    {
+        auto result = std::move(videoInputPool.front());
+        videoInputPool.pop();
+        return result;
+    }
+}
+
+void VideoEncoder::QueueVideoFrame(std::unique_ptr<VideoEncoder::VideoInput> frame)
 {
     std::shared_lock<std::shared_mutex> lock(videoStateLock);
 
     if (acceptQueuedFrames)
     {
-        videoQueue.push(VideoInput(buffer, timestamp, duration));
 #if _DEBUG
-		std::wstring debugString = L"Pushed Video Input, Timestamp:" + std::to_wstring(timestamp) + L"\n";
-		OutputDebugString(debugString.data());
+        std::wstring debugString = L"Pushed Video Input, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
+        OutputDebugString(debugString.data());
 #endif
+        videoQueue.push(std::move(frame));
     }
 }
 
@@ -548,8 +564,7 @@ void VideoEncoder::Update()
     {
         if (isRecording)
         {
-            VideoInput input = videoQueue.front();
-            WriteVideo(input.sharedBuffer, input.timestamp, input.duration);
+            WriteVideo(std::move(videoQueue.front()));
             videoQueue.pop();
         }
     }
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
index 350300564..59ba2601f 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
@@ -38,34 +38,39 @@ class VideoEncoder
     void StopRecording();
 
     // Used for recording video from a background thread.
-    void QueueVideoFrame(byte* buffer, LONGLONG timestamp, LONGLONG duration);
+    class VideoInput;
+    std::unique_ptr<VideoInput> GetAvailableVideoFrame();
+    void QueueVideoFrame(std::unique_ptr<VideoInput> frame);
     void QueueAudioFrame(byte* buffer, int bufferSize, LONGLONG timestamp);
 
     // Do not call this from a background thread.
     void Update();
 
-private:
-    void WriteVideo(byte* buffer, LONGLONG timestamp, LONGLONG duration);
-    void WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp);
-
-    LARGE_INTEGER freq;
-
     class VideoInput
     {
     public:
-        byte * sharedBuffer;
-
-        LONGLONG timestamp;
-        LONGLONG duration;
+        VideoInput(size_t bufferSize)
+        {
+            buffer = new byte[bufferSize];
+        }
 
-        VideoInput(byte* buffer, LONGLONG timestamp, LONGLONG duration)
+        ~VideoInput()
         {
-            this->sharedBuffer = buffer;
-            this->timestamp = timestamp;
-            this->duration = duration;
+            delete[] buffer;
         }
+
+        byte* buffer;
+
+        LONGLONG timestamp = INVALID_TIMESTAMP;
+        LONGLONG duration = INVALID_TIMESTAMP;
     };
 
+private:
+    void WriteVideo(std::unique_ptr<VideoInput> frame);
+    void WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp);
+
+    LARGE_INTEGER freq;
+
     class AudioInput
     {
     public:
@@ -108,10 +113,12 @@ class VideoEncoder
 
     LONGLONG startTime = INVALID_TIMESTAMP;
 
-    std::queue<VideoInput> videoQueue;
+    std::queue<std::unique_ptr<VideoInput>> videoInputPool;
+    std::queue<std::unique_ptr<VideoInput>> videoQueue;
     std::queue<AudioInput> audioQueue;
 
     std::shared_mutex videoStateLock;
+    std::shared_mutex videoInputPoolLock;
 
 #if HARDWARE_ENCODE_VIDEO
     IMFDXGIDeviceManager* deviceManager = NULL;
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
index fb6433c6e..d19e34c8b 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
@@ -178,8 +178,10 @@ void UpdateVideoRecordingFrame()
         float bpp = FRAME_BPP_RGBA;
 #endif
 
-        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, videoBytes[videoBufferIndex], bpp);
-        ci->RecordFrameAsync(videoBytes[videoBufferIndex], queuedVideoFrameTime, queuedVideoFrameCount);
+        auto frame = ci->GetAvailableRecordFrame();
+        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->buffer, bpp);
+        frame->timestamp = queuedVideoFrameTime;
+        ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);
     }
 
     if (lastVideoFrame >= 0 && lastRecordedVideoFrame != lastVideoFrame)

From 4432731dde3f1b9a6a854f235a27fb63539c7a86 Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Fri, 12 Jun 2020 10:41:09 +0200
Subject: [PATCH 02/10] Ensure ordered frame writing

---
 .../SpectatorView.Compositor/Compositor/VideoEncoder.cpp    | 6 +++++-
 .../SpectatorView.Compositor/Compositor/VideoEncoder.h      | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index 267f540af..6c9143212 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -347,8 +347,12 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
 #endif
     }
 
-    std::async(std::launch::async, [=, frame{ std::move(frame) }]() mutable
+    videoWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
     {
+        if (previousWriteFuture.valid())
+        {
+            previousWriteFuture.wait();
+        }
         std::shared_lock<std::shared_mutex> lock(videoStateLock);
 
         HRESULT hr = E_PENDING;
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
index 59ba2601f..a9e4d17bb 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
@@ -119,6 +119,7 @@ class VideoEncoder
 
     std::shared_mutex videoStateLock;
     std::shared_mutex videoInputPoolLock;
+    std::future<void> videoWriteFuture;
 
 #if HARDWARE_ENCODE_VIDEO
     IMFDXGIDeviceManager* deviceManager = NULL;

From b54c64167cf4b13e5262e0f595afd6950f4ba9e5 Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Fri, 12 Jun 2020 10:41:25 +0200
Subject: [PATCH 03/10] Remove unnecessary frame allocations

---
 .../UnityPlugin/UnityCompositorInterface.cpp  | 55 -------------------
 1 file changed, 55 deletions(-)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
index d19e34c8b..bd3682f19 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
@@ -26,56 +26,6 @@ static BYTE* depthBytes = new BYTE[FRAME_BUFSIZE_DEPTH16];
 static BYTE* bodyMaskBytes = new BYTE[FRAME_BUFSIZE_DEPTH16];
 static BYTE* holoBytes = new BYTE[FRAME_BUFSIZE_RGBA];
 
-#define NUM_VIDEO_BUFFERS 10
-
-static byte** videoBytes = nullptr;
-static int videoBufferIndex = 0;
-
-void AllocateVideoBuffers(VideoRecordingFrameLayout frameLayout)
-{
-    if (videoBytes != nullptr)
-        return;
-
-    videoBytes = new byte*[NUM_VIDEO_BUFFERS];
-
-    int frameBufferSize;
-    if (frameLayout == VideoRecordingFrameLayout::Quad)
-    {
-#if HARDWARE_ENCODE_VIDEO
-        frameBufferSize = QUAD_FRAME_BUFSIZE_NV12;
-#else
-        frameBufferSize = QUAD_FRAME_BUFSIZE_RGBA;
-#endif
-    }
-    else
-    {
-#if HARDWARE_ENCODE_VIDEO
-        frameBufferSize = FRAME_BUFSIZE_NV12;
-#else
-        frameBufferSize = FRAME_BUFSIZE_RGBA;
-#endif
-    }
-
-    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
-    {
-        videoBytes[i] = new byte[frameBufferSize];
-    }
-}
-
-void FreeVideoBuffers()
-{
-    if (videoBytes == nullptr)
-        return;
-
-    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
-    {
-        delete[] videoBytes[i];
-    }
-    delete[] videoBytes;
-    videoBytes = nullptr;
-}
-
-
 static ID3D11Texture2D* g_holoRenderTexture = nullptr;
 
 static ID3D11Texture2D* g_colorTexture = nullptr;
@@ -171,7 +121,6 @@ void UpdateVideoRecordingFrame()
     //We have an old frame, lets get the data and queue it now
     if (VideoTextureBuffer.IsDataAvailable())
     {
-        videoBufferIndex = (videoBufferIndex + 1) % NUM_VIDEO_BUFFERS;
 #if HARDWARE_ENCODE_VIDEO
         float bpp = FRAME_BPP_NV12;
 #else
@@ -446,8 +395,6 @@ UNITYDLL void StopFrameProvider()
     {
         ci->StopFrameProvider();
     }
-
-    FreeVideoBuffers();
 }
 
 UNITYDLL void SetAudioData(BYTE* audioData, int audioSize, double audioTime)
@@ -484,7 +431,6 @@ UNITYDLL bool StartRecording(VideoRecordingFrameLayout frameLayout, LPCWSTR lpcD
     {
         lastVideoFrame = -1;
 		lastRecordedVideoFrame = -1;
-        AllocateVideoBuffers(frameLayout);
         VideoTextureBuffer.ReleaseTextures();
         VideoTextureBuffer.Reset();
 		isRecording = ci->StartRecording(frameLayout, lpcDesiredFileName, desiredFileNameLength, inputFileNameLength, lpFileName, fileNameLength);
@@ -499,7 +445,6 @@ UNITYDLL void StopRecording()
     if (videoInitialized && ci != nullptr)
     {
         ci->StopRecording();
-        FreeVideoBuffers();
         isRecording = false;
     }
 }

From 9cc0fbbc5d1be73f014928dfc6310384d5c47125 Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Wed, 17 Jun 2020 10:16:40 +0200
Subject: [PATCH 04/10] Move IMFMediaBuffer to frame class to prevent
 another unnecessary memory allocation

---
 .../Compositor/VideoEncoder.cpp               | 34 +++----------------
 .../Compositor/VideoEncoder.h                 | 25 ++++++++++++--
 .../UnityPlugin/UnityCompositorInterface.cpp  |  2 +-
 3 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index 6c9143212..5d1cbdaa1 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -355,7 +355,7 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
         }
         std::shared_lock<std::shared_mutex> lock(videoStateLock);
 
-        HRESULT hr = E_PENDING;
+        HRESULT hr = S_OK;
         if (sinkWriter == NULL || !isRecording)
         {
             OutputDebugString(L"Must start recording before writing video frames.\n");
@@ -373,32 +373,7 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
 #endif
 
         IMFSample* pVideoSample = NULL;
-        IMFMediaBuffer* pVideoBuffer = NULL;
-        BYTE* pData = NULL;
-
-        // Create a new memory buffer.
-        hr = MFCreateMemoryBuffer(cbBuffer, &pVideoBuffer);
-
-        // Lock the buffer and copy the video frame to the buffer.
-        if (SUCCEEDED(hr)) { hr = pVideoBuffer->Lock(&pData, NULL, NULL); }
-
-        if (SUCCEEDED(hr))
-        {
-            //TODO: Can pVideoBuffer be created from an ID3D11Texture2D*?
-            hr = MFCopyImage(
-                pData,                      // Destination buffer.
-                cbWidth,                    // Destination stride.
-                frame->buffer,
-                cbWidth,                    // Source stride.
-                cbWidth,                    // Image width in bytes.
-                imageHeight                 // Image height in pixels.
-            );
-        }
-
-        if (pVideoBuffer)
-        {
-            pVideoBuffer->Unlock();
-        }
+        frame->Unlock();
 
 #if _DEBUG
 		{
@@ -408,11 +383,11 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
 #endif
 
         // Set the data length of the buffer.
-        if (SUCCEEDED(hr)) { hr = pVideoBuffer->SetCurrentLength(cbBuffer); }
+        if (SUCCEEDED(hr)) { hr = frame->mediaBuffer->SetCurrentLength(cbBuffer); }
 
         // Create a media sample and add the buffer to the sample.
         if (SUCCEEDED(hr)) { hr = MFCreateSample(&pVideoSample); }
-        if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(pVideoBuffer); }
+        if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(frame->mediaBuffer); }
 
         if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleTime(sampleTime); } //100-nanosecond units
         if (SUCCEEDED(hr)) { hr = pVideoSample->SetSampleDuration(frame->duration); } //100-nanosecond units
@@ -421,7 +396,6 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
         if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(videoStreamIndex, pVideoSample); }
 
         SafeRelease(pVideoSample);
-        SafeRelease(pVideoBuffer);
 
         {
             std::shared_lock<std::shared_mutex>(videoInputPoolLock);
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
index a9e4d17bb..3d7899708 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
@@ -48,19 +48,38 @@ class VideoEncoder
 
     class VideoInput
     {
+        byte* buffer = nullptr;
     public:
         VideoInput(size_t bufferSize)
         {
-            buffer = new byte[bufferSize];
+            auto hr = MFCreateMemoryBuffer(bufferSize, &mediaBuffer);
         }
 
         ~VideoInput()
         {
-            delete[] buffer;
+            Unlock();
+            SafeRelease(mediaBuffer);
         }
 
-        byte* buffer;
+        byte* Lock()
+        {
+            if (buffer == nullptr)
+            {
+                mediaBuffer->Lock(&buffer, NULL, NULL);
+            }
+            return buffer;
+        }
+
+        void Unlock()
+        {
+            if (buffer != nullptr)
+            {
+                mediaBuffer->Unlock();
+                buffer = nullptr;
+            }
+        }
 
+        IMFMediaBuffer* mediaBuffer = nullptr;
         LONGLONG timestamp = INVALID_TIMESTAMP;
         LONGLONG duration = INVALID_TIMESTAMP;
     };
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
index bd3682f19..a030cb7c4 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
@@ -128,7 +128,7 @@ void UpdateVideoRecordingFrame()
 #endif
 
         auto frame = ci->GetAvailableRecordFrame();
-        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->buffer, bpp);
+        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->Lock(), bpp);
         frame->timestamp = queuedVideoFrameTime;
         ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);
     }

From 9104cf4e1545269b8cdbbc39e7a2b33a2828798f Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Wed, 17 Jun 2020 16:03:40 +0200
Subject: [PATCH 05/10] Initial working hardware encoder

---
 .../Compositor/VideoEncoder.cpp               | 51 ++++++++++----
 .../Compositor/VideoEncoder.h                 | 66 +++++++++++++++++--
 .../UnityPlugin/UnityCompositorInterface.cpp  | 17 +++--
 .../Scripts/Compositor/TextureManager.cs      |  5 +-
 4 files changed, 111 insertions(+), 28 deletions(-)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index 5d1cbdaa1..8bec47460 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -27,11 +27,7 @@ VideoEncoder::VideoEncoder(UINT frameWidth, UINT frameHeight, UINT frameStride,
 #else
     inputFormat = MFVideoFormat_RGB32;
 #endif
-
-    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
-    {
-        videoInputPool.push(std::make_unique<VideoInput>(frameHeight * frameStride));
-    }
+    inputFormat = MFVideoFormat_RGB32;
 }
 
 VideoEncoder::~VideoEncoder()
@@ -52,7 +48,21 @@ bool VideoEncoder::Initialize(ID3D11Device* device)
     if (deviceManager != nullptr)
     {
         OutputDebugString(L"Resetting device manager with graphics device.\n");
-        deviceManager->ResetDevice(device, resetToken);
+        hr = deviceManager->ResetDevice(device, resetToken);
+    }
+    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
+    {
+        videoInputPool.push(std::make_unique<VideoInput>(device));
+    }
+
+    ID3D10Multithread* multithread;
+    device->QueryInterface(&multithread);
+    multithread->SetMultithreadProtected(TRUE);
+
+#else
+    for (int i = 0; i < NUM_VIDEO_BUFFERS; i++)
+    {
+        videoInputPool.push(std::make_unique<VideoInput>(frameHeight * frameStride));
     }
 #endif
 
@@ -79,7 +89,7 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
     prevVideoTime = INVALID_TIMESTAMP;
     prevAudioTime = INVALID_TIMESTAMP;
 
-    HRESULT hr = E_PENDING;
+    HRESULT hr = S_OK;
 
     sinkWriter = NULL;
     videoStreamIndex = MAXDWORD;
@@ -94,13 +104,14 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
 #endif
 
     IMFAttributes *attr = nullptr;
-    MFCreateAttributes(&attr, 3);
+    MFCreateAttributes(&attr, 4);
 
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, TRUE); }
 
 #if HARDWARE_ENCODE_VIDEO
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true); }
     if (SUCCEEDED(hr)) { hr = attr->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, false); }
+    if (SUCCEEDED(hr)) { hr = attr->SetUnknown(MF_SINK_WRITER_D3D_MANAGER, deviceManager); }
 #endif
 
     hr = MFCreateSinkWriterFromURL(videoPath, NULL, attr, &sinkWriter);
@@ -145,6 +156,10 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
     if (SUCCEEDED(hr)) { hr = MFSetAttributeSize(pVideoTypeIn, MF_MT_FRAME_SIZE, frameWidth, frameHeight); }
     if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(pVideoTypeIn, MF_MT_FRAME_RATE, fps, 1); }
     if (SUCCEEDED(hr)) { hr = MFSetAttributeRatio(pVideoTypeIn, MF_MT_PIXEL_ASPECT_RATIO, 1, 1); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_DEFAULT_STRIDE, frameStride); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE); }
+    if (SUCCEEDED(hr)) { hr = pVideoTypeIn->SetUINT32(MF_MT_SAMPLE_SIZE, frameStride * frameHeight); }
     if (SUCCEEDED(hr)) { hr = sinkWriter->SetInputMediaType(videoStreamIndex, pVideoTypeIn, NULL); }
 
     if (encodeAudio)
@@ -347,7 +362,8 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
 #endif
     }
 
-    videoWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
+    //videoWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
+    auto lambda = [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
     {
         if (previousWriteFuture.valid())
         {
@@ -371,10 +387,9 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
         cbBuffer = (int)(FRAME_BPP_NV12 * frameWidth * frameHeight);
         imageHeight = (int)(FRAME_BPP_NV12 * frameHeight);
 #endif
+        
 
         IMFSample* pVideoSample = NULL;
-        frame->Unlock();
-
 #if _DEBUG
 		{
 			std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(frame->duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
@@ -382,9 +397,11 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
 		}
 #endif
 
+#if !HARDWARE_ENCODE_VIDEO
+        frame->Unlock();
+#endif
         // Set the data length of the buffer.
-        if (SUCCEEDED(hr)) { hr = frame->mediaBuffer->SetCurrentLength(cbBuffer); }
-
+        if (SUCCEEDED(hr)) { hr = frame->mediaBuffer->SetCurrentLength(frameHeight * frameStride); }
         // Create a media sample and add the buffer to the sample.
         if (SUCCEEDED(hr)) { hr = MFCreateSample(&pVideoSample); }
         if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(frame->mediaBuffer); }
@@ -406,7 +423,8 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
         {
             OutputDebugString(L"Error writing video frame.\n");
         }
-    });
+    };
+    lambda();
 
     prevVideoTime = sampleTime;
 }
@@ -492,7 +510,12 @@ std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
     std::shared_lock<std::shared_mutex> lock(videoInputPoolLock);
     if (videoInputPool.empty())
     {
+#if HARDWARE_ENCODE_VIDEO
+        OutputDebugString(L"Oh no video encoder input pool is empty");
+        return nullptr;
+#else
         return std::make_unique<VideoInput>(frameStride * frameHeight);
+#endif
     }
     else
     {
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
index 3d7899708..67604b028 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
@@ -46,7 +46,66 @@ class VideoEncoder
     // Do not call this from a background thread.
     void Update();
 
-    class VideoInput
+    // Fields common to both VideoInput variants: the Media Foundation buffer and the frame timing.
+    struct VideoInputBase
+    {
+        IMFMediaBuffer* mediaBuffer = nullptr;
+        LONGLONG timestamp = INVALID_TIMESTAMP;
+        LONGLONG duration = INVALID_TIMESTAMP;
+    };
+
+#if HARDWARE_ENCODE_VIDEO
+    // Video frame held in a Direct3D 11 texture and handed to the sink writer as a DXGI surface buffer.
+    class VideoInput : public VideoInputBase
+    {
+        ID3D11Device* device;
+        ID3D11DeviceContext* deviceContext;
+        ID3D11Texture2D* texture = nullptr;
+    public:
+        VideoInput(ID3D11Device* _device) : device(_device)
+        {
+            device->AddRef();
+            device->GetImmediateContext(&deviceContext);
+        }
+        // Releases the references taken by the constructor and the resources created by CopyFrom.
+        ~VideoInput()
+        {
+            SafeRelease(texture);
+            SafeRelease(deviceContext);
+            SafeRelease(device);
+            SafeRelease(mediaBuffer);
+        }
+        // Copies source into this frame; the texture and its media buffer are created on the first call.
+        void CopyFrom(ID3D11Texture2D* source)
+        {
+            if (texture == nullptr)
+            {
+                D3D11_TEXTURE2D_DESC existingDesc;
+                source->GetDesc(&existingDesc);
+                D3D11_TEXTURE2D_DESC textureDesc;
+                ZeroMemory(&textureDesc, sizeof(textureDesc));
+                textureDesc.Width = existingDesc.Width;
+                textureDesc.Height = existingDesc.Height;
+                textureDesc.MipLevels = existingDesc.MipLevels;
+                textureDesc.ArraySize = existingDesc.ArraySize;
+                textureDesc.Format = existingDesc.Format;
+                textureDesc.SampleDesc.Count = existingDesc.SampleDesc.Count;
+                textureDesc.SampleDesc.Quality = existingDesc.SampleDesc.Quality;
+                textureDesc.Usage = D3D11_USAGE_DEFAULT;
+                // hr must also carry the buffer creation result, otherwise that failure is never reported.
+                HRESULT hr = device->CreateTexture2D(&textureDesc, NULL, &texture);
+                if (SUCCEEDED(hr)) { hr = MFCreateDXGISurfaceBuffer(IID_ID3D11Texture2D, texture, 0, true, &mediaBuffer); }
+                if (FAILED(hr))
+                {
+                    OutputDebugString(L"Creating video frame failed");
+                    return; // no usable destination: skip the copy instead of passing a null texture
+                }
+            }
+            deviceContext->CopyResource(texture, source);
+        }
+    };
+#else
+    class VideoInput : public VideoInputBase
     {
         byte* buffer = nullptr;
     public:
@@ -78,11 +137,8 @@ class VideoEncoder
                 buffer = nullptr;
             }
         }
-
-        IMFMediaBuffer* mediaBuffer = nullptr;
-        LONGLONG timestamp = INVALID_TIMESTAMP;
-        LONGLONG duration = INVALID_TIMESTAMP;
     };
+#endif
 
 private:
     void WriteVideo(std::unique_ptr<VideoInput> frame);
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
index a030cb7c4..7126a03b6 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/UnityPlugin/UnityCompositorInterface.cpp
@@ -118,20 +118,16 @@ static int queuedVideoFrameCount = 0;
 
 void UpdateVideoRecordingFrame()
 {
+#if !HARDWARE_ENCODE_VIDEO
     //We have an old frame, lets get the data and queue it now
     if (VideoTextureBuffer.IsDataAvailable())
     {
-#if HARDWARE_ENCODE_VIDEO
-        float bpp = FRAME_BPP_NV12;
-#else
-        float bpp = FRAME_BPP_RGBA;
-#endif
-
         auto frame = ci->GetAvailableRecordFrame();
-        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->Lock(), bpp);
+        VideoTextureBuffer.FetchTextureData(g_pD3D11Device, frame->Lock(), FRAME_BPP_RGBA);
         frame->timestamp = queuedVideoFrameTime;
         ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);
     }
+#endif
 
     if (lastVideoFrame >= 0 && lastRecordedVideoFrame != lastVideoFrame)
     {
@@ -161,7 +157,14 @@ void UpdateVideoRecordingFrame()
 
         lastRecordedVideoFrame = lastVideoFrame;
         queuedVideoFrameTime = lastVideoFrame * ci->GetColorDuration();
+#if HARDWARE_ENCODE_VIDEO
+        auto frame = ci->GetAvailableRecordFrame();
+        frame->CopyFrom(g_videoTexture);
+        frame->timestamp = queuedVideoFrameTime;
+        ci->RecordFrameAsync(std::move(frame), queuedVideoFrameCount);
+#else
         VideoTextureBuffer.PrepareTextureFetch(g_pD3D11Device, g_videoTexture);
+#endif
     }
 
     lastVideoFrame = ci->compositeFrameIndex;
diff --git a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
index 2dbdc9b09..d53059e4e 100644
--- a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
+++ b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/TextureManager.cs
@@ -565,7 +565,8 @@ private IEnumerator OnPostRender()
                 }
 
                 // convert composite to the format expected by our video encoder (NV12 or BGR)
-                Graphics.Blit(videoSourceTexture, videoOutputTexture, hardwareEncodeVideo ? NV12VideoMat : BGRVideoMat);
+                //Graphics.Blit(videoSourceTexture, videoOutputTexture, hardwareEncodeVideo ? NV12VideoMat : BGRVideoMat);
+                Graphics.Blit(videoSourceTexture, videoOutputTexture, BGRVideoMat);
             }
 
             TextureRenderCompleted?.Invoke();
@@ -613,7 +614,7 @@ private void SetShaderValues()
             RGBToYUVMat.SetFloat("_Width", frameWidth);
             RGBToYUVMat.SetFloat("_Height", frameHeight);
 
-            BGRVideoMat.SetFloat("_YFlip", 0);
+            BGRVideoMat.SetFloat("_YFlip", 1);
         }
 
         /// <summary>

From 09f62de01c1419c20dd7e1a1b1ea3ce47f4db7b2 Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Thu, 18 Jun 2020 09:23:13 +0200
Subject: [PATCH 06/10] Revert debugging tests

---
 .../Compositor/VideoEncoder.cpp               | 37 ++++++++-----------
 .../Compositor/VideoEncoder.h                 |  1 +
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index 8bec47460..b1287f061 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -44,6 +44,7 @@ bool VideoEncoder::Initialize(ID3D11Device* device)
 
 #if HARDWARE_ENCODE_VIDEO
     MFCreateDXGIDeviceManager(&resetToken, &deviceManager);
+    this->device = device;
 
     if (deviceManager != nullptr)
     {
@@ -362,8 +363,7 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
 #endif
     }
 
-    //videoWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
-    auto lambda = [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
+    videoWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(videoWriteFuture) }]() mutable
     {
         if (previousWriteFuture.valid())
         {
@@ -378,30 +378,22 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
             return;
         }
 
-        LONG cbWidth = frameStride;
-        DWORD cbBuffer = cbWidth * frameHeight;
-        DWORD imageHeight = frameHeight;
-
-#if HARDWARE_ENCODE_VIDEO
-        cbWidth = frameWidth;
-        cbBuffer = (int)(FRAME_BPP_NV12 * frameWidth * frameHeight);
-        imageHeight = (int)(FRAME_BPP_NV12 * frameHeight);
-#endif
-        
-
+        DWORD cbBuffer = frameStride * frameHeight;
         IMFSample* pVideoSample = NULL;
 #if _DEBUG
-		{
-			std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(frame->duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
-			OutputDebugString(debugString.data());
-		}
+        {
+            std::wstring debugString = L"Writing Video Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(frame->duration) + L", BufferLength:" + std::to_wstring(cbBuffer) + L"\n";
+            OutputDebugString(debugString.data());
+        }
 #endif
 
 #if !HARDWARE_ENCODE_VIDEO
         frame->Unlock();
 #endif
+
         // Set the data length of the buffer.
         if (SUCCEEDED(hr)) { hr = frame->mediaBuffer->SetCurrentLength(frameHeight * frameStride); }
+
         // Create a media sample and add the buffer to the sample.
         if (SUCCEEDED(hr)) { hr = MFCreateSample(&pVideoSample); }
         if (SUCCEEDED(hr)) { hr = pVideoSample->AddBuffer(frame->mediaBuffer); }
@@ -423,8 +415,7 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
         {
             OutputDebugString(L"Error writing video frame.\n");
         }
-    };
-    lambda();
+    });
 
     prevVideoTime = sampleTime;
 }
@@ -453,6 +444,11 @@ void VideoEncoder::StopRecording()
 
     concurrency::create_task([&]
     {
+        if (videoWriteFuture.valid())
+        {
+            videoWriteFuture.wait();
+            videoWriteFuture = {};
+        }
         while (!videoQueue.empty())
         {
 			videoQueue.pop();
@@ -511,8 +507,7 @@ std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
     if (videoInputPool.empty())
     {
 #if HARDWARE_ENCODE_VIDEO
-        OutputDebugString(L"Oh no video encoder input pool is empty");
-        return nullptr;
+        return std::make_unique<VideoInput>(device);
 #else
         return std::make_unique<VideoInput>(frameStride * frameHeight);
 #endif
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
index 67604b028..c3e2d3347 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
@@ -197,6 +197,7 @@ class VideoEncoder
     std::future<void> videoWriteFuture;
 
 #if HARDWARE_ENCODE_VIDEO
+    ID3D11Device* device;
     IMFDXGIDeviceManager* deviceManager = NULL;
     UINT resetToken = 0;
 #endif

From c76e00f1bf40ebcd416c7db6cda2ea7b72b50374 Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Thu, 16 Jul 2020 09:05:15 +0200
Subject: [PATCH 07/10] Add comment

---
 .../SpectatorView.Compositor/Compositor/VideoEncoder.cpp         | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index b1287f061..f59b9e999 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -388,6 +388,7 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
 #endif
 
 #if !HARDWARE_ENCODE_VIDEO
+        // In case the user locks the frame but forgets to unlock
         frame->Unlock();
 #endif
 

From cae0f9f9c7935d944104a124f0578527a7f042b4 Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Thu, 16 Jul 2020 11:59:49 +0200
Subject: [PATCH 08/10] Fix stack overflow after long recording

---
 .../SpectatorView.Compositor/Compositor/VideoEncoder.cpp         | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index f59b9e999..676b158d5 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -368,6 +368,7 @@ void VideoEncoder::WriteVideo(std::unique_ptr<VideoEncoder::VideoInput> frame)
         if (previousWriteFuture.valid())
         {
             previousWriteFuture.wait();
+            previousWriteFuture = {};
         }
         std::shared_lock<std::shared_mutex> lock(videoStateLock);
 

From 83afc700f47d0645a81dc90550e047923d4f08a3 Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Mon, 20 Jul 2020 16:18:41 +0200
Subject: [PATCH 09/10] Remove temporary buffers for audio frames

---
 .../Compositor/CompositorInterface.cpp        |  5 +-
 .../Compositor/VideoEncoder.cpp               | 87 ++++++++++---------
 .../Compositor/VideoEncoder.h                 | 56 +++++++++---
 3 files changed, 91 insertions(+), 57 deletions(-)

diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
index 08d1e056e..49203d3ef 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/CompositorInterface.cpp
@@ -440,8 +440,9 @@ void CompositorInterface::RecordAudioFrameAsync(BYTE* audioFrame, LONGLONG audio
 	// The encoder will update sample times internally based on the first seen sample time when recording.
 	// The encoder, however, does assume that audio and video samples will be based on the same source time.
 	// Providing audio and video samples with different starting times will cause issues in the generated video file.
-	LONGLONG sampleTime = audioTime;
-    activeVideoEncoder->QueueAudioFrame(audioFrame, audioSize, sampleTime);
+    auto frame = activeVideoEncoder->GetAvailableAudioFrame();
+    frame->SetData(audioFrame, audioSize, audioTime);
+    activeVideoEncoder->QueueAudioFrame(std::move(frame));
 }
 
 bool CompositorInterface::ProvidesYUV()
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
index 676b158d5..a44e91a8c 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.cpp
@@ -196,12 +196,12 @@ void VideoEncoder::StartRecording(LPCWSTR videoPath, bool encodeAudio)
 #endif
 }
 
-void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp)
+void VideoEncoder::WriteAudio(std::unique_ptr<AudioInput> frame)
 {
     std::shared_lock<std::shared_mutex> lock(videoStateLock);
 #if _DEBUG
 	{
-		std::wstring debugString = L"Writing Audio, Timestamp:" + std::to_wstring(timestamp) + L"\n";
+		std::wstring debugString = L"Writing Audio, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
 		OutputDebugString(debugString.data());
 	}
 #endif
@@ -209,33 +209,33 @@ void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp)
 #if ENCODE_AUDIO
     if (!isRecording)
     {
-		std::wstring debugString = L"WriteAudio call failed: StartTime:" + std::to_wstring(startTime) + L", Timestamp:" + std::to_wstring(timestamp) + L"\n";
+		std::wstring debugString = L"WriteAudio call failed: StartTime:" + std::to_wstring(startTime) + L", Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
 		OutputDebugString(debugString.data());
         return;
     }
 	else if (startTime == INVALID_TIMESTAMP)
 	{
-		startTime = timestamp;
+		startTime = frame->timestamp;
 #if _DEBUG 
-		std::wstring debugString = L"Start time set from audio, Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+		std::wstring debugString = L"Start time set from audio, Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
 		OutputDebugString(debugString.data());
 #endif
 	}
-	else if (timestamp < startTime)
+	else if (frame->timestamp < startTime)
 	{
 #if _DEBUG 
-		std::wstring debugString = L"Audio not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
+		std::wstring debugString = L"Audio not recorded, Timestamp less than start time. Timestamp:" + std::to_wstring(frame->timestamp) + L", StartTime:" + std::to_wstring(startTime) + L"\n";
 		OutputDebugString(debugString.data());
 #endif
 		return;
 	}
 
-    LONGLONG sampleTimeNow = timestamp;
+    LONGLONG sampleTimeNow = frame->timestamp;
     LONGLONG sampleTimeStart = startTime;
 
     LONGLONG sampleTime = sampleTimeNow - sampleTimeStart;
 
-    LONGLONG duration = ((LONGLONG)((((float)AUDIO_SAMPLE_RATE * (16.0f /*bits per sample*/ / 8.0f /*bits per byte*/)) / (float)bufferSize) * 10000));
+    LONGLONG duration = ((LONGLONG)((((float)AUDIO_SAMPLE_RATE * (16.0f /*bits per sample*/ / 8.0f /*bits per byte*/)) / (float)frame->currentSize) * 10000));
     if (prevAudioTime != INVALID_TIMESTAMP)
     {
         duration = sampleTime - prevAudioTime;
@@ -245,62 +245,49 @@ void VideoEncoder::WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp)
 #endif
     }
 
-    // Copy frame to a temporary buffer and process on a background thread.
-    byte* tmpAudioBuffer = new byte[bufferSize];
-    memcpy(tmpAudioBuffer, buffer, bufferSize);
-
-    concurrency::create_task([=]()
+    audioWriteFuture = std::async(std::launch::async, [=, frame{ std::move(frame) }, previousWriteFuture{ std::move(audioWriteFuture) }]() mutable
     {
+        if (previousWriteFuture.valid())
+        {
+            previousWriteFuture.wait();
+            previousWriteFuture = {};
+        }
         std::shared_lock<std::shared_mutex> lock(videoStateLock);
 
-        HRESULT hr = E_PENDING;
         if (sinkWriter == NULL || !isRecording)
         {
             OutputDebugString(L"Must start recording before writing audio frames.\n");
-            delete[] tmpAudioBuffer;
             return;
         }
 
         IMFSample* pAudioSample = NULL;
-        IMFMediaBuffer* pAudioBuffer = NULL;
-
-        const DWORD cbAudioBuffer = bufferSize;
-
-        BYTE* pData = NULL;
-
-        hr = MFCreateMemoryBuffer(cbAudioBuffer, &pAudioBuffer);
-        if (SUCCEEDED(hr)) { hr = pAudioBuffer->Lock(&pData, NULL, NULL); }
-        memcpy(pData, tmpAudioBuffer, cbAudioBuffer);
-        if (pAudioBuffer)
-        {
-            pAudioBuffer->Unlock();
-        }
-
 
 #if _DEBUG
 		{
-			std::wstring debugString = L"Writing Audio Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(cbAudioBuffer) + L"\n";
+			std::wstring debugString = L"Writing Audio Sample, SampleTime:" + std::to_wstring(sampleTime) + L", SampleDuration:" + std::to_wstring(duration) + L", BufferLength:" + std::to_wstring(frame->currentSize) + L"\n";
 			OutputDebugString(debugString.data());
 		}
 #endif
 
+        HRESULT hr = S_OK;
         if (SUCCEEDED(hr)) { hr = MFCreateSample(&pAudioSample); }
         if (SUCCEEDED(hr)) { hr = pAudioSample->SetSampleTime(sampleTime); }
         if (SUCCEEDED(hr)) { hr = pAudioSample->SetSampleDuration(duration); }
-        if (SUCCEEDED(hr)) { hr = pAudioBuffer->SetCurrentLength(cbAudioBuffer); }
-        if (SUCCEEDED(hr)) { hr = pAudioSample->AddBuffer(pAudioBuffer); }
+        if (SUCCEEDED(hr)) { hr = pAudioSample->AddBuffer(frame->mediaBuffer); }
 
         if (SUCCEEDED(hr)) { hr = sinkWriter->WriteSample(audioStreamIndex, pAudioSample); }
 
         SafeRelease(pAudioSample);
-        SafeRelease(pAudioBuffer);
 
         if (FAILED(hr))
         {
             OutputDebugString(L"Error writing audio frame.\n");
         }
 
-        delete[] tmpAudioBuffer;
+        {
+            std::shared_lock<std::shared_mutex> lock(audioInputPoolLock);
+            audioInputPool.push(std::move(frame));
+        }
     });
 
     prevAudioTime = sampleTime;
@@ -468,6 +455,11 @@ void VideoEncoder::StopRecording()
 
     concurrency::create_task([&]
     {
+        if (audioWriteFuture.valid())
+        {
+            audioWriteFuture.wait();
+            audioWriteFuture = {};
+        }
         while (!audioQueue.empty())
         {
             audioQueue.pop();
@@ -522,6 +514,21 @@ std::unique_ptr<VideoEncoder::VideoInput> VideoEncoder::GetAvailableVideoFrame()
     }
 }
 
+// Hands out a pooled AudioInput for reuse, or a newly created one when the pool is empty.
+std::unique_ptr<VideoEncoder::AudioInput> VideoEncoder::GetAvailableAudioFrame()
+{
+    // pop() mutates the queue, so the pool lock has to be held exclusively rather than shared.
+    std::unique_lock<std::shared_mutex> lock(audioInputPoolLock);
+    if (audioInputPool.empty())
+    {
+        return std::make_unique<AudioInput>();
+    }
+
+    auto result = std::move(audioInputPool.front());
+    audioInputPool.pop();
+    return result;
+}
+
 void VideoEncoder::QueueVideoFrame(std::unique_ptr<VideoEncoder::VideoInput> frame)
 {
     std::shared_lock<std::shared_mutex> lock(videoStateLock);
@@ -536,17 +543,17 @@ void VideoEncoder::QueueVideoFrame(std::unique_ptr<VideoEncoder::VideoInput> fra
     }
 }
 
-void VideoEncoder::QueueAudioFrame(byte* buffer, int bufferSize, LONGLONG timestamp)
+void VideoEncoder::QueueAudioFrame(std::unique_ptr<VideoEncoder::AudioInput> frame)
 {
     std::shared_lock<std::shared_mutex> lock(videoStateLock);
 
     if (acceptQueuedFrames)
     {
-        audioQueue.push(AudioInput(buffer, bufferSize, timestamp));
 #if _DEBUG
-		std::wstring debugString = L"Pushed Audio Input, Timestamp:" + std::to_wstring(timestamp) + L"\n";
+		std::wstring debugString = L"Pushed Audio Input, Timestamp:" + std::to_wstring(frame->timestamp) + L"\n";
 		OutputDebugString(debugString.data());
 #endif
+        audioQueue.push(std::move(frame));
     }
 }
 
@@ -571,9 +578,7 @@ void VideoEncoder::Update()
     {
         if (isRecording)
         {
-            AudioInput input = audioQueue.front();
-            WriteAudio(input.buffer, input.bufferSize, input.timestamp);
-            delete[] input.buffer;
+            WriteAudio(std::move(audioQueue.front()));
             audioQueue.pop();
         }
     }
diff --git a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
index c3e2d3347..76598fc97 100644
--- a/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
+++ b/src/SpectatorView.Native/SpectatorView.Compositor/Compositor/VideoEncoder.h
@@ -39,9 +39,11 @@ class VideoEncoder
 
     // Used for recording video from a background thread.
     class VideoInput;
+    class AudioInput;
     std::unique_ptr<VideoInput> GetAvailableVideoFrame();
+    std::unique_ptr<AudioInput> GetAvailableAudioFrame();
     void QueueVideoFrame(std::unique_ptr<VideoInput> frame);
-    void QueueAudioFrame(byte* buffer, int bufferSize, LONGLONG timestamp);
+    void QueueAudioFrame(std::unique_ptr<AudioInput> frame);
 
     // Do not call this from a background thread.
     void Update();
@@ -140,28 +142,51 @@ class VideoEncoder
     };
 #endif
 
-private:
-    void WriteVideo(std::unique_ptr<VideoInput> frame);
-    void WriteAudio(byte* buffer, int bufferSize, LONGLONG timestamp);
-
-    LARGE_INTEGER freq;
-
     class AudioInput
     {
     public:
-        byte* buffer;
+        IMFMediaBuffer* mediaBuffer = nullptr;
+        int capacity = 0;
+        int currentSize = 0;
         LONGLONG timestamp;
-        int bufferSize;
 
-        AudioInput(byte* buffer, int buffSize, LONGLONG timestamp)
+        ~AudioInput()
         {
-            bufferSize = buffSize;
-            this->buffer = new byte[buffSize];
-            memcpy(this->buffer, buffer, buffSize);
+            SafeRelease(mediaBuffer);
+        }
+
+        // Copies bufferSize bytes from buffer into the media buffer, growing it first when it is too small.
+        void SetData(const byte* buffer, int bufferSize, LONGLONG timestamp)
+        {
+            if (bufferSize > capacity)
+            {
+                SafeRelease(mediaBuffer);
+                mediaBuffer = nullptr;
+                auto hr = MFCreateMemoryBuffer(bufferSize, &mediaBuffer);
+                // Capacity only grows on success, so a failed allocation is retried by the next call.
+                capacity = SUCCEEDED(hr) ? bufferSize : 0;
+                if (FAILED(hr)) { OutputDebugString(L"Failed to create audio memory buffer"); }
+            }
+            // No buffer exists after a failed allocation, or when a never-filled frame is given no data.
+            byte* lockedBuffer;
+            if (mediaBuffer == nullptr || FAILED(mediaBuffer->Lock(&lockedBuffer, nullptr, nullptr)))
+            {
+                return;
+            }
+            memcpy(lockedBuffer, buffer, bufferSize);
+            mediaBuffer->Unlock();
+            mediaBuffer->SetCurrentLength(bufferSize);
+            currentSize = bufferSize;
             this->timestamp = timestamp;
         }
     };
 
+private:
+    void WriteVideo(std::unique_ptr<VideoInput> frame);
+    void WriteAudio(std::unique_ptr<AudioInput> frame);
+
+    LARGE_INTEGER freq;
+
     IMFSinkWriter* sinkWriter;
     DWORD videoStreamIndex = MAXDWORD;
     DWORD audioStreamIndex = MAXDWORD;
@@ -190,11 +215,14 @@ class VideoEncoder
 
     std::queue<std::unique_ptr<VideoInput>> videoInputPool;
     std::queue<std::unique_ptr<VideoInput>> videoQueue;
-    std::queue<AudioInput> audioQueue;
+    std::queue<std::unique_ptr<AudioInput>> audioInputPool;
+    std::queue<std::unique_ptr<AudioInput>> audioQueue;
 
     std::shared_mutex videoStateLock;
     std::shared_mutex videoInputPoolLock;
+    std::shared_mutex audioInputPoolLock;
     std::future<void> videoWriteFuture;
+    std::future<void> audioWriteFuture;
 
 #if HARDWARE_ENCODE_VIDEO
     ID3D11Device* device;

From fc8a8ea1d4ad82af13f109de97ebaed422136daf Mon Sep 17 00:00:00 2001
From: Hermann Noll <hermann.justin.noll@sap.com>
Date: Mon, 20 Jul 2020 16:25:15 +0200
Subject: [PATCH 10/10] Add start-time estimation for audio frames

---
 .../Scripts/Compositor/CompositionManager.cs  | 66 +++++++++++++++++--
 1 file changed, 62 insertions(+), 4 deletions(-)

diff --git a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
index eb4bc7c0a..327cf6cb9 100644
--- a/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
+++ b/src/SpectatorView.Unity/Assets/SpectatorView/Scripts/Compositor/CompositionManager.cs
@@ -705,6 +705,55 @@ private void ResetCompositor()
             }
         }
 
+        struct AudioStartEstimation // estimates the offset that maps audio DSP time onto capture frame time
+        {
+            public bool IsValid { get; } // false for a default-initialized struct, true once constructed
+            public AudioStartEstimation(double dspTime, int frameIndex, double frameDuration)
+            {
+                IsValid = true;
+                this.frameDuration = frameDuration;
+                errorRange = timeOffset = 0.0; // just to initialize the struct fully
+                ResetEstimation(dspTime, frameIndex);
+            }
+            // Restarts from a single observation: frameIndex is taken to begin exactly at dspTime.
+            private void ResetEstimation(double dspTime, int frameIndex)
+            {
+                errorRange = frameDuration;
+                timeOffset = frameIndex * frameDuration - dspTime;
+            }
+            // Compares the predicted frame index with the observed one, then keeps, refines or resets the estimate.
+            public void Update(double dspTime, int frameIndex)
+            {
+                int expectedFrameIndex = (int)((timeOffset + dspTime) / frameDuration); // (int) truncates, i.e. rounds down for non-negative times
+                int frameError = frameIndex - expectedFrameIndex;
+
+                if (frameError == 0)
+                {
+                    //Debug.Log("I was correct");
+                    return;
+                }
+                else if (System.Math.Abs(frameError) == 1)
+                {
+                    //Debug.Log($"Corrected {((timeOffset + dspTime) / frameDuration)} ({expectedFrameIndex}) not {frameIndex} timeOffset {frameError}, newTimeOffset {timeOffset}, newErrorRange {errorRange}");
+                    double bound = frameError * errorRange; // either lower/upper depending on sign(frameError)
+                    timeOffset = timeOffset + bound / 2.0;
+                    errorRange /= 2.0; // as long as there are no jumps (abs(frameError) > 1) we approach the correct value
+                }
+                else
+                {
+                    Debug.Log($"Time jumped too far, had to reset audio start estimation {((timeOffset + dspTime) / frameDuration)} not {frameIndex}");
+                    ResetEstimation(dspTime, frameIndex);
+                }
+            }
+            // Applies the current offset to curDspTime; the result is in the same unit as frameDuration.
+            public double GetStartTime(double curDspTime) => timeOffset + curDspTime;
+            // frameDuration: length of one frame; timeOffset: added to DSP time; errorRange: current correction step.
+            private double frameDuration;
+            private double timeOffset;
+            private double errorRange;
+        }
+        AudioStartEstimation audioStartEstimation;
+
         // This function is not/not always called on the main thread.
         private void OnAudioFilterRead(float[] data, int channels)
         {
@@ -713,15 +762,24 @@ private void OnAudioFilterRead(float[] data, int channels)
                 return;
             }
 
+            if (!audioStartEstimation.IsValid)
+            {
+                audioStartEstimation = new AudioStartEstimation(
+                    AudioSettings.dspTime,
+                    UnityCompositorInterface.GetCaptureFrameIndex(),
+                    UnityCompositorInterface.GetColorDuration() / 10000000.0);
+            }
+            else
+            {
+                audioStartEstimation.Update(AudioSettings.dspTime, UnityCompositorInterface.GetCaptureFrameIndex());
+            }
+
             //Create new stream
             if (audioMemoryStream == null)
             {
                 audioMemoryStream = new MemoryStream();
                 audioStreamWriter = new BinaryWriter(audioMemoryStream);
-                double audioSettingsTime = AudioSettings.dspTime; // Audio time in seconds, more accurate than Time.time
-                double captureFrameTime = UnityCompositorInterface.GetCaptureFrameIndex() * UnityCompositorInterface.GetColorDuration() / 10000000.0; // Capture Frame Time in seconds
-                DebugLog($"Obtained Audio Sample, AudioSettingsTime:{audioSettingsTime}, CaptureFrameTime:{captureFrameTime}");
-                audioStartTime = captureFrameTime;
+                audioStartTime = audioStartEstimation.GetStartTime(AudioSettings.dspTime);
                 numCachedAudioFrames = 0;
             }