From 5fa089d529d9488fe7deda1ad1ee9aab28342266 Mon Sep 17 00:00:00 2001
From: Akash Bora <89206401+Akascape@users.noreply.github.com>
Date: Tue, 5 Mar 2024 20:00:36 +0530
Subject: [PATCH 1/3] Added audio

---
 tkVideoPlayer/tkvideoplayer.py | 176 +++++++++++++++++++++++++--------
 1 file changed, 137 insertions(+), 39 deletions(-)

diff --git a/tkVideoPlayer/tkvideoplayer.py b/tkVideoPlayer/tkvideoplayer.py
index f2c8a9f..39aabad 100644
--- a/tkVideoPlayer/tkvideoplayer.py
+++ b/tkVideoPlayer/tkvideoplayer.py
@@ -4,6 +4,7 @@
 import threading
 import logging
 import tkinter as tk
+import pyaudio
 from PIL import ImageTk, Image, ImageOps
 from typing import Tuple, Dict
 
@@ -12,7 +13,7 @@
 
 class TkinterVideo(tk.Label):
 
-    def __init__(self, master, scaled: bool = True, consistant_frame_rate: bool = True, keep_aspect: bool = False, *args, **kwargs):
+    def __init__(self, master, scaled: bool = True, consistant_frame_rate: bool = True, keep_aspect: bool = False, audio=True, *args, **kwargs):
         super(TkinterVideo, self).__init__(master, *args, **kwargs)
 
         self.path = ""
@@ -35,6 +36,8 @@ def __init__(self, master, scaled: bool = True, consistant_frame_rate: bool = Tr
         self._seek = False
         self._seek_sec = 0
 
+        self._audio = audio
+
         self._video_info = {
             "duration": 0, # duration of the video
             "framerate": 0, # frame rate of the video
@@ -132,8 +135,23 @@ def _load(self, path):
 
             self._set_frame_size()
 
-            self.stream_base = stream.time_base
-
+            try:
+                if self._audio:
+                    audio_stream = self._container.streams.audio[0]
+
+                    samplerate = audio_stream.rate # this samplerate will work as the video clock
+                    channels = audio_stream.channels
+
+                    p = pyaudio.PyAudio()
+                    audio_device = p.open(format=pyaudio.paFloat32,
+                                          channels=channels,
+                                          rate=samplerate,
+                                          output=True)
+                else:
+                    audio_device = False
+            except:
+                audio_device = False
+
             try:
                 self.event_generate("<<Loaded>>") # generated when the video file is opened
 
@@ -145,7 +163,6 @@ def _load(self, path):
 
             time_in_frame = (1/self._video_info["framerate"])*1000 # second it should play each frame
 
-
             while self._load_thread == current_thread and not self._stop:
                 if self._seek: # seek to specific second
                     self._container.seek(self._seek_sec*1000000 , whence='time', backward=True, any_frame=False) # the seek time is given in av.time_base, the multiplication is to correct the frame
@@ -157,53 +174,127 @@ def _load(self, path):
                 if self._paused:
                     time.sleep(0.0001) # to allow other threads to function better when its paused
                     continue
-
-                now = time.time_ns() // 1_000_000 # time in milliseconds
-                delta = now - then # time difference between current frame and previous frame
-                then = now
-
+
+                self.frame_buffers = [] # flush all previous buffers
+                # print("Frame: ", frame.time, frame.index, self._video_info["framerate"])
                 try:
-                    frame = next(self._container.decode(video=0))
-
-                    self._time_stamp = float(frame.pts * stream.time_base)
-
-                    width = self._current_frame_size[0]
-                    height = self._current_frame_size[1]
-                    if self._keep_aspect_ratio:
-                        im_ratio = frame.width / frame.height
-                        dest_ratio = width / height
-                        if im_ratio != dest_ratio:
-                            if im_ratio > dest_ratio:
-                                new_height = round(frame.height / frame.width * width)
-                                height = new_height
+                    if audio_device and self._audio:
+
+                        dont_seek = False
+
+                        last_audio_buffer = False
+                        last_video_buffer = False
+
+                        while True:
+                            frame = next(self._container.decode(video=0, audio=0))
+
+                            if 'Video' in repr(frame):
+                                if last_audio_buffer:
+                                    if round(float(frame.pts * stream.time_base), 2)<=last_audio_buffer:
+                                        self.frame_buffers.append(frame)
+                                    else:
+                                        break # break if the last audio buffer pts matches the final video buffer pts
+                                    if not last_video_buffer:
+                                        break
+                                    dont_seek = True
+                                else:
+                                    self.frame_buffers.append(frame)
+                                    last_video_buffer = True
+
                             else:
-                                new_width = round(frame.width / frame.height * height)
-                                width = new_width
-
-                    self._current_img = frame.to_image(width=width, height=height, interpolation="FAST_BILINEAR")
-
-                    self._frame_number += 1
-
-                    self.event_generate("<<FrameGenerated>>")
+                                if dont_seek: # avoid excessive buffering, can cause stuttering frames
+                                    break
+                                self.frame_buffers.append(frame)
+                                last_audio_buffer = round(float(frame.pts * audio_stream.time_base), 2)
+
+
+                        self.frame_buffers = sorted(self.frame_buffers, key=lambda f: f.pts * stream.time_base if 'Video' in repr(f) else f.pts * audio_stream.time_base) # sort all the frames based on their presentation time
+
+                        for frame in self.frame_buffers:
+                            if 'Video' in repr(frame):
+
+                                width = self._current_frame_size[0]
+                                height = self._current_frame_size[1]
+                                if self._keep_aspect_ratio:
+                                    im_ratio = frame.width / frame.height
+                                    dest_ratio = width / height
+                                    if im_ratio != dest_ratio:
+                                        if im_ratio > dest_ratio:
+                                            new_height = round(frame.height / frame.width * width)
+                                            height = new_height
+                                        else:
+                                            new_width = round(frame.width / frame.height * height)
+                                            width = new_width
+
+                                self._current_img = frame.to_image(width=width, height=height, interpolation="FAST_BILINEAR")
+
+                                self._frame_number += 1
+
+                                self.event_generate("<<FrameGenerated>>")
+
+                                if self._frame_number % self._video_info["framerate"] == 0:
+                                    self.event_generate("<<SecondChanged>>")
+
+                            else:
+                                self._time_stamp = float(frame.pts * audio_stream.time_base)
+                                audio_data = frame.to_ndarray().astype('float32')
+                                interleaved_data = audio_data.T.flatten().tobytes()
+                                audio_device.write(interleaved_data)
+
+                            if self._stop or self._paused:
+                                break
+
+                    else:
+                        now = time.time_ns() // 1_000_000 # time in milliseconds
+                        delta = now - then # time difference between current frame and previous frame
+                        then = now
+
+                        frame = next(self._container.decode(video=0))
+
+                        self._time_stamp = float(frame.pts * stream.time_base)
+
+                        width = self._current_frame_size[0]
+                        height = self._current_frame_size[1]
+                        if self._keep_aspect_ratio:
+                            im_ratio = frame.width / frame.height
+                            dest_ratio = width / height
+                            if im_ratio != dest_ratio:
+                                if im_ratio > dest_ratio:
+                                    new_height = round(frame.height / frame.width * width)
+                                    height = new_height
+                                else:
+                                    new_width = round(frame.width / frame.height * height)
+                                    width = new_width
+
+                        self._current_img = frame.to_image(width=width, height=height, interpolation="FAST_BILINEAR")
+
+                        self._frame_number += 1
+
+                        self.event_generate("<<FrameGenerated>>")
 
-                    if self._frame_number % self._video_info["framerate"] == 0:
-                        self.event_generate("<<SecondChanged>>")
+                        if self._frame_number % self._video_info["framerate"] == 0:
+                            self.event_generate("<<SecondChanged>>")
 
-                    if self.consistant_frame_rate:
-                        time.sleep(max((time_in_frame - delta)/1000, 0))
+                        if self.consistant_frame_rate:
+                            time.sleep(max((time_in_frame - delta)/1000, 0))
 
-                    # time.sleep(abs((1 / self._video_info["framerate"]) - (delta / 1000)))
+                        # time.sleep(abs((1 / self._video_info["framerate"]) - (delta / 1000)))
 
                 except (StopIteration, av.error.EOFError, tk.TclError):
                     break
-
-            self._container.close()
 
             # print("Container: ", self._container.c)
             if self._container:
                 self._container.close()
+                stream.close()
                 self._container = None
+
+            if audio_device:
+                audio_device.stop_stream()
+                audio_device.close()
+                p.terminate()
+                audio_stream.close()
 
         finally:
             self._cleanup()
@@ -213,6 +304,8 @@ def _cleanup(self):
         self._frame_number = 0
         self._paused = True
         self._stop = True
+        self.frame_buffers = []
+
         if self._load_thread:
             self._load_thread = None
         if self._container:
@@ -223,7 +316,6 @@ def _cleanup(self):
         except tk.TclError:
             pass
 
-
     def load(self, path: str):
         """ loads the file from the given path """
         self.stop()
@@ -249,6 +341,12 @@ def play(self):
             self._load_thread = threading.Thread(target=self._load, args=(self.path, ), daemon=True)
             self._load_thread.start()
 
+    def mute(self):
+        self._audio = False
+
+    def unmute(self):
+        self._audio = True
+
     def is_paused(self):
         """ returns if the video is paused """
         return self._paused
@@ -290,4 +388,4 @@ def seek(self, sec: int):
 
         self._seek = True
         self._seek_sec = sec
-        
\ No newline at end of file
+        

From c7829a24c45c1e0e86c7a839fc057b556318cb7f Mon Sep 17 00:00:00 2001
From: Akash Bora <89206401+Akascape@users.noreply.github.com>
Date: Tue, 5 Mar 2024 20:01:45 +0530
Subject: [PATCH 2/3] Update requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 8ad93ce..cc1d571 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 av==9.2.0
 pillow~=9.0.1
+pyaudio~=0.2.14

From a0f1b8fa0d8e30515a693c335f284c634513fc4e Mon Sep 17 00:00:00 2001
From: Akash Bora <89206401+Akascape@users.noreply.github.com>
Date: Tue, 5 Mar 2024 20:04:38 +0530
Subject: [PATCH 3/3] Update Documentation.md

---
 Documentation.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation.md b/Documentation.md
index 5685b75..b737d96 100644
--- a/Documentation.md
+++ b/Documentation.md
@@ -28,7 +28,7 @@ Below are the methods of this library.
 
 | Methods | Parameters | Description |
 |------------------|--------------------------------------|-------------|
-| \_\_init\_\_ | scaled(bool), consistant_frame_rate(bool)=True, keep_aspect(bool)=False | The scale parameter scales the video to the label size. The consistant_frame_rate parameter skips frames to keep the framerate consistant and keep_aspect keeps aspect ratio when resizing(note: It will not increase the size) |
+| \_\_init\_\_ | scaled(bool), consistant_frame_rate(bool)=True, keep_aspect(bool)=False, audio(bool)=True | The scale parameter scales the video to the label size. The consistant_frame_rate parameter skips frames to keep the framerate consistent. keep_aspect keeps the aspect ratio when resizing (note: it will not increase the size). The audio parameter enables audio in the clip |
 | set_scaled | scaled(bool), keep_aspect(bool)=False | scales the video to the label size. |
 | load | file_path(str) | starts loading the video in a thread. |
 | set_size | size(Tuple[int, int]), keep_aspect(bool)=False | sets the size of the video frame. setting this will set scaled to `False` |
@@ -41,6 +41,7 @@ Below are the methods of this library.
 | seek | sec(int) | moves to specific time stamp. provide time stamp in seconds
 | keep_aspect | keep_aspect(bool) | keeps aspect ratio when resizing
 | metadata | - | returns meta information of the video if available in the form of dictionary
+| mute/unmute | - | enable/disable audio in the clip |
 | set_resampling_method| method(int) | By default the resampling method while resizing is NEAREST, changing this can affect how its resampled when image is resized, refer PIL documentation to read more (note: this can also affect the framerate of the video)|
 
 ### Virtual events
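A minimal usage sketch of the new audio support follows. It is only an illustration of the patched API, assuming the patched package is importable as `tkVideoPlayer`, PyAudio is installed, and `sample.mp4` stands in for a real video file:

```python
import tkinter as tk
from tkVideoPlayer import TkinterVideo

root = tk.Tk()

# audio=True (the default) plays the file's audio track through PyAudio when one exists
videoplayer = TkinterVideo(master=root, scaled=True, audio=True)
videoplayer.load("sample.mp4")  # placeholder path, replace with an actual file
videoplayer.pack(expand=True, fill="both")
videoplayer.play()

# mute()/unmute() simply flip the internal _audio flag checked by the decode loop
root.after(5000, videoplayer.mute)     # silence the audio track after 5 seconds
root.after(10000, videoplayer.unmute)  # restore it after 10 seconds

root.mainloop()
```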