From 5fa089d529d9488fe7deda1ad1ee9aab28342266 Mon Sep 17 00:00:00 2001
From: Akash Bora <89206401+Akascape@users.noreply.github.com>
Date: Tue, 5 Mar 2024 20:00:36 +0530
Subject: [PATCH 1/3] Added audio

---
 tkVideoPlayer/tkvideoplayer.py | 176 +++++++++++++++++++++++++--------
 1 file changed, 137 insertions(+), 39 deletions(-)

diff --git a/tkVideoPlayer/tkvideoplayer.py b/tkVideoPlayer/tkvideoplayer.py
index f2c8a9f..39aabad 100644
--- a/tkVideoPlayer/tkvideoplayer.py
+++ b/tkVideoPlayer/tkvideoplayer.py
@@ -4,6 +4,7 @@
 import threading
 import logging
 import tkinter as tk
+import pyaudio
 from PIL import ImageTk, Image, ImageOps
 from typing import Tuple, Dict
 
@@ -12,7 +13,7 @@
 
 class TkinterVideo(tk.Label):
 
-    def __init__(self, master, scaled: bool = True, consistant_frame_rate: bool = True, keep_aspect: bool = False, *args, **kwargs):
+    def __init__(self, master, scaled: bool = True, consistant_frame_rate: bool = True, keep_aspect: bool = False, audio=True, *args, **kwargs):
         super(TkinterVideo, self).__init__(master, *args, **kwargs)
 
         self.path = ""
@@ -35,6 +36,8 @@ def __init__(self, master, scaled: bool = True, consistant_frame_rate: bool = Tr
         self._seek = False
         self._seek_sec = 0
 
+        self._audio = audio
+
         self._video_info = {
             "duration": 0, # duration of the video
             "framerate": 0, # frame rate of the video
@@ -132,8 +135,23 @@ def _load(self, path):
 
             self._set_frame_size()
 
-            self.stream_base = stream.time_base
-
+            try:
+                if self._audio:
+                    audio_stream = self._container.streams.audio[0]
+
+                    samplerate = audio_stream.rate # this samplerate will work as the video clock
+                    channels = audio_stream.channels
+
+                    p = pyaudio.PyAudio()
+                    audio_device = p.open(format=pyaudio.paFloat32,
+                                          channels=channels,
+                                          rate=samplerate,
+                                          output=True)
+                else:
+                    audio_device = False
+            except:
+                audio_device = False
+
             try:
                 self.event_generate("<<Loaded>>") # generated when the video file is opened
 
@@ -145,7 +163,6 @@ def _load(self, path):
 
             time_in_frame = (1/self._video_info["framerate"])*1000 # second it should play each frame
 
-
             while self._load_thread == current_thread and not self._stop:
                 if self._seek: # seek to specific second
                     self._container.seek(self._seek_sec*1000000 , whence='time', backward=True, any_frame=False) # the seek time is given in av.time_base, the multiplication is to correct the frame
@@ -157,53 +174,127 @@ def _load(self, path):
                 if self._paused:
                     time.sleep(0.0001) # to allow other threads to function better when its paused
                     continue
-
-                now = time.time_ns() // 1_000_000 # time in milliseconds
-                delta = now - then # time difference between current frame and previous frame
-                then = now
-
+
+                self.frame_buffers = [] # flush all previous buffers
+                # print("Frame: ", frame.time, frame.index, self._video_info["framerate"])
                 try:
-                    frame = next(self._container.decode(video=0))
-
-                    self._time_stamp = float(frame.pts * stream.time_base)
-
-                    width = self._current_frame_size[0]
-                    height = self._current_frame_size[1]
-                    if self._keep_aspect_ratio:
-                        im_ratio = frame.width / frame.height
-                        dest_ratio = width / height
-                        if im_ratio != dest_ratio:
-                            if im_ratio > dest_ratio:
-                                new_height = round(frame.height / frame.width * width)
-                                height = new_height
+                    if audio_device and self._audio:
+
+                        dont_seek = False
+
+                        last_audio_buffer = False
+                        last_video_buffer = False
+
+                        while True:
+                            frame = next(self._container.decode(video=0, audio=0))
+
+                            if 'Video' in repr(frame):
+                                if last_audio_buffer:
+                                    if round(float(frame.pts * stream.time_base), 2)<=last_audio_buffer:
+                                        self.frame_buffers.append(frame)
+                                    else:
+                                        break # break if the last audio buffer pts matches the final video buffer pts
+                                    if not last_video_buffer:
+                                        break
+                                    dont_seek = True
+                                else:
+                                    self.frame_buffers.append(frame)
+                                    last_video_buffer = True
+
                             else:
-                                new_width = round(frame.width / frame.height * height)
-                                width = new_width
-
-                    self._current_img = frame.to_image(width=width, height=height, interpolation="FAST_BILINEAR")
-
-                    self._frame_number += 1
-
-                    self.event_generate("<<FrameGenerated>>")
+                                if dont_seek: # avoid excessive buffering, can cause stuttering frames
+                                    break
+                                self.frame_buffers.append(frame)
+                                last_audio_buffer = round(float(frame.pts * audio_stream.time_base), 2)
+
+
+                        self.frame_buffers = sorted(self.frame_buffers, key=lambda f: f.pts * stream.time_base if 'Video' in repr(f) else f.pts * audio_stream.time_base) # sort all the frames based on their presentation time
+
+                        for frame in self.frame_buffers:
+                            if 'Video' in repr(frame):
+
+                                width = self._current_frame_size[0]
+                                height = self._current_frame_size[1]
+                                if self._keep_aspect_ratio:
+                                    im_ratio = frame.width / frame.height
+                                    dest_ratio = width / height
+                                    if im_ratio != dest_ratio:
+                                        if im_ratio > dest_ratio:
+                                            new_height = round(frame.height / frame.width * width)
+                                            height = new_height
+                                        else:
+                                            new_width = round(frame.width / frame.height * height)
+                                            width = new_width
+
+                                self._current_img = frame.to_image(width=width, height=height, interpolation="FAST_BILINEAR")
+
+                                self._frame_number += 1
+
+                                self.event_generate("<<FrameGenerated>>")
+
+                                if self._frame_number % self._video_info["framerate"] == 0:
+                                    self.event_generate("<<SecondChanged>>")
+
+                            else:
+                                self._time_stamp = float(frame.pts * audio_stream.time_base)
+                                audio_data = frame.to_ndarray().astype('float32')
+                                interleaved_data = audio_data.T.flatten().tobytes()
+                                audio_device.write(interleaved_data)
+
+                            if self._stop or self._paused:
+                                break
+
+                    else:
+                        now = time.time_ns() // 1_000_000 # time in milliseconds
+                        delta = now - then # time difference between current frame and previous frame
+                        then = now
+
+                        frame = next(self._container.decode(video=0))
+
+                        self._time_stamp = float(frame.pts * stream.time_base)
+
+                        width = self._current_frame_size[0]
+                        height = self._current_frame_size[1]
+                        if self._keep_aspect_ratio:
+                            im_ratio = frame.width / frame.height
+                            dest_ratio = width / height
+                            if im_ratio != dest_ratio:
+                                if im_ratio > dest_ratio:
+                                    new_height = round(frame.height / frame.width * width)
+                                    height = new_height
+                                else:
+                                    new_width = round(frame.width / frame.height * height)
+                                    width = new_width
+
+                        self._current_img = frame.to_image(width=width, height=height, interpolation="FAST_BILINEAR")
+
+                        self._frame_number += 1
+
+                        self.event_generate("<<FrameGenerated>>")
 
-                    if self._frame_number % self._video_info["framerate"] == 0:
-                        self.event_generate("<<SecondChanged>>")
+                        if self._frame_number % self._video_info["framerate"] == 0:
+                            self.event_generate("<<SecondChanged>>")
 
-                    if self.consistant_frame_rate:
-                        time.sleep(max((time_in_frame - delta)/1000, 0))
+                        if self.consistant_frame_rate:
+                            time.sleep(max((time_in_frame - delta)/1000, 0))
 
-                    # time.sleep(abs((1 / self._video_info["framerate"]) - (delta / 1000)))
+                        # time.sleep(abs((1 / self._video_info["framerate"]) - (delta / 1000)))
 
                 except (StopIteration, av.error.EOFError, tk.TclError):
                     break
-
-            self._container.close()
 
             # print("Container: ", self._container.c)
             if self._container:
                 self._container.close()
+                stream.close()
                 self._container = None
+
+            if audio_device:
+                audio_device.stop_stream()
+                audio_device.close()
+                p.terminate()
+                audio_stream.close()
 
         finally:
             self._cleanup()
@@ -213,6 +304,8 @@ def _cleanup(self):
         self._frame_number = 0
         self._paused = True
         self._stop = True
+        self.frame_buffers = []
+
         if self._load_thread:
             self._load_thread = None
         if self._container:
@@ -223,7 +316,6 @@ def _cleanup(self):
         except tk.TclError:
             pass
 
-
     def load(self, path: str):
         """ loads the file from the given path """
         self.stop()
@@ -249,6 +341,12 @@ def play(self):
             self._load_thread = threading.Thread(target=self._load, args=(self.path, ), daemon=True)
             self._load_thread.start()
 
+    def mute(self):
+        self._audio = False
+
+    def unmute(self):
+        self._audio = True
+
     def is_paused(self):
         """ returns if the video is paused """
         return self._paused
@@ -290,4 +388,4 @@ def seek(self, sec: int):
 
         self._seek = True
         self._seek_sec = sec
-        
\ No newline at end of file
+        

From c7829a24c45c1e0e86c7a839fc057b556318cb7f Mon Sep 17 00:00:00 2001
From: Akash Bora <89206401+Akascape@users.noreply.github.com>
Date: Tue, 5 Mar 2024 20:01:45 +0530
Subject: [PATCH 2/3] Update requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 8ad93ce..cc1d571 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 av==9.2.0
 pillow~=9.0.1
+pyaudio~=0.2.14

From a0f1b8fa0d8e30515a693c335f284c634513fc4e Mon Sep 17 00:00:00 2001
From: Akash Bora <89206401+Akascape@users.noreply.github.com>
Date: Tue, 5 Mar 2024 20:04:38 +0530
Subject: [PATCH 3/3] Update Documentation.md

---
 Documentation.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation.md b/Documentation.md
index 5685b75..b737d96 100644
--- a/Documentation.md
+++ b/Documentation.md
@@ -28,7 +28,7 @@ Below are the methods of this library.
 
 | Methods | Parameters | Description |
 |------------------|--------------------------------------|-------------|
-| \_\_init\_\_ | scaled(bool), consistant_frame_rate(bool)=True, keep_aspect(bool)=False | The scale parameter scales the video to the label size. The consistant_frame_rate parameter skips frames to keep the framerate consistant and keep_aspect keeps aspect ratio when resizing(note: It will not increase the size) |
+| \_\_init\_\_ | scaled(bool), consistant_frame_rate(bool)=True, keep_aspect(bool)=False, audio(bool)=True | The scale parameter scales the video to the label size. The consistant_frame_rate parameter skips frames to keep the framerate consistent. keep_aspect keeps the aspect ratio when resizing (note: it will not increase the size). The audio parameter enables audio in the clip |
 | set_scaled | scaled(bool), keep_aspect(bool)=False | scales the video to the label size. |
 | load | file_path(str) | starts loading the video in a thread. |
 | set_size | size(Tuple[int, int]), keep_aspect(bool)=False | sets the size of the video frame. setting this will set scaled to `False` |
@@ -41,6 +41,7 @@ Below are the methods of this library.
 | seek | sec(int) | moves to specific time stamp. provide time stamp in seconds
 | keep_aspect | keep_aspect(bool) | keeps aspect ratio when resizing
 | metadata | - | returns meta information of the video if available in the form of dictionary
+| mute/unmute | - | enable/disable audio in the clip |
 | set_resampling_method| method(int) | By default the resampling method while resizing is NEAREST, changing this can affect how its resampled when image is resized, refer PIL documentation to read more (note: this can also affect the framerate of the video)|
 
 ### Virtual events
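A minimal usage sketch of the new audio support follows. It is only an illustration of the patched API, assuming the patched package is importable as `tkVideoPlayer`, PyAudio is installed, and `sample.mp4` stands in for a real video file:

```python
import tkinter as tk
from tkVideoPlayer import TkinterVideo

root = tk.Tk()

# audio=True (the default) plays the file's audio track through PyAudio when one exists
videoplayer = TkinterVideo(master=root, scaled=True, audio=True)
videoplayer.load("sample.mp4")  # placeholder path, replace with an actual file
videoplayer.pack(expand=True, fill="both")
videoplayer.play()

# mute()/unmute() simply flip the internal _audio flag checked by the decode loop
root.after(5000, videoplayer.mute)     # silence the audio track after 5 seconds
root.after(10000, videoplayer.unmute)  # restore it after 10 seconds

root.mainloop()
```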