+"""
+
+from .cli import cli, ArgumentsProcessor
+from .utils.logging_utils import LoggingFormatter, colored_logger
+from .utils.visualizer import audiowave_visualizer
+from .utils.metadata_utils import get_audio_bitrate
+from .core.codec import AudioSegmentArrayCodec
+from .core.effects import VoiceEffectProcessor
+from .core.audio.core import AudioModulator, AudioDenoiser
+
+__version__ = "0.2.0"  # package version string
+__all__ = [  # public names, matching the names imported at the top of this module
+ "cli",
+ "ArgumentsProcessor",
+ "LoggingFormatter",
+ "colored_logger",
+ "audiowave_visualizer",
+ "get_audio_bitrate",
+ "AudioSegmentArrayCodec",
+ "VoiceEffectProcessor",
+ "AudioModulator",
+ "AudioDenoiser",
+]
+LOGO = """
+ ///////] /// /// ///////] (O) //////] ///// //////] /////////
+ // // /// /// // // /// /// /// // / /// /// ///
+ ///////// /// /// // / /// /// /// ///// /// /// ///
+ // // /// /// // / /// // // // / // // ///
+// // ////////// /////// / /// /////// /////// /////// ///
+"""
diff --git a/audiobot/cli.py b/audiobot/cli.py
new file mode 100644
index 0000000..4b50372
--- /dev/null
+++ b/audiobot/cli.py
@@ -0,0 +1,196 @@
+#!/usr/bin/python3
+
+
+"""
+CLI Entry point for audiobot.\n
+Implements:\n
+ Argsmain->cmd argument handler either from other packages or directly from cli
+"""
+
+import argparse
+import logging
+
+import os
+
+import magic
+from .utils.metadata_utils import transcribe_audio
+from filemac.utils.colors import fg, rs
+from .core.processor import VideoProcessor, AudioProcessor
+from .utils.logging_utils import colored_logger
+from .config.core import Config
+
+RESET = rs
+
+Clogger = colored_logger()
+
+
class ArgumentsProcessor:
    """Dispatch parsed command-line arguments to the audio/video processors.

    Args:
        args: Namespace produced by the parser built in ``cli``. The
            attributes read here are ``file``, ``effect``, ``output``,
            ``verbose``, ``batch``, ``visualize``, ``transcribe`` and
            ``audio_effect``.
        parser: The ``argparse.ArgumentParser`` used to print the help text.
    """

    def __init__(self, args, parser):
        self.args = args
        self.parser = parser
        self.mime = magic.Magic(mime=True)
        # ``process`` tolerates a falsy ``args``; do not crash before it runs.
        self.output_dir = getattr(args, "output", None) or os.getcwd()

    def process(self):
        """Validate the arguments and run single-file or batch processing."""
        # NOTE(review): help is printed whenever ``--audio_effect`` is set;
        # confirm this is what the filemac dispatcher expects.
        if not self.args or self.args.audio_effect:
            self.parser.print_help()
            return

        if self.args.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            # The coloured logger carries its own level, so raising only the
            # root logger would leave its debug messages hidden.
            Clogger.setLevel(logging.DEBUG)

        if self.args.output:
            os.makedirs(self.args.output, exist_ok=True)

        if self.args.batch:
            self.batch_processor()
        else:
            self.mono_processor()

    def _dispatch(self, path):
        """Route *path* to the audio or video processor by MIME type.

        Returns:
            tuple: ``(handled, file_type)`` where ``handled`` is ``False``
            when the detected MIME type is neither audio nor video.
        """
        file_type = self.mime.from_file(path)
        Clogger.info(f"{fg.BLUE}Detected file type: {file_type}{RESET}")

        if file_type.startswith("audio"):
            if self.args.transcribe:
                transcribe_audio(path)
            handler = AudioProcessor().process_audio_file
        elif file_type.startswith("video"):
            handler = VideoProcessor().process_video_file
        else:
            return False, file_type

        handler(
            path,
            self.args.effect,
            self.output_dir,
            self.args.verbose,
            self.args.visualize,
        )
        return True, file_type

    def mono_processor(self):
        """Process the single file named by ``args.file``."""
        try:
            handled, file_type = self._dispatch(self.args.file)
            if not handled:
                Clogger.warning(
                    f"Unsupported file type: {file_type}. Only audio and video files are supported."
                )
        except Exception as e:
            Clogger.error(e)

    def batch_processor(self):
        """Process every file found under the directory ``args.file``.

        A failure on one file is logged and the walk continues with the
        next file instead of aborting the whole batch.
        """
        if not os.path.isdir(self.args.file):
            Clogger.error(f"Batch mode expects a directory: {self.args.file}")
            return

        for root, _, files in os.walk(self.args.file):
            for file in files:
                full_path = os.path.join(root, file)
                try:
                    handled, _ = self._dispatch(full_path)
                    if not handled:
                        Clogger.warning(f"Ignoring unsupported file type: {file}")
                except Exception as e:
                    Clogger.error(f"Error processing {full_path}: {e}")
+
+
def cli(argsv=None):
    """Parse the audiobot command line and run the requested processing.

    Args:
        argsv: Optional list of argument strings. When falsy, ``argparse``
            reads the arguments from ``sys.argv``.

    The parsed ``--cutoff`` (when given) and ``--noise`` values are stored in
    the shared ``Config`` options under the keys ``"cutoff"`` and ``"noise"``
    before ``ArgumentsProcessor`` is invoked.
    """
    # Single source of truth so the usage line cannot drift from the choices.
    effect_names = [
        "robotic",
        "deep",
        "high",
        "echo",
        "reverb",
        "whisper",
        "demonic",
        "chipmunk",
        "hacker",
        "lowpass",
        "highpass",
        "distortion",
        "denoise",
    ]
    parser = argparse.ArgumentParser(
        description="Audiobot: A tool for audio effects on audio and video files.",
        usage=(
            "filemac --audio_effect [-h] [--file FILE] \n"
            f"        [-e {{{','.join(effect_names)}}}] \n"
            "        [--cutoff CUTOFF] [-N {low,high,both}] \n"
            "        [-o OUTPUT] [-v] [-b] [--visualize] [--transcribe] \n"
            "        [--audio_effect]"
        ),
    )
    parser.add_argument(
        "file",
        help=f"{fg.CYAN}The input audio, video file, or directory.{RESET}",
    )
    parser.add_argument(
        "-e",
        "--effect",
        choices=effect_names,
        help=f"{fg.CYAN}The voice effect to apply.{RESET}",
    )
    parser.add_argument(
        "--cutoff",
        type=int,
        help=f"Cutoff frequency for denoise operation, default={fg.YELLOW}200{RESET}",
    )
    parser.add_argument(
        "-N",
        "--noise",
        choices=["low", "high", "both"],
        type=str,
        default="low",
        help=f"Specifies the type of noise to remove choices:[{fg.BLUE}low, high, both{RESET}] default={fg.YELLOW}low{RESET}",
    )
    parser.add_argument(
        "-o",
        "--output",
        help=f"{fg.CYAN}Output directory for modified files.{RESET}",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help=f"{fg.CYAN}Increase output verbosity.{RESET}",
    )
    parser.add_argument(
        "-b",
        "--batch",
        action="store_true",
        help=f"{fg.CYAN}Batch process all files in a directory.{RESET}",
    )
    parser.add_argument(
        "--visualize",
        action="store_true",
        help=f"{fg.CYAN}Visualize the audio waveform before and after modification.{RESET}",
    )
    parser.add_argument(
        "--transcribe",
        action="store_true",
        help=f"{fg.CYAN}Transcribe the audio content before applying the effect.{RESET}",
    )
    parser.add_argument("--audio_effect", action="store_true", help=argparse.SUPPRESS)

    args = parser.parse_args(argsv) if argsv else parser.parse_args()

    config = Config()
    if args.cutoff is not None:
        if args.cutoff <= 0:
            parser.error("--cutoff must be a positive frequency in Hz")
        config.options["cutoff"] = args.cutoff
    # The noise mode must reach the denoiser even when no cutoff is given.
    config.options["noise"] = args.noise

    # Call argument processor
    ArgumentsProcessor(args, parser).process()
+
+
+if __name__ == "__main__":
+ cli()
diff --git a/audiobot/config/__init__.py b/audiobot/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/audiobot/config/core.py b/audiobot/config/core.py
new file mode 100644
index 0000000..5efa61d
--- /dev/null
+++ b/audiobot/config/core.py
@@ -0,0 +1,8 @@
class Config:
    """Process-wide options holder.

    Every ``Config()`` call returns the same object; its ``options`` dict is
    created empty the first time the class is instantiated.
    """

    _instance = None

    def __new__(cls):
        existing = cls._instance
        if existing is not None:
            return existing

        cls._instance = super().__new__(cls)
        cls._instance.options = {}
        return cls._instance
diff --git a/audiobot/core/__init__.py b/audiobot/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/audiobot/core/audio/core.py b/audiobot/core/audio/core.py
new file mode 100644
index 0000000..5274383
--- /dev/null
+++ b/audiobot/core/audio/core.py
@@ -0,0 +1,283 @@
+import numpy as np
+from ...utils.logging_utils import colored_logger
+import librosa
+from pydub import AudioSegment, effects
+from scipy.signal import butter, lfilter, sosfilt
+from ...config.core import Config
+from filemac.utils.colors import fg, rs
+
+RESET = rs
+
+Clogger = colored_logger()
+config = Config()
+
+
class AudioModulator:
    """Voice effects that operate on pydub segments or NumPy sample arrays.

    The value stored under ``"cutoff"`` in the shared ``Config`` options is
    read once at construction; when truthy it overrides the ``cutoff``
    argument of ``lowpass_filter``, ``highpass`` and ``lowpass``.
    """

    def __init__(self):
        self._cutoff = config.options.get("cutoff")

    @staticmethod
    def _as_dtype(values, dtype):
        """Cast *values* to *dtype*, rounding and clipping for integer types."""
        if np.issubdtype(dtype, np.integer):
            bounds = np.iinfo(dtype)
            return np.clip(np.rint(values), bounds.min, bounds.max).astype(dtype)
        return values.astype(dtype)

    def pitch_shift(self, audio_segment, n_steps):
        """Shift the pitch of *audio_segment* by *n_steps* semitones.

        Multi-channel input is mixed down to mono and the result is a mono,
        16-bit segment at the input frame rate.

        Args:
            audio_segment: The pydub ``AudioSegment`` to transform.
            n_steps: Number of semitones passed to librosa (negative lowers
                the pitch).

        Returns:
            AudioSegment: The pitch-shifted audio.
        """
        # Mix down BEFORE reading samples: interleaved stereo frames must not
        # be handed to librosa as if they were one mono signal.
        if audio_segment.channels > 1:
            audio_segment = audio_segment.set_channels(1)
        # The result is serialised as int16 below, so work on 16-bit samples.
        if audio_segment.sample_width != 2:
            audio_segment = audio_segment.set_sample_width(2)

        samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
        shifted_samples = librosa.effects.pitch_shift(
            samples, sr=audio_segment.frame_rate, n_steps=n_steps
        )

        # Clip before the cast so overshoots saturate instead of wrapping.
        pcm = np.clip(shifted_samples, -32768, 32767).astype(np.int16)
        return AudioSegment(
            pcm.tobytes(),
            frame_rate=audio_segment.frame_rate,
            sample_width=2,
            channels=1,
        )

    def hacker(self, audio_segment):
        """Applies a deep, robotic voice effect used for anonymity.

        Returns:
            AudioSegment or None: The processed audio, or ``None`` when one
            of the stages fails (the failure is logged).
        """
        # Step 1: lower the pitch.
        Clogger.info("Applying deep pitch shift for hacker voice")
        deep_voice = self.pitch_shift(audio_segment, n_steps=-10)

        # Step 2: speed up for a robotic feel.
        Clogger.info("Speeding up for robotic effect")
        robotic_voice = effects.speedup(deep_voice, playback_speed=1.1)
        if robotic_voice is None:
            Clogger.error("Speedup failed")
            return None

        # Step 3: overlay a delayed copy.
        Clogger.info("Adding subtle echo for distortion")
        if not isinstance(robotic_voice, AudioSegment):
            Clogger.error("Robotic voice generation failed")
            return None

        delay = AudioSegment.silent(duration=500)
        Clogger.info("Overlaying echo effect")
        try:
            # NOTE(review): this parses as ``(delay + robotic_voice) - 5000``;
            # presumably a gain change of -5000 dB, which would make the echo
            # inaudible. Confirm the intended attenuation.
            echo_effect = robotic_voice.overlay(delay + robotic_voice - 5000)
        except Exception as e:
            Clogger.error(f"Error during overlay: {e}")
            return None

        # Step 4: low-pass the result.
        hacker_voice_effect = (
            effects.low_pass_filter(echo_effect, cutoff=2500) if echo_effect else None
        )
        if hacker_voice_effect is None:
            Clogger.error("Low pass filter failed")
            return None

        return hacker_voice_effect

    def echo(self, samples, delay=0.2, decay=0.5, sample_rate=44100):
        """Apply echo effect with a specified delay and decay.

        Args:
            samples (numpy.ndarray): Samples with time along the first axis;
                any trailing axes (e.g. channels) are preserved.
            delay (float): Echo delay in seconds.
            decay (float): Gain applied to the delayed copy.
            sample_rate (int): Sample rate in Hz.

        Returns:
            numpy.ndarray: Float array truncated to the input length.
        """
        samples = np.asarray(samples)
        delay_samples = int(sample_rate * delay)
        # Keep trailing axes so stereo (n, channels) input works too.
        echo_signal = np.zeros((len(samples) + delay_samples,) + samples.shape[1:])

        echo_signal[: len(samples)] = samples
        echo_signal[delay_samples:] += decay * samples  # Delayed echo signal

        return echo_signal[: len(samples)]  # Truncate to original length

    def reverb(self, samples, decay=0.7, delay=0.05, sample_rate=44100):
        """Add one delayed, attenuated copy of the signal to itself.

        Samples before the delay pass through unchanged. The result keeps the
        input dtype; integer results are rounded and clipped to the dtype's
        range so loud passages saturate instead of wrapping.

        Args:
            samples (numpy.ndarray): Samples with time along the first axis.
            decay (float): Gain applied to the delayed copy.
            delay (float): Delay in seconds.
            sample_rate (int): Sample rate in Hz.

        Returns:
            numpy.ndarray: Array with the same shape and dtype as *samples*.
        """
        samples = np.asarray(samples)
        delay_samples = max(int(sample_rate * delay), 0)
        total = len(samples)

        mixed = samples.astype(np.float64)  # float copy avoids integer overflow
        if delay_samples < total:
            mixed[delay_samples:] += decay * samples[: total - delay_samples]

        return self._as_dtype(mixed, samples.dtype)

    def lowpass_filter(self, samples, cutoff=200, sample_rate=44100):
        """
        Apply a low-pass filter to remove frequencies higher than the specified cutoff.

        Uses a 6th-order Butterworth filter in second-order-section form and
        filters along the first (time) axis.

        Args:
            samples (numpy.ndarray): The audio samples as a NumPy array.
            cutoff (int, optional): The cutoff frequency in Hz. Defaults to 200.
                Ignored when a truthy cutoff was configured at construction.
            sample_rate (int, optional): The sample rate of the audio in Hz. Defaults to 44100.

        Returns:
            numpy.ndarray: The filtered audio samples as a NumPy array.
        """
        cutoff = self._cutoff if self._cutoff else cutoff
        Clogger.debug(f"{fg.BLUE}cutoff: {fg.CYAN}{cutoff}{RESET}")
        Clogger.info("Apply a low-pass filter to remove frequencies higher than cutoff")
        nyquist = 0.5 * sample_rate
        normal_cutoff = cutoff / nyquist
        sos = butter(6, normal_cutoff, btype="low", analog=False, output="sos")
        # axis=0: time runs along the first axis for (n,) and (n, channels).
        return sosfilt(sos, samples, axis=0)

    def distort(self, samples, gain=10, threshold=0.3):
        """Apply distortion by clipping the waveform.

        For float input the amplified signal is clipped to ``±threshold``.
        For integer PCM input ``threshold`` is taken as a fraction of the
        dtype's maximum value and the result keeps the input dtype.
        """
        Clogger.info("Apply distortion by clipping the waveform.")
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            limit = threshold * np.iinfo(samples.dtype).max
            boosted = samples.astype(np.float64) * gain
            return self._as_dtype(np.clip(boosted, -limit, limit), samples.dtype)
        return np.clip(samples * gain, -threshold, threshold)  # Clip at threshold

    def whisper(self, audio_segment):
        """Low-pass the segment at 70 Hz and reduce its gain by 10 dB."""
        return effects.low_pass_filter(audio_segment, 70).apply_gain(-10)

    def highpass(self, audio_segment, cutoff: int = 200):
        """Apply pydub's high-pass filter (configured cutoff takes precedence)."""
        cutoff = self._cutoff if self._cutoff else cutoff
        Clogger.info(f"Cutoff: {fg.BBLUE}{cutoff}{RESET}")
        return effects.high_pass_filter(audio_segment, cutoff=cutoff)

    def lowpass(self, audio_segment, cutoff: int = 2200):
        """Apply pydub's low-pass filter (configured cutoff takes precedence)."""
        cutoff = self._cutoff if self._cutoff else cutoff
        Clogger.info(f"Cutoff: {fg.BBLUE}{cutoff}{RESET}")
        return effects.low_pass_filter(audio_segment, cutoff=cutoff)

    def normalize(self, audio_segment):
        """Return the segment normalized by ``pydub.effects.normalize``."""
        return effects.normalize(audio_segment)
+
+
class AudioDenoiser:
    """Butterworth-based noise removal for NumPy sample arrays.

    Filters run along the first (time) axis, so both ``(n,)`` mono and
    ``(n, channels)`` arrays are supported. Filter coefficients are cached
    per ``(cutoff, order)`` pair. A truthy ``"cutoff"`` entry in the shared
    ``Config`` options (read at construction) overrides the ``cutoff``
    argument of both filters.

    Args:
        sample_rate (int): Sample rate of the audio in Hz. Defaults to 44100.
    """

    def __init__(self, sample_rate=44100):
        self.sample_rate = sample_rate
        # Caches of second-order sections keyed by (cutoff, order).
        self._sos_low = {}
        self._sos_high = {}
        self._cutoff = config.options.get("cutoff")
        Clogger.debug(f"{fg.BLUE}cutoff: {fg.CYAN}{self._cutoff}{RESET}")

    def _clamp_cutoff(self, cutoff):
        """Return *cutoff*, pulled below Nyquist (with a warning) if needed."""
        nyquist = 0.5 * self.sample_rate
        if cutoff >= nyquist:
            Clogger.warning(f"Cutoff frequency must be less than Nyquist ({nyquist} Hz)")
            cutoff = nyquist - (nyquist * 0.1)
        return cutoff

    def _sections(self, cache, btype, cutoff, order):
        """Return cached Butterworth sections for ``(cutoff, order)``."""
        key = (cutoff, order)
        if key not in cache:
            cache[key] = butter(
                order,
                cutoff / (0.5 * self.sample_rate),
                btype=btype,
                analog=False,
                output="sos",
            )
        return cache[key]

    def lowpass_filter(
        self, samples: np.ndarray, cutoff: int = 2200, order: int = 6
    ) -> np.ndarray:
        """
        Apply a low-pass Butterworth filter to remove frequencies above the cutoff.

        Args:
            samples (np.ndarray): The input audio samples.
            cutoff (int, optional): Cutoff frequency in Hz. Defaults to 2200.
            order (int, optional): Order of the filter. Defaults to 6.

        Returns:
            np.ndarray: The low-pass filtered audio samples.

        Raises:
            ValueError: If *samples* is not a NumPy array.
        """
        cutoff = self._cutoff if self._cutoff else cutoff

        if not isinstance(samples, np.ndarray):
            raise ValueError("Input samples must be a NumPy array")

        cutoff = self._clamp_cutoff(cutoff)
        sos = self._sections(self._sos_low, "low", cutoff, order)
        return sosfilt(sos, samples, axis=0)

    def highpass_filter(
        self, samples: np.ndarray, cutoff: int = 200, order: int = 30
    ) -> np.ndarray:
        """
        Apply a high-pass Butterworth filter to remove frequencies below the cutoff.

        Args:
            samples (np.ndarray): The input audio samples.
            cutoff (int, optional): Cutoff frequency in Hz. Defaults to 200.
            order (int, optional): Order of the filter. Defaults to 30.
                NOTE(review): the low-pass default is 6 and ``denoise``
                passes 6 explicitly; confirm that 30 is intended here.

        Returns:
            np.ndarray: The high-pass filtered audio samples.

        Raises:
            ValueError: If *samples* is not a NumPy array or the cutoff is
                not positive.
        """
        cutoff = self._cutoff if self._cutoff else cutoff

        if not isinstance(samples, np.ndarray):
            raise ValueError("Input samples must be a NumPy array")
        if cutoff <= 0:
            raise ValueError("Cutoff frequency must be positive")

        cutoff = self._clamp_cutoff(cutoff)
        sos = self._sections(self._sos_high, "high", cutoff, order)
        return sosfilt(sos, samples, axis=0)

    def denoise(
        self,
        samples: np.ndarray,
        lowpass_cutoff: int = 2200,
        highpass_cutoff: int = 200,
        order: int = 6,
    ) -> np.ndarray:
        """
        Denoise the audio according to the ``"noise"`` entry of the shared config.

        ``"low"`` (the default) applies the low-pass filter, ``"high"`` the
        high-pass filter and ``"both"`` applies them in sequence. Integer
        input is returned in its original dtype (rounded and clipped) so the
        result can be serialised back to PCM unchanged.

        Args:
            samples (np.ndarray): The input audio samples.
            lowpass_cutoff (int, optional): Cutoff frequency for low-pass filtering. Defaults to 2200 Hz.
            highpass_cutoff (int, optional): Cutoff frequency for high-pass filtering. Defaults to 200 Hz.
            order (int, optional): Order of the filters. Defaults to 6.

        Returns:
            np.ndarray: The denoised audio samples.

        Raises:
            ValueError: If the configured noise mode is not one of
                ``"low"``, ``"high"`` or ``"both"``.
        """
        noise = config.options.get("noise") or "low"
        Clogger.info(f"{fg.BLUE}Noise: {fg.CYAN}{noise}{RESET}")

        if noise == "low":
            # Remove high-frequency noise
            filtered = self.lowpass_filter(samples, cutoff=lowpass_cutoff, order=order)
        elif noise == "high":
            # Remove low-frequency noise
            filtered = self.highpass_filter(samples, cutoff=highpass_cutoff, order=order)
        elif noise == "both":
            # NOTE(review): a configured cutoff overrides BOTH filters, which
            # makes the pass band collapse to a single frequency.
            filtered = self.lowpass_filter(samples, cutoff=lowpass_cutoff, order=order)
            filtered = self.highpass_filter(filtered, cutoff=highpass_cutoff, order=order)
        else:
            raise ValueError(f"Unknown noise mode: {noise!r}")

        if np.issubdtype(samples.dtype, np.integer):
            bounds = np.iinfo(samples.dtype)
            filtered = np.clip(np.rint(filtered), bounds.min, bounds.max)
            return filtered.astype(samples.dtype)
        return filtered
diff --git a/audiobot/core/codec.py b/audiobot/core/codec.py
new file mode 100644
index 0000000..e107d65
--- /dev/null
+++ b/audiobot/core/codec.py
@@ -0,0 +1,67 @@
+from pydub import AudioSegment
+import numpy as np
+
+
class AudioSegmentArrayCodec:
    """
    Convert between pydub AudioSegments and NumPy arrays.

    It allows for the following conversions:\n
    1. AudioSegments to NumPy arrays.
    2. NumPy arrays to AudioSegments.
    """

    # Signed integer dtype used for each supported PCM sample width (bytes).
    _PCM_DTYPES = {1: np.int8, 2: np.int16, 4: np.int32}

    def __init__(self):
        """Create a codec; the object keeps no state."""

    def numpy_to_audiosegment(self, samples, sample_rate, sample_width, channels):
        """
        Converts a NumPy array to a pydub AudioSegment.

        When *sample_width* is 1, 2 or 4 the samples are first converted to
        the matching signed integer dtype (floats are rounded, out-of-range
        values are clipped), so the raw bytes always agree with
        *sample_width*. Other widths are serialised as given.

        Args:
            samples (numpy.ndarray): The audio samples, either flat or shaped
                ``(n_frames, channels)``.
            sample_rate (int): The sample rate of the audio in Hz.
            sample_width (int): The sample width in bytes (e.g., 2 for 16-bit audio).
            channels (int): The number of audio channels (1 for mono, 2 for stereo).

        Returns:
            pydub.AudioSegment: An AudioSegment object created from the NumPy array.
        """
        samples = np.asarray(samples)

        pcm_dtype = self._PCM_DTYPES.get(sample_width)
        if pcm_dtype is not None and samples.dtype != pcm_dtype:
            bounds = np.iinfo(pcm_dtype)
            if not np.issubdtype(samples.dtype, np.integer):
                samples = np.rint(samples)
            samples = np.clip(samples, bounds.min, bounds.max).astype(pcm_dtype)

        # C-order bytes of an (n_frames, channels) array are already
        # interleaved frame by frame.
        raw_data = samples.tobytes()

        return AudioSegment(
            data=raw_data,
            sample_width=sample_width,
            frame_rate=sample_rate,
            channels=channels,
        )

    def audiosegment_to_numpy(self, audio_segment):
        """
        Converts a pydub AudioSegment to a NumPy array.

        Args:
            audio_segment (pydub.AudioSegment): The AudioSegment object to convert.

        Returns:
            tuple: A tuple containing:
                - numpy.ndarray: The samples, flat for mono and shaped
                  ``(n_frames, channels)`` for multi-channel audio.
                - int: The sample rate of the audio in Hz.
        """
        samples = np.array(audio_segment.get_array_of_samples())

        channels = audio_segment.channels
        if channels > 1:
            samples = samples.reshape((-1, channels))

        return samples, audio_segment.frame_rate
diff --git a/audiobot/core/effects.py b/audiobot/core/effects.py
new file mode 100644
index 0000000..36e14da
--- /dev/null
+++ b/audiobot/core/effects.py
@@ -0,0 +1,115 @@
+from pydub import effects
+from .codec import AudioSegmentArrayCodec
+from .audio.core import AudioModulator
+from ..utils.logging_utils import colored_logger
+from pydub import AudioSegment
+
+# logger = colored_logger()
+
+
class VoiceEffectProcessor:
    """Apply a named voice effect to a pydub ``AudioSegment``.

    Args:
        audio_segment: The segment to transform.
        effect (str): Effect name, matched case-insensitively against the
            keys returned by ``_get_effects``. ``None`` is treated as an
            unknown effect.
        verbosity (bool): When true, an unknown effect name is logged.
    """

    def __init__(self, audio_segment, effect: str, verbosity: bool = False):
        # The CLI passes ``None`` when no ``-e`` option was given.
        self.effect = effect.lower() if effect else ""
        self.audio_segment = audio_segment
        self.verbosity = verbosity
        self.handler = AudioSegmentArrayCodec()
        self.logger = colored_logger()

    def _via_numpy(self, transform):
        """Run *transform(samples, sample_rate)* and rebuild a segment."""
        samples, sample_rate = self.handler.audiosegment_to_numpy(self.audio_segment)
        return self.handler.numpy_to_audiosegment(
            transform(samples, sample_rate),
            sample_rate,
            self.audio_segment.sample_width,
            self.audio_segment.channels,
        )

    def _apply_chipmunk(self):
        return AudioModulator().pitch_shift(
            effects.speedup(self.audio_segment, 1.01), n_steps=9
        )

    def _apply_high(self):
        return AudioModulator().pitch_shift(self.audio_segment, n_steps=4)

    def _apply_lowpass(self):
        return AudioModulator().lowpass(self.audio_segment)

    def _apply_highpass(self):
        return AudioModulator().highpass(self.audio_segment)

    def _apply_robotic(self):
        return AudioModulator().pitch_shift(
            effects.speedup(self.audio_segment, 1.01), n_steps=-10
        )

    def _apply_demonic(self):
        lowered = AudioModulator().pitch_shift(
            effects.speedup(self.audio_segment, 1.01), n_steps=-10
        )
        trailing = AudioSegment.silent(duration=700) + self.audio_segment.fade_out(500)
        return lowered.overlay(trailing)

    def _apply_hacker(self):
        return AudioModulator().hacker(self.audio_segment)

    def _apply_distortion(self):
        return self._via_numpy(
            lambda samples, _rate: AudioModulator().distort(samples)
        )

    def _apply_deep(self):
        return AudioModulator().pitch_shift(self.audio_segment, n_steps=-4)

    def _apply_echo(self):
        delay = AudioSegment.silent(duration=1000)
        return self.audio_segment.overlay(delay + self.audio_segment)

    def _apply_whisper(self):
        return AudioModulator().whisper(self.audio_segment)

    def _apply_reverb(self):
        # Pass the real sample rate so the delay is correct for any input.
        return self._via_numpy(
            lambda samples, rate: AudioModulator().reverb(samples, sample_rate=rate)
        )

    def denoise(self):
        """Denoise the segment with ``AudioDenoiser`` at its own sample rate."""
        # AudioDenoiser lives next to AudioModulator in ``.audio.core``.
        from .audio.core import AudioDenoiser

        return self._via_numpy(
            lambda samples, rate: AudioDenoiser(sample_rate=rate).denoise(samples)
        )

    def _get_effects(self):
        """Return the mapping of effect name to handler method."""
        return {
            "chipmunk": self._apply_chipmunk,
            "high": self._apply_high,
            "lowpass": self._apply_lowpass,
            "robotic": self._apply_robotic,
            "demonic": self._apply_demonic,
            "hacker": self._apply_hacker,
            "distortion": self._apply_distortion,
            "deep": self._apply_deep,
            "echo": self._apply_echo,
            "whisper": self._apply_whisper,
            "reverb": self._apply_reverb,
            "denoise": self.denoise,
            "highpass": self._apply_highpass,
        }

    def apply_effect(self):
        """Apply the configured effect.

        Returns:
            The transformed segment, or the unmodified input segment when the
            effect name is unknown.
        """
        effect_handler = self._get_effects().get(self.effect)
        if effect_handler:
            return effect_handler()
        elif self.verbosity:
            self.logger.critical(f"Unknown voice effect: {self.effect}")
        return self.audio_segment  # Return unmodified audio if effect is unknown
diff --git a/audiobot/core/processor.py b/audiobot/core/processor.py
new file mode 100644
index 0000000..caf08c9
--- /dev/null
+++ b/audiobot/core/processor.py
@@ -0,0 +1,154 @@
+import os
+from .audio.core import AudioModulator
+from moviepy import AudioFileClip, VideoFileClip
+from ..utils.logging_utils import colored_logger
+from pydub import AudioSegment
+from ..utils.visualizer import audiowave_visualizer
+from ..utils.metadata_utils import get_audio_bitrate
+from .effects import VoiceEffectProcessor
+from filemac.utils.colors import fg, rs
+import sys
+# import io
+
+RESET = rs
+
+Clogger = colored_logger()
+
+
class VideoProcessor:
    """Apply a voice effect to the audio track of a video file."""

    def __init__(self):
        pass

    def process_video_file(
        self,
        input_file,
        effect,
        output_dir,
        verbosity: bool = False,
        visualize: bool = False,
    ):
        """
        Process video file by applying audio effects and retaining original bitrate.

        The audio track is extracted to a temporary directory, transformed,
        normalized and muxed back; the result is written to
        ``<output_dir>/<effect>_<input basename>``. Temporary files and the
        opened clips are released even when processing fails.

        Args:
            input_file: Path of the video to process.
            effect: Name of the voice effect to apply.
            output_dir: Directory receiving the output video.
            verbosity (bool): Log the individual processing steps.
            visualize (bool): Plot the waveforms before and after the effect.
        """
        import tempfile

        Clogger.info(f"Set Voice effect : {fg.MAGENTA}{effect}{RESET}")
        Clogger.info(f"Processing video file: {input_file}")

        video = None
        try:
            # Get the original video bitrate
            original_bitrate = get_audio_bitrate(input_file, verbosity)
            if verbosity and original_bitrate:
                Clogger.info(
                    f"Original video bitrate: {fg.YELLOW}{original_bitrate}{RESET}"
                )

            video = VideoFileClip(input_file)
            if video.audio is None:
                Clogger.error(
                    f"Error processing video file {input_file}: no audio track found"
                )
                return

            with tempfile.TemporaryDirectory() as workdir:
                audio_file = os.path.join(workdir, "temp_audio.wav")
                modified_file = os.path.join(workdir, "modified_audio.wav")
                new_audio = None
                try:
                    # Extract audio and save it to a file
                    if verbosity:
                        Clogger.info("Extract audio and write it to file")
                    video.audio.write_audiofile(audio_file)
                    audio_segment = AudioSegment.from_file(audio_file)

                    # Apply the selected voice effect
                    Clogger.info(
                        f"Applying the [{fg.BBWHITE}{effect}{RESET}{fg.GREEN}] effect"
                    )
                    modified_audio = VoiceEffectProcessor(
                        audio_segment, effect
                    ).apply_effect()
                    modified_audio = AudioModulator().normalize(modified_audio)

                    # Export the modified audio to a WAV file
                    if verbosity:
                        Clogger.info("Export the modified audio to a WAV file")
                    modified_audio.export(modified_file, format="wav")

                    # Load the modified audio file back into an AudioFileClip
                    new_audio = AudioFileClip(modified_file)

                    if verbosity:
                        Clogger.info("Set the video audio to the new modified audio")
                    final_video = video.with_audio(new_audio)

                    output_file = os.path.join(
                        output_dir, f"{effect}_{os.path.basename(input_file)}"
                    )

                    # Use the original bitrate or default to 5000k if unavailable
                    if verbosity:
                        Clogger.info(
                            f"Set:\n\tCodec = [{fg.MAGENTA}libx264{fg.GREEN}\n"
                            f"\tCodec type = [{fg.MAGENTA}aac{fg.GREEN}\n"
                            f"\tBitrate = [{fg.MAGENTA}{original_bitrate or '5000k'}{RESET}]"
                        )

                    final_video.write_videofile(
                        output_file,
                        codec="libx264",
                        audio_codec="aac",
                        bitrate=original_bitrate or "5000k",
                    )

                    Clogger.info(f"Modified video saved as: {output_file}")
                    Clogger.debug(f"Final bitrate = {get_audio_bitrate(output_file)}")

                    # Optional: visualize the before and after audio
                    if visualize:
                        audiowave_visualizer(audio_file, modified_file)
                finally:
                    # Release the reader before the directory is removed.
                    if new_audio is not None:
                        new_audio.close()

        except KeyboardInterrupt:
            Clogger.info("Quit")
            sys.exit(1)
        except Exception as e:
            Clogger.error(f"Error processing video file {input_file}: {e}")
        finally:
            if video is not None:
                video.close()
+
+
class AudioProcessor:
    """Apply a voice effect to an audio file and save the result as WAV."""

    def __init__(self):
        pass

    def process_audio_file(
        self, input_file, effect, output_dir, verbosity, visualize=False
    ):
        """Transform *input_file* and write ``<effect>_<name>.wav``.

        The output is always exported as WAV, so the file name carries a
        ``.wav`` extension regardless of the input's extension.

        Args:
            input_file: Path of the audio file to process.
            effect: Name of the voice effect to apply.
            output_dir: Directory receiving the output file.
            verbosity: Log channel count and sample width of the input.
            visualize (bool): Plot the waveforms before and after the effect.
        """
        Clogger.info(f"Set Voice effect : {fg.MAGENTA}{effect}{RESET}")

        Clogger.info(f"Processing audio file: {fg.MAGENTA}{input_file}{RESET}")

        try:
            audio_segment = AudioSegment.from_file(input_file)
            if verbosity:
                Clogger.info(f"Audio channels: {audio_segment.channels}")
                Clogger.info(f"Audio sample width: {audio_segment.sample_width}")

            modified_audio = VoiceEffectProcessor(audio_segment, effect).apply_effect()
            modified_audio = AudioModulator().normalize(modified_audio)

            # Keep the extension in sync with the exported container format.
            stem = os.path.splitext(os.path.basename(input_file))[0]
            output_file = os.path.join(output_dir, f"{effect}_{stem}.wav")
            modified_audio.export(output_file, format="wav")
            Clogger.info(f"Modified audio saved as: {output_file}")

            if visualize:
                audiowave_visualizer(input_file, output_file)

        except Exception as e:
            Clogger.error(f"Error processing audio file {input_file}: {e}")
diff --git a/audiobot/utils/logging_utils.py b/audiobot/utils/logging_utils.py
new file mode 100644
index 0000000..0500ccf
--- /dev/null
+++ b/audiobot/utils/logging_utils.py
@@ -0,0 +1,40 @@
+import logging
+from filemac.utils.colors import fg, rs
+
+RESET = rs
+
+
class LoggingFormatter(logging.Formatter):
    """Formatter that wraps each rendered record in a per-level colour code."""

    # Colour prefix for each standard logging level.
    COLORS = {
        logging.DEBUG: fg.BBLUE,
        logging.INFO: fg.GREEN,
        logging.WARNING: fg.YELLOW,
        logging.ERROR: fg.RED,
        logging.CRITICAL: fg.MAGENTA,
    }

    def format(self, record):
        """Return the formatted record, coloured by level and then reset."""
        prefix = self.COLORS.get(record.levelno, fg.WHITE)
        rendered = super().format(record)
        return f"{prefix}{rendered}{RESET}"
+
+
def colored_logger(logger_name="colored_logger") -> logging.Logger:
    """
    Return the named logger, configuring it with a coloured handler once.

    On first use the logger gets a single ``StreamHandler`` with
    ``LoggingFormatter``, level ``INFO`` and propagation disabled. Later
    calls return the logger untouched, so a level changed by the caller
    (for example to ``DEBUG``) is not reset.

    Args:
        logger_name (str): Name passed to ``logging.getLogger``.

    Returns:
        logging.Logger: The configured logger.
    """
    logger = logging.getLogger(logger_name)
    if logger.handlers:
        # Already configured by an earlier call.
        return logger

    handler = logging.StreamHandler()
    handler.setFormatter(LoggingFormatter("- %(levelname)s - %(message)s"))
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    # Prevent log messages from propagating to the root logger.
    logger.propagate = False

    return logger
diff --git a/audiobot/utils/metadata_utils.py b/audiobot/utils/metadata_utils.py
new file mode 100644
index 0000000..cceec73
--- /dev/null
+++ b/audiobot/utils/metadata_utils.py
@@ -0,0 +1,57 @@
+import speech_recognition as sr
+import ffmpeg
+from .logging_utils import colored_logger
+from filemac.utils.colors import fg, rs
+
+RESET = rs
+
+Clogger = colored_logger()
+
+
def get_audio_bitrate(input_file, verbosity=False):
    """
    Probe a media file with ffmpeg and return the bitrate of its video stream.

    Args:
        input_file (str): The path to the media file.
        verbosity (bool): Log a message before probing.

    Returns:
        The ``bit_rate`` value of the first stream whose ``codec_type`` is
        ``"video"``, exactly as reported by the probe, or ``None`` when there
        is no such stream, the stream has no bitrate, or probing fails
        (the failure is logged, not raised).
    """
    if verbosity:
        Clogger.info(
            f"Fetch the original bitrate of the video file using {fg.YELLOW}ffmpeg{RESET}."
        )

    try:
        metadata = ffmpeg.probe(input_file)
    except Exception as e:
        # Covers ffmpeg.Error as well as a missing file or ffprobe binary.
        Clogger.error(f"Error fetching bitrate for {input_file}: {e}")
        return None

    for stream in metadata.get("streams", []):
        if stream.get("codec_type") == "video":
            return stream.get("bit_rate")
    return None
+
+
def transcribe_audio(input_file):
    """Transcribe *input_file* with the Google recognizer of speech_recognition.

    Returns:
        The transcription on success, or ``None`` when any step fails (the
        error is logged).
    """
    Clogger.info(f"Transcribing audio: {input_file}")
    try:
        engine = sr.Recognizer()
        with sr.AudioFile(input_file) as source:
            recording = engine.record(source)
        text = engine.recognize_google(recording)
        Clogger.info(f"Transcription: {text}")
        return text
    except Exception as e:
        Clogger.error(f"Error transcribing audio file {input_file}: {e}")
        return None
diff --git a/audiobot/utils/visualizer.py b/audiobot/utils/visualizer.py
new file mode 100644
index 0000000..d4a5baa
--- /dev/null
+++ b/audiobot/utils/visualizer.py
@@ -0,0 +1,25 @@
+import matplotlib.pyplot as plt
+import soundfile as sf
+from .logging_utils import colored_logger
+
+
+Clogger = colored_logger()
+
+
def audiowave_visualizer(original_file, modified_file):
    """Plot the waveforms of two audio files one above the other.

    Both files are read with soundfile and shown in a single matplotlib
    figure; any error is logged instead of raised.
    """
    Clogger.info(f"Visualizing audio: {original_file} and {modified_file}")
    try:
        original_data, _ = sf.read(original_file)
        modified_data, _ = sf.read(modified_file)

        plt.figure(figsize=(14, 5))
        panels = (
            (1, original_data, "Original Audio Waveform"),
            (2, modified_data, "Modified Audio Waveform"),
        )
        for position, waveform, title in panels:
            plt.subplot(2, 1, position)
            plt.plot(waveform)
            plt.title(title)
        plt.show()

    except Exception as e:
        Clogger.error(f"Error visualizing audio: {e}")
diff --git a/audiobot/version.txt b/audiobot/version.txt
new file mode 100644
index 0000000..0ea3a94
--- /dev/null
+++ b/audiobot/version.txt
@@ -0,0 +1 @@
+0.2.0
diff --git a/build/lib/filemac/AudioExtractor.py b/build/lib/filemac/AudioExtractor.py
deleted file mode 100644
index 65172b1..0000000
--- a/build/lib/filemac/AudioExtractor.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import os
-import sys
-from moviepy.editor import VideoFileClip
-import logging
-import logging.handlers
-###############################################################################
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class ExtractAudio:
- def __init__(self, input_file):
- self.input_file = input_file
-
- def preprocess(self):
- try:
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- ls = ["mp4", "mkv"]
- if os.path.isfile(file_path) and any(file_path.lower().endswith(ext) for ext in ls):
- files_to_process.append(file_path)
-
- return files_to_process
- except Exception as e:
- print(e)
-
- def moviepyextract(self):
- try:
- video_list = self.preprocess()
- for input_video in video_list:
- print("\033[1;33mExtracting..\033[1;36m")
- video = VideoFileClip(input_video)
- audio = video.audio
- basename, _ = os.path.splitext(input_video)
- outfile = basename + ".wav"
- audio.write_audiofile(outfile)
- # print(f"\033[1;32mFile saved as \033[36m{outfile}\033[0m")
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
- except Exception as e:
- print(e)
-
-
-if __name__ == "__main__":
- vi = ExtractAudio(
- "/home/skye/Music/Melody in My Mind.mp4")
- vi.moviepyextract()
diff --git a/build/lib/filemac/OCRTextExtractor.py b/build/lib/filemac/OCRTextExtractor.py
deleted file mode 100644
index 392ff6d..0000000
--- a/build/lib/filemac/OCRTextExtractor.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import os
-import sys
-import cv2
-import pytesseract
-from PIL import Image
-import logging
-import logging.handlers
-###############################################################################
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-###############################################################################
-'''Do OCR text extraction from a given image file and display the extracted
- text
- to the screen finally save it to a text file assuming the name of the input
- file'''
-
-###############################################################################
-
-
-class ExtractText:
- def __init__(self, input_file):
- self.input_file = input_file
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- def OCR(self):
- image_list = self.preprocess()
- ls = ['png', 'jpg']
- image_list = [
- item for item in image_list if any(item.lower().endswith(ext)
- for ext in ls)]
-
- def ocr_text_extraction(image_path):
- '''Load image using OpenCV'''
- img = cv2.imread(image_path)
-
- logger.info(f"\033[2;95mprocessing {image_path}...\033[0m")
-
- try:
- '''Preprocess image for better OCR results'''
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
- thresh = cv2.threshold(
- gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
- img_pil = Image.fromarray(thresh)
-
- '''Perform OCR using pytesseract'''
- config = ("-l eng --oem 3 --psm 6")
- text = pytesseract.image_to_string((img_pil), config=config)
-
- '''Remove extra whitespaces and newlines
- text = ' '.join(text.split()).strip()'''
- logger.info("\033[36mFound:\n\033[0m")
- print(text)
- current_path = os.getcwd()
- file_path = os.path.join(current_path, OCR_file)
- ''' Save the extracted text to specified file '''
- logger.info("\033[1;92mGenerating text file for the extracted \
-text..\033[0m")
-
- with open(file_path, 'w') as file:
- file.write(text)
- logger.info(
- f"File saved as \033[1;93m{OCR_file}\033[0m:")
- '''If there are multiple candidate images for text extraction,
- wait for key press before proceeding to the next
- image otherwise don't wait
- size = [i for i in enumerate(image_list)]'''
- if len(image_list) >= 2:
- input("\033[5;97mPress Enter to continue\033[0m")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(0)
- except FileNotFoundError as e:
- logger.error(f"Error: {str(e)}")
- except IOError as e:
- logger.error(
- f"Could not write to output file '{OCR_file}'. \
-Reason: {str(e)}\033[0m")
- except Exception as e:
- logger.error(f"Error: {type(e).__name__}: {str(e)}")
- except Exception as e:
- logger.error(f"Error:>>\033[31m{e}\033[0m")
- return text
-
- for image_path in image_list:
- OCR_file = image_path[:-4] + ".txt"
- ocr_text_extraction(image_path)
diff --git a/build/lib/filemac/__init__.py b/build/lib/filemac/__init__.py
deleted file mode 100644
index e32c40a..0000000
--- a/build/lib/filemac/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .fmac import main
diff --git a/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc b/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc
deleted file mode 100644
index 36b350c..0000000
Binary files a/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc b/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc
deleted file mode 100644
index 2e0efeb..0000000
Binary files a/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc b/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc
deleted file mode 100644
index a29f114..0000000
Binary files a/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/__pycache__/converter.cpython-311.pyc b/build/lib/filemac/__pycache__/converter.cpython-311.pyc
deleted file mode 100644
index cbc7e1f..0000000
Binary files a/build/lib/filemac/__pycache__/converter.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/__pycache__/formats.cpython-311.pyc b/build/lib/filemac/__pycache__/formats.cpython-311.pyc
deleted file mode 100644
index d2b6f26..0000000
Binary files a/build/lib/filemac/__pycache__/formats.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/colors.py b/build/lib/filemac/colors.py
deleted file mode 100644
index 7e03e49..0000000
--- a/build/lib/filemac/colors.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import os
-
-from colorama import Fore, Style, init
-
-init(autoreset=True)
-
-if os.name == "posix":
- RESET = '\033[0m'
- RED = '\033[91m'
- DRED = '\033[1;91m'
- GREEN = '\033[92m'
- DGREEN = '\033[1;92m'
- YELLOW = '\033[93m'
- DYELLOW = '\033[1;93m'
- BLUE = '\033[94m'
- DBLUE = '\033[1;94m'
- MAGENTA = '\033[95m'
- DMAGENTA = '\033[1;95m'
- CYAN = '\033[96m'
- DCYAN = '\033[1;96m'
- ICYAN = '\033[3;96m'
-
-elif os.name == "nt":
- RESET = Style.RESET_ALL
- RED = Fore.LIGHTRED_EX
- DRED = Fore.RED
- GREEN = Fore.LIGHTGREEN_EX
- DGREEN = Fore.GREEN
- YELLOW = Fore.LIGHTYELLOW_EX
- DYELLOW = Fore.YELLOW
- BLUE = Fore.LIGHTBLUE_EX
- DBLUE = Fore.BLUE
- MAGENTA = Fore.LIGHTMAGENTA_EX
- DMAGENTA = Fore.MAGENTA
- CYAN = Fore.LIGHTCYAN_EX
- DCYAN = Fore.CYAN
- ICYAN = Fore.WHITE
-
-#return RESET, RED, DRED, GREEN, DGREEN, YELLOW, DYELLOW, BLUE, DBLUE,
-#MAGENTA, DMAGENTA, CYAN, DCYAN
diff --git a/build/lib/filemac/converter.py b/build/lib/filemac/converter.py
deleted file mode 100644
index a46a46f..0000000
--- a/build/lib/filemac/converter.py
+++ /dev/null
@@ -1,1027 +0,0 @@
-#############################################################################
-import logging
-import logging.handlers
-# import math
-import os
-import re
-import sqlite3
-import subprocess
-import sys
-import time
-import traceback
-# import pdfminer.high_level
-# from typing import Iterable
-from pdf2image import convert_from_path
-import cv2
-import pandas as pd
-import pydub
-import PyPDF2
-# import pytesseract
-import requests
-import speedtest
-from docx import Document
-# from pydub.playback import play
-from gtts import gTTS
-# from PyPDF2 import PdfFileReader
-from moviepy.editor import VideoFileClip
-from pdf2docx import parse
-from PIL import Image
-from pptx import Presentation
-from pydub import AudioSegment
-from .colors import (RESET, GREEN, DGREEN, YELLOW, DYELLOW, CYAN, BLUE, DBLUE,
- MAGENTA, DMAGENTA, RED, DRED, ICYAN)
-from reportlab.lib.pagesizes import letter
-from reportlab.platypus import Paragraph, SimpleDocTemplate
-
-from .formats import (SUPPORTED_AUDIO_FORMATS, SUPPORTED_IMAGE_FORMATS,
- SUPPORTED_VIDEO_FORMATS)
-
-# import pygame
-# from aspose.words import Document as aspose_document
-# from aspose.slides import Presentation as aspose_presentation
-# from show_progress import progress_show
-# from PIL import ImageDraw, ImageFont
-###############################################################################
-
-PYGAME_DETECT_AVX2 = 1
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class MakeConversion:
-
- '''Initialize the class'''
-
- def __init__(self, input_file):
- self.input_file = input_file
-
- '''Check input object whether it's a file or a directory if a file append
- the file to a set and return it otherwise append directory full path
- content to the set and return the set file. The returned set will be
- evaluated in the next step as required on the basis of requested operation
- For every requested operation, the output file if any is automatically
- generated on the basis of the input filename and saved in the sam
- directory as the input file
- '''
-
- def preprocess(self):
- try:
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
- except Exception as e:
- print(e)
-
-###############################################################################
-# Convert word file to pdf document (docx)
-###############################################################################
- def word_to_pdf(self):
- word_list = self.preprocess()
- ls = ["doc", "docx"]
- word_list = [
- item for item in word_list if any(item.lower().endswith(ext) for ext in ls)]
- for word_file in word_list:
- if word_file.lower().endswith("doc"):
- pdf_file = word_file[:-3] + "pdf"
- elif word_file.lower().endswith("docx"):
- pdf_file = word_file[:-4] + "pdf"
-
- try:
- print(
- f'{BLUE}Converting: {RESET}{word_file} {BLUE}to {RESET}{pdf_file}')
- if os.name == 'posix': # Check if running on Linux
- # Use subprocess to run the dpkg and grep commands
- result = subprocess.run(
- ['dpkg', '-l', 'libreoffice'], stdout=subprocess.PIPE, text=True)
- if result.returncode != 0:
- print(
- "Please install libreoffice to use this functionality !")
- sys.exit(1)
- subprocess.run(['soffice', '--convert-to',
- 'pdf', word_file, pdf_file])
- # print(f"{DMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}")
- elif os.name == "nt":
- try:
- from docx2pdf import convert
- except ImportError:
- print("Run pip install docx2pdf for this function to work")
- sys.exit(1)
- convert(word_file, pdf_file)
- print(
- f"{DMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}")
-
- except Exception as e:
- print(f"Error converting {word_file} to {pdf_file}: {e}")
-
-###############################################################################
-# Convert pdf file to word document (docx)
-###############################################################################
- def pdf_to_word(self):
- pdf_list = self.preprocess()
- pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")]
- for pdf_file in pdf_list:
- if pdf_file.lower().endswith("pdf"):
- word_file = pdf_file[:-3] + "docx"
-
- try:
-
- parse(pdf_file, word_file, start=0, end=None)
-
- print(f'{GREEN}Converting to word..{RESET}', end='\r')
-
- logger.info(f"{DMAGENTA} Successfully converted{pdf_file} \
-to {word_file}{RESET}")
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
- except Exception as e:
- logger.info(f'{DRED}All conversion attempts have failed: \
-{e}{RESET}')
-
-###############################################################################
-# Convert text file(s) to pdf document (docx)
-###############################################################################
- def txt_to_pdf(input_file, output_file):
- """Convert a .txt file to a PDF."""
-
- # Read the contents of the input .txt file
- with open(input_file, 'r', encoding='utf-8') as file:
- text_contents = file.readlines()
-
- # Initialize the PDF document
- doc = SimpleDocTemplate(output_file, pagesize=letter)
-
- # Create a story to hold the elements of the PDF
- story = []
-
- # Iterate through each line in the input .txt file and add it to the PDF
- for line in text_contents:
- story.append(Paragraph(line.strip(), style="normalText"))
-
- # Build and write the PDF document
- doc.build(story)
-
-###############################################################################
-# Convert word file(s) to pptx document (pptx/ppt)
-###############################################################################
- def word_to_pptx(self):
- word_list = self.preprocess()
- word_list = [item for item in word_list if item.lower().endswith(
- "docx") or item.lower().endswith("doc")]
-
- for word_file in word_list:
-
- if word_list is None:
- print("Please provide appropriate file type")
- sys.exit(1)
- if word_file.lower().endswith("docx"):
- pptx_file = word_file[:-4] + "pptx"
- elif word_file.lower().endswith("doc"):
- pptx_file = word_file[:-3] + "pptx"
- try:
- # Load the Word document
- print(F"{DYELLOW}Load the Word document..{RESET}")
- doc = Document(word_file)
-
- # Create a new PowerPoint presentation
- print(F"{DYELLOW}Create a new PowerPoint presentation..{RESET}")
- prs = Presentation()
-
- # Iterate through each paragraph in the Word document
- print(
- f"{DGREEN}Populating pptx slides with {DYELLOW}{len(doc.paragraphs)}{DGREEN} entries..{RESET}")
- count = 0
- for paragraph in doc.paragraphs:
- count += 1
- perc = (count/len(doc.paragraphs))*100
- print(
- f"{DMAGENTA}Progress:: \033[1;36m{perc:.2f}%{RESET}", end="\r")
- # Create a new slide in the PowerPoint presentation
- slide = prs.slides.add_slide(prs.slide_layouts[1])
-
- # Add the paragraph text to the slide
- slide.shapes.title.text = paragraph.text
-
- # Save the PowerPoint presentation
- prs.save(pptx_file)
- print(f"\n{DGREEN}Done{RESET}")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
- except Exception as e:
- logger.error(e)
-
-###############################################################################
-# Convert word file to txt file'''
-###############################################################################
-
- def word_to_txt(self):
- word_list = self.preprocess()
- word_list = [item for item in word_list if item.lower().endswith(
- "docx") or item.lower().endswith("doc")]
- for file_path in word_list:
- if file_path.lower().endswith("docx"):
- txt_file = file_path[:-4] + "txt"
- elif file_path.lower().endswith("doc"):
- txt_file = file_path[:-3] + "txt"
- try:
- doc = Document(file_path)
- print("INFO Processing...")
-
- with open(txt_file, 'w', encoding='utf-8') as f:
- Par = 0
- for paragraph in doc.paragraphs:
- f.write(paragraph.text + '\n')
- Par += 1
-
- print(f"Par:{BLUE}{Par}/{len(doc.paragraphs)}{RESET}", end='\r')
- logger.info(f"{DMAGENTA}Conversion of file to txt success{RESET}")
-
- except KeyboardInterrupt:
- print("\nExit")
- sys.exit()
- except Exception as e:
- logger.error(
- f"Dear user something went amiss while attempting the conversion:\n {e}")
- with open("conversion.log", "a") as log_file:
- log_file.write(f"Couldn't convert {file_path} to {txt_file}:\
-REASON->{e}")
-
-###############################################################################
-# Convert pdf file to text file
-###############################################################################
- def pdf_to_txt(self):
- pdf_list = self.preprocess()
- pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")]
- for file_path in pdf_list:
- txt_file = file_path[:-3] + "txt"
- try:
- with open(file_path, 'rb') as file:
- pdf_reader = PyPDF2.PdfReader(file)
- text = ''
- for page_num in range(len(pdf_reader.pages)):
- page = pdf_reader.pages[page_num]
- text += page.extract_text()
- with open(txt_file, 'w', encoding='utf-8') as f:
- f.write(text)
- logger.info(f"{DMAGENTA}Successfully converted {file_path} to \
-{txt_file}{RESET}")
- except Exception as e:
- logger.error(
- f"Oops somethin went astray while converting {file_path} \
-to {txt_file}: {e}")
- with open("conversion.log", "a") as log_file:
- log_file.write(
- f"Error converting {file_path} to {txt_file}: {e}\n")
-
-###############################################################################
-# Convert ppt file to word document
-###############################################################################
- def ppt_to_word(self):
- ppt_list = self.preprocess()
- ppt_list = [item for item in ppt_list if item.lower().endswith(
- "pptx") or item.lower().endswith("ppt")]
- for file_path in ppt_list:
- if file_path.lower().endswith("pptx"):
- word_file = file_path[:-4] + "docx"
- elif file_path.lower().endswith("ppt"):
- word_file = file_path[:-3] + "docx"
- try:
- presentation = Presentation(file_path)
- document = Document()
-
- for slide in presentation.slides:
- for shape in slide.shapes:
- if shape.has_text_frame:
- text_frame = shape.text_frame
- for paragraph in text_frame.paragraphs:
- new_paragraph = document.add_paragraph()
- for run in paragraph.runs:
- new_run = new_paragraph.add_run(run.text)
- # Preserve bold formatting
- new_run.bold = run.font.bold
- # Preserve italic formatting
- new_run.italic = run.font.italic
- # Preserve underline formatting
- new_run.underline = run.font.underline
- # Preserve font name
- new_run.font.name = run.font.name
- # Preserve font size
- new_run.font.size = run.font.size
- try:
- # Preserve font color
- new_run.font.color.rgb = run.font.color.rgb
- except AttributeError:
- # Ignore error and continue without
- # setting the font color
- pass
- # Add a new paragraph after each slide
- document.add_paragraph()
- document.save(word_file)
- logger.info(f"{DMAGENTA}Successfully converted {file_path} to \
- {word_file}{RESET}")
- except Exception as e:
- logger.error(
- f"Oops somethin gwent awry while attempting to convert \
- {file_path} to {word_file}:\n>>>{e}")
- with open("conversion.log", "a") as log_file:
- log_file.write(
- f"Oops something went astray while attempting \
- convert {file_path} to {word_file}:{e}\n")
-
-###############################################################################
-# Convert text file to word
-###############################################################################
- def text_to_word(self):
- flist = self.preprocess()
- flist = [item for item in flist if item.lower().endswith("txt")]
- for file_path in flist:
- if file_path.lower().endswith("txt"):
- word_file = file_path[:-3] + "docx"
-
- try:
- # Read the text file
- with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
- text_content = file.read()
-
- # Filter out non-XML characters
- filtered_content = re.sub(
- r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]+', '', text_content)
-
- # Create a new Word document
- doc = Document()
- # Add the filtered text content to the document
- doc.add_paragraph(filtered_content)
-
- # Save the document as a Word file
- doc.save(word_file)
- logger.info(f"{DMAGENTA}Successfully converted {file_path} to \
- {word_file}{RESET}")
- except FileExistsError as e:
- logger.error(f"{str(e)}")
- except Exception as e:
- logger.error(
- f"Oops Unable to perfom requested conversion: {e}\n")
- with open("conversion.log", "a") as log_file:
- log_file.write(
- f"Error converting {file_path} to {word_file}: \
-{e}\n")
-
-###############################################################################
-# Convert xlsx file(s) to word file(s)
-###############################################################################
- def convert_xls_to_word(self):
- xls_list = self.preprocess()
- ls = ["xlsx", "xls"]
- xls_list = [item for item in xls_list if any(
- item.lower().endswith(ext) for ext in ls)]
- print(F"{DGREEN}Initializing conversion sequence{RESET}")
- for xls_file in xls_list:
- if xls_file.lower().endswith("xlsx"):
- word_file = xls_file[:-4] + "docx"
- elif xls_file.lower().endswith("xls"):
- word_file = xls_file[:-3] + "docx"
- try:
- '''Read the XLS file using pandas'''
-
- df = pd.read_excel(xls_file)
-
- '''Create a new Word document'''
- doc = Document()
-
- '''Iterate over the rows of the dataframe and add them to the
- Word document'''
- logger.info(f"{ICYAN}Converting {xls_file}..{RESET}")
- # time.sleep(2)
- total_rows = df.shape[0]
- for _, row in df.iterrows():
- current_row = _ + 1
- percentage = (current_row / total_rows)*100
- for value in row:
- doc.add_paragraph(str(value))
- print(f"Row {DYELLOW}{current_row}/{total_rows} \
-{DBLUE}{percentage:.1f}%{RESET}", end="\r")
- # print(f"\033[1;36m{row}{RESET}")
-
- # Save the Word document
- doc.save(word_file)
- print(F"{DGREEN}Conversion successful!{RESET}", end="\n")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except Exception as e:
- print("Oops Conversion failed:", str(e))
-
-###############################################################################
- '''Convert xlsx/xls file/files to text file format'''
-###############################################################################
-
- def convert_xls_to_text(self):
- xls_list = self.preprocess()
- ls = ["xlsx", "xls"]
- xls_list = [
- item for item in xls_list if any(item.lower().endswith(ext)
- for ext in ls)]
- print(F"{DGREEN}Initializing conversion sequence{RESET}")
- for xls_file in xls_list:
- if xls_file .lower().endswith("xlsx"):
- txt_file = xls_file[:-4] + "txt"
- elif xls_file .lower().endswith("xls"):
- txt_file = xls_file[:-3] + "txt"
- try:
- # Read the XLS file using pandas
- logger.info(f"Converting {xls_file}..")
- df = pd.read_excel(xls_file)
-
- # Convert the dataframe to plain text
- text = df.to_string(index=False)
- chars = len(text)
- words = len(text.split())
- lines = len(text.splitlines())
-
- print(
- f"Preparing to write: {DYELLOW}{chars} \033[1;30m \
-characters{DYELLOW} {words}\033[1;30m words {DYELLOW}{lines}\033[1;30m \
-lines {RESET}", end="\n")
- # Write the plain text to the output file
- with open(txt_file, 'w') as file:
- file.write(text)
-
- print(F"{DGREEN}Conversion successful!{RESET}", end="\n")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except Exception as e:
- print("Oops Conversion failed:", str(e))
-
-###############################################################################
- '''Convert xlsx/xls file to csv(comma seperated values) format'''
-###############################################################################
-
- def convert_xlsx_to_csv(self):
- xls_list = self.preprocess()
- ls = ["xlsx", "xls"]
- xls_list = [
- item for item in xls_list if any(item.lower().endswith(ext)
- for ext in ls)]
- for xls_file in xls_list:
- if xls_file.lower().endswith("xlsx"):
- csv_file = xls_file[:-4] + "csv"
- elif xls_file.lower().endswith("xls"):
- csv_file = xls_file[:-3] + "csv"
- try:
- '''Load the Excel file'''
- print(F"{DGREEN}Initializing conversion sequence{RESET}")
- df = pd.read_excel(xls_file)
- logger.info(f"Converting {xls_file}..")
- total_rows = df.shape[0]
- print(f"Writing {DYELLOW}{total_rows} rows {RESET}", end="\n")
- for i in range(101):
- print(f"Progress: {i}%", end="\r")
- '''Save the DataFrame to CSV'''
- df.to_csv(csv_file, index=False)
- print(F"{DMAGENTA} Conversion successful{RESET}")
- except KeyboardInterrupt:
- print("Exiting")
- sys.exit(1)
- except Exception as e:
- print(e)
-
-###############################################################################
-# Convert xlsx file(s) to sqlite
-###############################################################################
-
- def convert_xlsx_to_database(self):
- xlsx_list = self.preprocess()
- ls = ["xlsx", "xls"]
- xlsx_list = [
- item for item in xlsx_list if any(item.lower().endswith(ext)
- for ext in ls)]
- for xlsx_file in xlsx_list:
- if xlsx_file.lower().endswith("xlsx"):
- sqlfile = xlsx_file[:-4]
- elif xlsx_file.lower().endswith("xls"):
- sqlfile = xlsx_file[:-3]
- try:
- db_file = input(
- F"{DBLUE}Please enter desired sql filename: {RESET}")
- table_name = input(
- "Please enter desired table name: ")
- # res = ["db_file", "table_name"]
- if any(db_file) == "":
- db_file = sqlfile + "sql"
- table_name = sqlfile
- if not db_file.endswith(".sql"):
- db_file = db_file + ".sql"
- column = 0
- for i in range(20):
- column += 0
- # Read the Excel file into a pandas DataFrame
- print(f"Reading {xlsx_file}...")
- df = pd.read_excel(xlsx_file)
- print(f"{DGREEN}Initializing conversion sequence{RESET}")
- print(f"{DGREEN} Connected to sqlite3 database::{RESET}")
- # Create a connection to the SQLite database
- conn = sqlite3.connect(db_file)
- print(F"{DYELLOW} Creating database table::{RESET}")
- # Insert the DataFrame into a new table in the database
- df.to_sql(table_name, column, conn,
- if_exists='replace', index=False)
- print(
- f"Operation successful{RESET} file saved as \033[32{db_file}{RESET}")
- # Close the database connection
- conn.close()
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except Exception as e:
- logger.error(f"{e}")
-
-###############################################################################
-# Create image objects from given files
-###############################################################################
- def doc2image(self, outf="png"):
- outf_list = ['png', 'jpg']
- if outf not in outf_list:
- outf = "png"
- path_list = self.preprocess()
- ls = ["pdf", "doc", "docx"]
- file_list = [
- item for item in path_list if any(item.lower().endswith(ext)
- for ext in ls)]
- imgs = []
- for file in file_list:
- if file.lower().endswith("pdf"):
- # Convert the PDF to a list of PIL image objects
- print("Generate image objects ..")
- images = convert_from_path(file)
-
- # Save each image to a file
- fname = file[:-4]
- print(f"{YELLOW}Target images{BLUE} {len(images)}{RESET}")
- for i, image in enumerate(images):
- print(f"{DBLUE}{i}{RESET}", end="\r")
- yd = f"{fname}_{i+1}.{outf}"
- image.save(yd)
- imgs.append(yd)
- print(f"{GREEN}Ok{RESET}")
-
- return imgs
-
-
-class Scanner:
-
- def __init__(self, input_file):
- self.input_file = input_file
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- def scanPDF(self):
- pdf_list = self.preprocess()
- pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")]
-
- for pdf in pdf_list:
- out_f = pdf[:-3] + 'txt'
- print(f"{YELLOW}Read pdf ..{RESET}")
-
- with open(pdf, 'rb') as f:
- reader = PyPDF2.PdfReader(f)
- text = ''
-
- pg = 0
- for page_num in range(len(reader.pages)):
- pg += 1
-
- print(f"{DYELLOW}Progress:{RESET}", end="")
- print(f"{CYAN}{pg}/{len(reader.pages)}{RESET}", end="\r")
- page = reader.pages[page_num]
- text += page.extract_text()
-
- print(f"\n{text}")
- print(F"\n{YELLOW}Write text to {GREEN}{out_f}{RESET}")
- with open(out_f, 'w') as f:
- f.write(text)
-
- print(F"{DGREEN}Ok{RESET}")
-
- def scanAsImgs(self):
- file = self.input_file
- mc = MakeConversion(file)
- img_objs = mc.doc2image()
- # print(img_objs)
- from .OCRTextExtractor import ExtractText
- text = ''
- for i in img_objs:
- extract = ExtractText(i)
- tx = extract.OCR()
- if tx is not None:
- text += tx
- print(text)
- print(f"{GREEN}Ok{RESET}")
- return text
-
-
-class FileSynthesis:
-
- def __init__(self, input_file):
- self.input_file = input_file
- # self.CHUNK_SIZE = 20_000
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- @staticmethod
- def join_audios(files, output_file):
- masterfile = output_file + "_master.mp3"
- print(
- f"{DBLUE}Create a master file {DMAGENTA}{masterfile}{RESET}", end='\r')
- # Create a list to store files
- ogg_files = []
- # loop through the directory while adding the ogg files to the list
- print(files)
- for filename in files:
- print(f"Join {DBLUE}{len(files)}{RESET} files")
- # if filename.endswith('.ogg'):
- # ogg_file = os.path.join(path, filename)
- ogg_files.append(AudioSegment.from_file(filename))
-
- # Concatenate the ogg files
- combined_ogg = ogg_files[0]
- for i in range(1, len(files)):
- combined_ogg += ogg_files[i]
-
- # Export the combined ogg to new mp3 file or ogg file
- combined_ogg.export(output_file + "_master.ogg", format='ogg')
- print(F"{DGREEN}Master file:Ok {RESET}")
-
- def Synthesise(self, text: str, output_file: str, CHUNK_SIZE: int = 20_000, ogg_folder: str = 'tempfile', retries: int = 5) -> None:
- """Converts given text to speech using Google Text-to-Speech API."""
- out_ls = []
- try:
- if not os.path.exists(ogg_folder):
- os.mkdir(ogg_folder)
- print(f"{DYELLOW}Get initial net speed..{RESET}")
- st = speedtest.Speedtest() # get initial network speed
- st.get_best_server()
- download_speed: float = st.download() # Keep units as bytes
- logger.info(
-
- f"{GREEN} Conversion to mp3 sequence initialized start\
-speed {CYAN}{download_speed/1_000_000:.2f}Kbps{RESET}")
-
- for attempt in range(retries):
- try:
- '''Split input text into smaller parts and generate
- individual gTTS objects'''
- counter = 0
- for i in range(0, len(text), CHUNK_SIZE):
- chunk = text[i:i+CHUNK_SIZE]
- output_filename = f"{output_file}_{counter}.ogg"
- counter += 1
- # print(output_filename)
- if os.path.exists(output_filename):
- output_filename = f"{output_file}_{counter+1}.ogg"
- # print(output_filename)
- tts = gTTS(text=chunk, lang='en', slow=False)
- tts.save(output_filename)
- out_ls.append(output_filename)
- break
- # print(out_ls)
- '''Handle any network related issue gracefully'''
- except Exception in (ConnectionError, ConnectionAbortedError,
- ConnectionRefusedError,
- ConnectionResetError) as e:
- logger.error(f"Sorry boss connection problem encountered: {e} in {attempt+1}/{retries}:")
- time.sleep(5) # Wait 5 seconds before retrying
-
- # Handle connectivity/network error
- except requests.exceptions.RequestException as e:
- logger.error(f"{e}")
- except Exception as e:
- logger.error(f'{DRED} Error during conversion attempt \
-{attempt+1}/{retries}:{e}{RESET}')
- tb = traceback.extract_tb(sys.exc_info()[2])
- logger.info("\n".join([f" > {line}"
- for line in map(str, tb)]))
- time.sleep(3) # Wait 5 seconds before retrying
- pass
-
- if attempt >= retries:
- logger.error(
- f"Conversion unsuccessful after {retries} attempts.")
- sys.exit(2)
-
- finally:
- # print(out_ls)
- # Combine generated gTTS objects
- if len(out_ls) >= 1:
- FileSynthesis.join_audios(out_ls, output_file)
-
- st = speedtest.Speedtest()
- logger.info("Done")
- print("Get final speed ...")
- logger.info(
-
- f"{YELLOW}Final Network Speed: {st.download()/(10**6):.2f} Kbps{RESET}")
-
- @staticmethod
- def pdf_to_text(pdf_path):
- logger.info('''Processing the file...\n''')
- logger.info(
- F'{GREEN} Initializing pdf to text conversion sequence...{RESET}')
- try:
- with open(pdf_path, 'rb') as file:
- pdf_reader = PyPDF2.PdfReader(file)
- text = ''
- for page_num in range(len(pdf_reader.pages)):
- page = pdf_reader.pages[page_num]
- text += page.extract_text()
- print(F"{DGREEN}Ok{RESET}")
- return text
- except Exception as e:
- logger.error(
- f"{DRED}Failed to extract text from '{YELLOW}{pdf_path}'{RESET}:\n {e}")
-
- @staticmethod
- def text_file(input_file):
- try:
- with open(input_file, 'r', errors='ignore') as file:
- text = file.read().replace('\n', ' ')
- return text
- except FileNotFoundError:
- logger.error("File '{}' was not found.".format(input_file))
- except Exception as e:
- logger.error(
- F"{DRED}Error converting {input_file} to text: {str(e)}\
-{RESET}")
-
- @staticmethod
- def docx_to_text(docx_path):
- try:
- logger.info(f"{BLUE} Converting {docx_path} to text...{RESET}")
- doc = Document(docx_path)
- paragraphs = [paragraph.text for paragraph in doc.paragraphs]
- return '\n'.join(paragraphs)
- except FileNotFoundError:
- logger.error(f"File '{docx_path}' was not found.")
- except Exception as e:
- logger.error(
- F"{DRED}Error converting {docx_path} to text: {e}\
-{RESET}")
-
- '''Handle input files based on type to initialize conversion sequence'''
-
- def audiofy(self):
- input_list = self.preprocess()
- extdoc = ["docx", "doc"]
- ls = {"pdf", "docx", "doc", "txt"}
- input_list = [item for item in input_list if item.lower().endswith(tuple(ls))]
- for input_file in input_list:
- if input_file.endswith('.pdf'):
- text = FileSynthesis.pdf_to_text(input_file)
- output_file = input_file[:-4]
-
- elif input_file.lower().endswith(tuple(extdoc)):
-
- text = FileSynthesis.docx_to_text(input_file)
- output_file = input_file[:-5]
-
- elif input_file.endswith('.txt'):
- text = FileSynthesis.text_file(input_file)
- output_file = input_file[:-4]
-
- else:
- logger.error('Unsupported file format. Please provide \
-a PDF, txt, or Word document.')
- sys.exit(1)
- try:
- FileSynthesis.Synthesise(None, text, output_file)
- except KeyboardInterrupt:
- sys.exit(1)
-
-
-###############################################################################
-# Convert video file to from one format to another'''
-###############################################################################
-
-
-class VideoConverter:
-
- def __init__(self, input_file, out_format):
- self.input_file = input_file
- self.out_format = out_format
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- def CONVERT_VIDEO(self):
- try:
- input_list = self.preprocess()
- out_f = self.out_format.upper()
- input_list = [item for item in input_list if any(
- item.upper().endswith(ext) for ext in SUPPORTED_VIDEO_FORMATS)]
- print(F"{DYELLOW}Initializing conversion..{RESET}")
-
- for file in input_list:
- if out_f.upper() in SUPPORTED_VIDEO_FORMATS:
- _, ext = os.path.splitext(file)
- output_filename = _ + '.' + out_f.lower()
- print(output_filename)
- else:
- print("Unsupported output format")
- sys.exit(1)
- format_codec = {
- "MP4": "mpeg4",
- "AVI": "rawvideo",
- # "OGV": "avc",
- "WEBM": "libvpx",
- "MOV": "mpeg4",
- "MKV": "MPEG4",
- "FLV": "flv"
- # "WMV": "WMV"
- }
- '''Load the video file'''
- print(f"{DBLUE}oad file{RESET}")
- video = VideoFileClip(file)
- '''Export the video to a different format'''
- print(f"{DMAGENTA}Converting file to {output_filename}{RESET}")
- video.write_videofile(
- output_filename, codec=format_codec[out_f])
- '''Close the video file'''
- print(f"{DGREEN}Done{RESET}")
- video.close()
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
- except Exception as e:
- print(e)
-
-
-###############################################################################
-# Convert Audio file to from one format to another'''
-###############################################################################
-
-
-class AudioConverter:
-
- def __init__(self, input_file, out_format):
- self.input_file = input_file
- self.out_format = out_format
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- def pydub_conv(self):
- input_list = self.preprocess()
- out_f = self.out_format
- input_list = [item for item in input_list if any(
- item.lower().endswith(ext) for ext in SUPPORTED_AUDIO_FORMATS)]
- print(F"{DYELLOW}Initializing conversion..{RESET}")
- for file in input_list:
- if out_f.lower() in SUPPORTED_AUDIO_FORMATS:
- _, ext = os.path.splitext(file)
- output_filename = _ + '.' + out_f
- else:
- print("Unsupported output format")
- sys.exit(1)
- fmt = ext[1:]
- print(fmt, out_f)
- audio = pydub.AudioSegment.from_file(file, fmt)
- print(f"{DMAGENTA}Converting to {output_filename}{RESET}")
- audio.export(output_filename, format=out_f)
- # new_audio = pydub.AudioSegment.from_file('output_audio.')
- print(f"{DGREEN}Done{RESET}")
- # play(new_audio)
- # new_audio.close()
-
-
-###############################################################################
-# Convert images file to from one format to another
-###############################################################################
-
-
-class ImageConverter:
-
- def __init__(self, input_file, out_format):
- self.input_file = input_file
- self.out_format = out_format
-
- def preprocess(self):
- try:
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
- except FileNotFoundError:
- print("File not found")
- sys.exit(1)
-
- def convert_image(self):
- try:
- input_list = self.preprocess()
- out_f = self.out_format.upper()
-
- input_list = [item for item in input_list if any(
- item.lower().endswith(ext) for ext in SUPPORTED_IMAGE_FORMATS[out_f])]
- for file in input_list:
- print(file)
- if out_f.upper() in SUPPORTED_IMAGE_FORMATS:
- _, ext = os.path.splitext(file)
- output_filename = _ + \
- SUPPORTED_IMAGE_FORMATS[out_f].lower()
- else:
- print("Unsupported output format")
- sys.exit(1)
- '''Load the image using OpenCV: '''
- print(F"{DYELLOW}Reading input image..{RESET}")
- img = cv2.imread(file)
- '''Convert the OpenCV image to a PIL image: '''
- print(f"{DMAGENTA}Converting to PIL image{RESET}")
- pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
- '''Save the PIL image to a different format: '''
- print(f"\033[1;36mSaving image as {output_filename}{RESET}")
- pil_img.save(output_filename, out_f)
- print(f"{DGREEN}Done{RESET}")
- '''Load the image back into OpenCV: '''
- print(f"{DMAGENTA}Load and display image{RESET}")
- opencv_img = cv2.imread(output_filename)
- '''Display the images: '''
- cv2.imshow('OpenCV Image', opencv_img)
- # pil_img.show()
- '''Wait for the user to press a key and close the windows: '''
- cv2.waitKey(0)
- cv2.destroyAllWindows()
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
diff --git a/build/lib/filemac/dd.py b/build/lib/filemac/dd.py
deleted file mode 100644
index 90fbe1f..0000000
--- a/build/lib/filemac/dd.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from OCRTextExtractor import ExtractText
-img_objs = ['/home/skye/Software Engineering/Y2/SEM2/RV/SPE 2210 Client Side Programming Year II Semester II_1.png']
-text = ''
-for i in img_objs:
- extract = ExtractText(i)
- tx = extract.OCR()
- print(tx)
- if tx is not None:
- text += tx
-print(text)
diff --git a/build/lib/filemac/fmac.py b/build/lib/filemac/fmac.py
deleted file mode 100644
index 91b28ba..0000000
--- a/build/lib/filemac/fmac.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env python3.11.7
-# multimedia_cli/main.py
-import argparse
-import logging
-import logging.handlers
-import sys
-
-from . import handle_warnings
-from .AudioExtractor import ExtractAudio
-from .colors import (RESET, DYELLOW)
-from .converter import (AudioConverter, FileSynthesis, ImageConverter,
- MakeConversion, Scanner, VideoConverter)
-from .formats import (SUPPORTED_AUDIO_FORMATS_SHOW, SUPPORTED_DOC_FORMATS,
- SUPPORTED_IMAGE_FORMATS_SHOW,
- SUPPORTED_VIDEO_FORMATS_SHOW)
-from .image_op import Compress_Size
-from .OCRTextExtractor import ExtractText
-from .Simple_v_Analyzer import SA
-
-# from .formats import SUPPORTED_INPUT_FORMATS, SUPPORTED_OUTPUT_FORMATS
-handle_warnings
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class Eval:
-
- def __init__(self, file, outf):
- self.file = file
- self.outf = outf
-
- def document_eval(self):
- ls = ["docx", "doc"]
- sheetls = ["xlsx", "xls"]
- try:
- conv = MakeConversion(self.file)
- if self.file.lower().endswith(tuple(sheetls)):
- if self.outf.lower() == "csv":
- conv.convert_xlsx_to_csv()
- elif self.outf.lower() == "txt":
- conv.convert_xls_to_text()
- elif self.outf.lower() == "doc" or self.outf == "docx":
- conv.convert_xls_to_word()
- elif self.outf.lower() == "db":
- conv.convert_xlsx_to_database()
-
- elif self.file.lower().endswith(tuple(ls)):
- if self.outf.lower() == "txt":
- conv.word_to_txt()
- elif self.outf.lower() == "pdf":
- conv.word_to_pdf()
- elif self.outf.lower() == "pptx":
- conv.word_to_pptx()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- elif self.file.endswith('txt'):
- if self.outf.lower() == "pdf":
- conv.txt_to_pdf()
- elif self.outf.lower() == "doc" or self.outf == "docx" or self.outf == "word":
- conv.text_to_word()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- elif self.file.lower().endswith('ppt') or self.file.lower().endswith('pptx'):
- if self.outf.lower() == "doc" or self.outf.lower() == "docx" or self.outf == "word":
- conv.ppt_to_word()
-
- elif self.file.lower().endswith('pdf'):
- if self.outf.lower() == "doc" or self.outf.lower() == "docx" or self.outf == "word":
- conv.pdf_to_word()
- elif self.outf.lower() == "txt":
- conv.pdf_to_txt()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- else:
- print(f"{DYELLOW}Unsupported Conversion type{RESET}")
- except Exception as e:
- logger.error(e)
-
-
-def main():
- parser = argparse.ArgumentParser(
- description="Multimedia Element Operations")
-
- parser.add_argument(
- "--convert_doc", help=f"Converter document file(s) to different format ie pdf_to_docx.\
- example {DYELLOW}filemac --convert_doc example.docx -t pdf{RESET}")
-
- parser.add_argument(
- "--convert_audio", help=f"Convert audio file(s) to and from different format ie mp3 to wav\
- example {DYELLOW}filemac --convert_audio example.mp3 -t wav{RESET}")
-
- parser.add_argument(
- "--convert_video", help=f"Convert video file(s) to and from different format ie mp4 to mkv.\
- example {DYELLOW}filemac --convert_video example.mp4 -t mkv{RESET}")
-
- parser.add_argument(
- "--convert_image", help=f"Convert image file(s) to and from different format ie png to jpg.\
- example {DYELLOW}filemac --convert_image example.jpg -t png{RESET}")
-
- parser.add_argument(
-
- "--convert_doc2image", help=f"Convert documents to images ie png to jpg.\
- example {DYELLOW}filemac --convert_doc2image example.pdf -t png{RESET}")
-
- parser.add_argument("-xA", "--extract_audio",
- help=f"Extract audio from a video.\
- example {DYELLOW}filemac -xA example.mp4 {RESET}")
-
- parser.add_argument(
- "-Av", "--Analyze_video", help=f"Analyze a given video.\
- example {DYELLOW}filemac --analyze_video example.mp4 {RESET}")
-
- parser.add_argument("-t", "--target_format",
- help="Target format for conversion (optional)")
-
- parser.add_argument(
- "--resize_image", help=f"change size of an image compress/decompress \
- example {DYELLOW}filemac --resize_image example.png -t png {RESET}")
-
- parser.add_argument("-t_size", help="used in combination with resize_image \
- to specify target image size")
-
- parser.add_argument(
- "-S", "--scan", help=f"Scan pdf file and extract text\
- example {DYELLOW}filemac --scan example.pdf {RESET}")
-
- parser.add_argument(
- "-SA", "--scanAsImg", help=f"Scan pdf file and extract text\
- example {DYELLOW}filemac --scanAsImg example.pdf {RESET}")
-
- parser.add_argument("--OCR", help=f"Extract text from an image.\
- example {DYELLOW}filemac --OCR image.png{RESET}")
-
- args = parser.parse_args()
-
-
-# Call function to handle document conversion inputs before begining conversion
- if args.convert_doc == 'help':
- print(SUPPORTED_DOC_FORMATS)
- sys.exit(1)
- if args.convert_doc:
- ev = Eval(args.convert_doc, args.target_format)
- ev.document_eval()
-
-
-# Call function to handle video conversion inputs before begining conversion
- elif args.convert_video:
- if args.convert_video == 'help' or args.convert_video is None:
- print(SUPPORTED_VIDEO_FORMATS_SHOW)
- sys.exit(1)
- ev = VideoConverter(args.convert_video, args.target_format)
- ev.CONVERT_VIDEO()
-# Call function to handle image conversion inputs before begining conversion
-
- elif args.convert_image:
- if args.convert_image == 'help' or args.convert_image is None:
- print(SUPPORTED_IMAGE_FORMATS_SHOW)
- sys.exit(1)
- conv = ImageConverter(args.convert_image, args.target_format)
- conv.convert_image()
-
-# Handle image resizing
- elif args.resize_image:
- res = Compress_Size(args.resize_image)
- res.resize_image(args.t_size)
-
-# Handle documents to images conversion
- elif args.convert_doc2image:
- conv = MakeConversion(args.convert_doc2image)
- conv.doc2image(args.target_format)
-
-# Call function to handle audio conversion inputs before begining conversion
- elif args.convert_audio:
- if args.convert_audio == 'help' or args.convert_audio is None:
- print(SUPPORTED_AUDIO_FORMATS_SHOW)
- sys.exit(1)
- ev = AudioConverter(args.convert_audio, args.target_format)
- ev.pydub_conv()
-
-
-# Call module to evaluate audio files before making audio extraction from input video files conversion
- elif args.extract_audio:
- vi = ExtractAudio(args.extract_audio)
- vi.moviepyextract()
-
-# Call module to scan the input and extract text
- elif args.scan:
- sc = Scanner(args.scan)
- sc.scanPDF()
-
-# Call module to scan the input FILE as image object and extract text
- elif args.scanAsImg:
- sc = Scanner(args.scanAsImg)
- tx = sc.scanAsImgs()
-# Call module to handle Candidate images for text extraction inputs before begining conversion
- elif args.OCR:
- conv = ExtractText(args.OCR)
- conv.OCR()
-
- elif args.Analyze_video:
- analyzer = SA(args.Analyze_video)
- analyzer.SimpleAnalyzer()
-
-
-if __name__ == "__main__":
- main()
diff --git a/build/lib/filemac/formats.py b/build/lib/filemac/formats.py
deleted file mode 100644
index 6490294..0000000
--- a/build/lib/filemac/formats.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# multimedia_cli/formats.py
-from .colors import CYAN, DBLUE, DMAGENTA, DYELLOW, RESET
-
-SUPPORTED_DOC_FORMATS = f"""
-|---------------------------------------------------------------------------
-|{DBLUE}Input format{RESET} |{DBLUE}Output format{RESET} |
-|________________________________|__________________________________________|
-| xlsx {DYELLOW}-------------------->{RESET}|csv txt doc/docx db(sql) |
-| | |
-| doc/docx{DYELLOW}-------------------->{RESET}|txt pdf ppt/pptx audio(ogg) |
-| | |
-| txt {DYELLOW}-------------------->{RESET}|pdf docx/doc audio(ogg) |
-| | |
-| pdf {DYELLOW}-------------------->{RESET}|doc/docx txt audio(ogg) |
-| | |
-| pptx/ppt{DYELLOW}-------------------->{RESET}|doc/docx |
-| |
-|___________________________________________________________________________|
-"""
-
-
-def p():
- print(SUPPORTED_DOC_FORMATS)
-
-
-# Add supported input and output formats for each media type
-SUPPORTED_AUDIO_FORMATS = ["wav", # Waveform Audio File Format
- "mp3", # MPEG Audio Layer III
- "ogg",
- "flv",
- "ogv",
- "webm",
- "aac", # Advanced Audio Codec
- "bpf",
- "aiff",
- "flac"] # Free Lossless Audio Codec)
-
-SUPPORTED_AUDIO_FORMATS_SHOW = f'''
-|==============================|
-| {DBLUE}Supported I/O formats {RESET} |
-|==============================|
-| {CYAN} wav {DYELLOW} |
-| {CYAN} mp3 {DYELLOW} |
-| {CYAN} ogg {DYELLOW} |
-| {CYAN} flv {DYELLOW} |
-| {CYAN} ogv {DYELLOW} |
-| {CYAN} matroska {DYELLOW} |
-| {CYAN} mov {DYELLOW} |
-| {CYAN} webm {DYELLOW} |
-| {CYAN} aac {DYELLOW} |
-| {CYAN} bpf {DYELLOW} |
---------------------------------
-
-'''
-
-SUPPORTED_VIDEO_FORMATS = ["MP4", # MPEG-4 part 14
- "AVI", # Audio Video Interleave
- "OGV",
- "WEBM",
- "MOV", # QuickTime Movie
- "MKV", # Matroska Multimedia Container - MKV is known for its support of high-quality content.
- "FLV", #
- "WMV"]
-
-SUPPORTED_VIDEO_FORMATS_SHOW = f'''
-,_______________________________________,
-|x| {DBLUE}Supported I/O formats{RESET} |x|
-|x|-----------------------------------{DYELLOW}|x|
-|x| {DMAGENTA} MP4 {DYELLOW} |x|
-|x| {DMAGENTA} AVI {DYELLOW} |x|
-|x| {DMAGENTA} OGV {DYELLOW} |x|
-|x| {DMAGENTA} WEBM{DYELLOW} |x|
-|x| {DMAGENTA} MOV {DYELLOW} |x|
-|x| {DMAGENTA} MKV {DYELLOW} |x|
-|x| {DMAGENTA} FLV {DYELLOW} |x|
-|x| {DMAGENTA} WMV {DYELLOW} |x|
-|,|___________________________________|,|{DYELLOW}
-'''
-
-SUPPORTED_IMAGE_FORMATS = {
- "JPEG": ".jpg", # Joint Photographic Experts Group -Lossy compression
- "PNG": ".png", # Joint Photographic Experts Group - not lossy
- "GIF": ".gif", # Graphics Interchange Format
- "BM": ".bmp",
- "BMP": ".dib",
- "DXF": ".dxf", # Autocad format 2D
- "TIFF": ".tiff", # Tagged Image File Format A flexible and high-quality image format that supports lossless compression
- "EXR": ".exr",
- "pic": ".pic",
- "pict": "pct",
- "PDF": ".pdf",
- "WebP": ".webp",
- "ICNS": ".icns",
- "PSD": ".psd",
- "SVG": ".svg", # Scalable vector Graphics
- "EPS": ".eps",
- "PostSciript": ".ps",
- "PS": ".ps"}
-
-SUPPORTED_IMAGE_FORMATS_SHOW = f'''
-__________________________________________
-|x|{DBLUE}Supported I/O formats{RESET} |x|
-|x|_____________________________________{DYELLOW}|x|
-|x| {DMAGENTA} JPEG {DYELLOW} |x|
-|x| {DMAGENTA} PNG {DYELLOW} |x|
-|x| {DMAGENTA} GIF {DYELLOW} |x|
-|x| {DMAGENTA} BM {DYELLOW} |x|
-|x| {DMAGENTA} TIFF {DYELLOW} |x|
-|x| {DMAGENTA} EXR {DYELLOW} |x|
-|x| {DMAGENTA} PDF {DYELLOW} |x|
-|x| {DMAGENTA} WebP{DYELLOW} |x|
-|x| {DMAGENTA} ICNS {DYELLOW} |x|
-|x| {DMAGENTA} PSD {DYELLOW} |x|
-|x| {DMAGENTA} SVG {DYELLOW} |x|
-|x| {DMAGENTA} EPS {DYELLOW} |x|
-|x| {DMAGENTA} Postscript {DYELLOW} |x|
-|_|_____________________________________|x|
-'''
-
-SUPPORTED_DOCUMENT_FORMATS = ['pdf', 'doc', 'docx', 'csv', 'xlsx', 'xls',
- 'ppt', 'pptx', 'txt', 'ogg', 'mp3', 'audio']
diff --git a/build/lib/filemac/image_op.py b/build/lib/filemac/image_op.py
deleted file mode 100644
index 61cfe6d..0000000
--- a/build/lib/filemac/image_op.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from PIL import Image
-import os
-import logging
-import logging.handlers
-
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class Compress_Size:
-
- def __init__(self, input_image_path):
- self.input_image_path = input_image_path
-
- def resize_image(self, target_size):
- ext = input_image_path[-3:]
- output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}"
-
- original_image = Image.open(input_image_path)
- original_size = original_image.size
- size = os.path.getsize(input_image_path)
- print(f"Original image size \033[93m{size/1000_000:.2f}MiB")
-
- # Calculate the aspect ratio of the original image
- aspect_ratio = original_size[0] / original_size[1]
-
- # Convert the target sixze to bytes
- tz = int(target_size[:-2])
- if target_size[-2:].lower() == 'mb':
- target_size_bytes = tz * 1024 * 1024
- elif target_size[-2:].lower() == 'kb':
- target_size_bytes = tz * 1024
- else:
- logger.warning("Invalid units. Please use either \033[1;95m'MB'\033[0m\
- or \033[1;95m'KB'\033[0m")
-
- # Calculate the new dimensions based on the target size
- new_width, new_height = Compress_Size.calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes)
- print("\033[94mProcessing ..\033[0m")
- resized_image = original_image.resize((new_width, new_height))
- resized_image.save(output_image_path)
- t_size = os.path.getsize(output_image_path)/1000_000
- print("\033[1;92mOk\033[0m")
- print(f"Image resized to \033[1;93m{t_size:.2f}\033[0m and saved to \033[1;93m{output_image_path}")
-
- def calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes):
- # Calculate the new dimensions based on the target size in bytes
- original_size_bytes = original_size[0] * original_size[1] * 3 # Assuming 24-bit color depth
- scale_factor = (target_size_bytes / original_size_bytes) ** 0.5
-
- new_width = int(original_size[0] * scale_factor)
- new_height = int(original_size[1] * scale_factor)
-
- return new_width, new_height
-
-
-if __name__ == "__main__":
- input_image_path = input("Enter the path to the input image: ")
- target_size = input("Enter the target output size (MB or KB): ")
- ext = input_image_path[-3:]
- output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}"
-
- init = Compress_Size(input_image_path)
- init.resize_image(target_size)
diff --git a/docs/CLI_ENHANCEMENT_PLAN.md b/docs/CLI_ENHANCEMENT_PLAN.md
new file mode 100644
index 0000000..27e4e0e
--- /dev/null
+++ b/docs/CLI_ENHANCEMENT_PLAN.md
@@ -0,0 +1,342 @@
+# FileMAC CLI Enhancement Plan
+
+## Overview
+
+This document outlines the comprehensive plan to enhance FileMAC's command-line interface using the Rich and pyperclip libraries to create a more robust, user-friendly experience.
+
+## Current State Analysis
+
+### Strengths
+- ✅ Rich library already integrated for progress bars
+- ✅ Pyperclip available in environment
+- ✅ Existing color support via custom utilities
+- ✅ Comprehensive functionality across 40+ commands
+- ✅ Well-structured operation mapping system
+
+### Opportunities for Improvement
+- ❌ Basic argparse interface could be more user-friendly
+- ❌ Text-based help lacks visual appeal
+- ❌ Limited interactive elements
+- ❌ No clipboard integration
+- ❌ Inconsistent progress feedback
+
+## Enhancement Strategy
+
+### Phase 1: Foundation (Week 1-2)
+
+**Objective**: Establish core utilities and infrastructure
+
+**Tasks**:
+1. **Create Rich Console Wrapper** (`filemac/utils/rich_utils.py`)
+ - Custom theme matching existing color scheme
+ - Standardized message formats (info, success, error, warning)
+ - Console initialization and configuration
+
+2. **Implement Clipboard Utilities** (`filemac/utils/clipboard.py`)
+ - `copy_to_clipboard()` function
+ - `paste_from_clipboard()` function
+ - Error handling for clipboard operations
+
+3. **Basic Rich Integration**
+ - Replace `print()` statements with Rich console methods
+ - Add color consistency across modules
+ - Create standard message formats
+
+### Phase 2: Core Enhancements (Week 3-4)
+
+**Objective**: Enhance core CLI functionality with Rich features
+
+**Tasks**:
+1. **Enhanced Help System** (`filemac/cli/help.py`)
+ - Rich-formatted command tables
+ - Categorized command display
+ - Interactive help navigation
+
+2. **Progress Bars for All Operations** (`filemac/utils/progress.py`)
+ - Standardized progress bar creation
+ - Consistent styling across modules
+ - Time estimates and completion percentages
+
+3. **Enhanced Error Handling** (Enhance `filemac/core/exceptions.py`)
+ - Rich-formatted error panels
+ - Contextual error information
+ - Suggested solutions and troubleshooting
+
+### Phase 3: Advanced Features (Week 5-6)
+
+**Objective**: Add interactive elements and workflow improvements
+
+**Tasks**:
+1. **Interactive File Selection** (`filemac/cli/interactive.py`)
+ - Visual file listing with tables
+ - Multi-file selection interface
+ - File preview capabilities
+
+2. **Clipboard Workflow Integration** (`filemac/cli/clipboard_workflows.py`)
+ - Clipboard-based input workflows
+ - Result copying to clipboard
+ - Batch operation support
+
+3. **Operation Summary Display** (`filemac/cli/summary.py`)
+ - Visual operation summaries
+ - Success/error breakdowns
+ - Clipboard copy options
+
+### Phase 4: Integration (Week 7)
+
+**Objective**: Full integration with existing CLI
+
+**Tasks**:
+1. **Enhanced CLI Entry Point** (Modify `filemac/cli/cli.py`)
+ - Rich welcome message
+ - Clipboard support flag
+ - Enhanced argument parsing
+
+2. **Operation Mapper Enhancement** (Extend `OperationMapper`)
+ - Rich progress display
+ - Clipboard integration
+ - Enhanced completion messages
+
+## Implementation Details
+
+### Rich Utilities Implementation
+
+```python
+# filemac/utils/rich_utils.py
+from rich.console import Console
+from rich.theme import Theme
+
+custom_theme = Theme({
+ "info": "cyan",
+ "warning": "yellow",
+ "error": "bold red",
+ "success": "bold green",
+ "debug": "magenta",
+ "prompt": "bold blue"
+})
+
+console = Console(theme=custom_theme)
+
+def print_info(message):
+ console.print(f"[info]ℹ {message}[/info]")
+
+def print_success(message):
+ console.print(f"[success]✓ {message}[/success]")
+
+def print_error(message):
+ console.print(f"[error]❌ {message}[/error]")
+
+def print_warning(message):
+ console.print(f"[warning]⚠ {message}[/warning]")
+```
+
+### Clipboard Utilities Implementation
+
+```python
+# filemac/utils/clipboard.py
+import pyperclip
+from .rich_utils import console, print_success, print_error
+
+def copy_to_clipboard(text):
+ """Copy text to system clipboard"""
+ try:
+ pyperclip.copy(text)
+ print_success("Copied to clipboard!")
+ return True
+ except Exception as e:
+ print_error(f"Failed to copy to clipboard: {str(e)}")
+ return False
+
+def paste_from_clipboard():
+ """Get text from system clipboard"""
+ try:
+ content = pyperclip.paste()
+ return content if content else None
+ except Exception as e:
+ print_error(f"Failed to access clipboard: {str(e)}")
+ return None
+```
+
+### Enhanced Help System
+
+```python
+# filemac/cli/help.py
+from rich.panel import Panel
+from rich.table import Table
+from rich.box import ROUNDED
+from ..utils.rich_utils import console
+
+def show_main_help():
+ """Display enhanced help with Rich formatting"""
+ table = Table(
+ title="📁 FileMAC Commands",
+ show_header=True,
+ header_style="bold magenta",
+ box=ROUNDED,
+ border_style="blue"
+ )
+
+ table.add_column("Command", style="cyan", no_wrap=True)
+ table.add_column("Description", style="white")
+ table.add_column("Example", style="green")
+
+ commands = [
+ ("--convert_doc", "Convert documents between formats", "filemac --convert_doc file.docx -to pdf"),
+ ("--convert_audio", "Convert audio files", "filemac --convert_audio file.mp3 -to wav"),
+ # ... more commands
+ ]
+
+ for cmd, desc, example in commands:
+ table.add_row(cmd, desc, example)
+
+ panel = Panel.fit(
+ table,
+ title="[bold]FileMAC Help System[/bold]",
+ border_style="blue",
+ subtitle="Advanced file conversion toolkit"
+ )
+
+ console.print(panel)
+```
+
+## Migration Strategy
+
+### Backward Compatibility
+- ✅ Keep all existing command-line arguments
+- ✅ Maintain current functionality
+- ✅ Add new features as optional flags
+- ✅ Preserve existing workflows
+
+### Gradual Rollout Plan
+1. **Week 1-2**: Foundation utilities
+2. **Week 3-4**: Core Rich enhancements
+3. **Week 5-6**: Advanced interactive features
+4. **Week 7**: Full integration and testing
+
+### Risk Assessment
+
+**Low Risk**:
+- Rich already in dependencies
+- Gradual migration approach
+- Backward compatibility maintained
+
+**Medium Risk**:
+- User adaptation to new UI
+- Clipboard permissions on some systems
+- Performance impact of Rich rendering
+
+**Mitigation**:
+- Provide fallback to text mode
+- Add configuration options
+- Comprehensive error handling
+- User education
+
+## Benefits Realization
+
+### Immediate Benefits
+- ✅ Better visual feedback for users
+- ✅ Professional, modern CLI appearance
+- ✅ Consistent color scheme and formatting
+- ✅ Enhanced error messages with context
+
+### Medium-Term Benefits
+- ✅ Faster workflows with clipboard integration
+- ✅ Better user experience with progress indicators
+- ✅ Interactive file selection and processing
+- ✅ Visual operation summaries
+
+### Long-Term Benefits
+- ✅ Foundation for advanced CLI features
+- ✅ Improved user adoption and satisfaction
+- ✅ Competitive advantage in CLI tools
+- ✅ Easier maintenance and extension
+
+## Testing Approach
+
+### Unit Testing
+- Test Rich utilities in isolation
+- Verify clipboard functionality
+- Validate progress bar behavior
+
+### Integration Testing
+- Test with existing CLI commands
+- Verify backward compatibility
+- Check error handling
+
+### User Testing
+- Gather feedback on new UI
+- Test interactive workflows
+- Validate clipboard integration
+
+### Performance Testing
+- Measure Rich rendering impact
+- Test with large file operations
+- Validate progress bar performance
+
+## Documentation Requirements
+
+### Updated Documentation
+- ✅ README.md with Rich features
+- ✅ Examples of new clipboard workflows
+- ✅ Visual guides for enhanced UI
+- ✅ Updated help system documentation
+
+### User Education
+- ✅ Migration guide for existing users
+- ✅ New feature tutorials
+- ✅ Best practices for Rich CLI usage
+- ✅ Troubleshooting guide
+
+## Implementation Timeline
+
+```mermaid
+gantt
+ title FileMAC CLI Enhancement Timeline
+ dateFormat YYYY-MM-DD
+ section Phase 1: Foundation
+ Rich Utilities :a1, 2023-11-01, 5d
+ Clipboard Helpers :a2, 2023-11-06, 3d
+ Basic Integration :a3, 2023-11-09, 2d
+
+ section Phase 2: Core Enhancements
+ Enhanced Help :b1, 2023-11-13, 4d
+ Progress Bars :b2, 2023-11-17, 3d
+ Error Handling :b3, 2023-11-20, 3d
+
+ section Phase 3: Advanced Features
+ Interactive Selection :c1, 2023-11-24, 5d
+ Clipboard Workflows :c2, 2023-11-29, 4d
+ Operation Summaries :c3, 2023-12-03, 3d
+
+ section Phase 4: Integration
+ CLI Enhancement :d1, 2023-12-06, 5d
+ Testing & Debugging :d2, 2023-12-11, 4d
+ Documentation :d3, 2023-12-15, 3d
+```
+
+## Success Metrics
+
+### Quantitative Metrics
+- ✅ Reduction in user errors
+- ✅ Increase in command usage
+- ✅ Faster operation completion times
+- ✅ Higher user satisfaction scores
+
+### Qualitative Metrics
+- ✅ Positive user feedback
+- ✅ Increased feature adoption
+- ✅ Improved documentation clarity
+- ✅ Enhanced professional appearance
+
+## Conclusion
+
+This enhancement plan provides a clear, low-risk path to transform FileMAC's CLI from functional to exceptional. By leveraging existing Rich integration and adding strategic pyperclip functionality, we can significantly improve user experience and productivity while maintaining all existing functionality.
+
+The gradual migration approach ensures minimal disruption and allows for continuous feedback and improvement throughout the process.
+
+**Next Steps**:
+1. Implement Phase 1 foundation utilities
+2. Begin gradual integration with existing modules
+3. Test thoroughly and gather user feedback
+4. Proceed through phases as planned
+5. Document and communicate changes effectively
\ No newline at end of file
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..2a73f80
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,137 @@
+
+
+
+
+
+ FileMAC - Multimedia File Operation Kit
+
+
+
+
+
+
+
+
FileMAC
+
+ A Comprehensive Multimedia File Operation Kit
+
+
+
+
+
+
+
+
+
Introduction
+
+ FileMAC is a Python-based command-line interface (CLI) utility
+ designed for efficient file conversion, manipulation, and analysis. It
+ supports various multimedia operations, including document conversion,
+ file analysis, and text-to-speech conversion using Google's
+ Text-to-Speech (gTTS) library.
+
+
+
+
+
+
Features
+
+
Convert documents between various formats.
+
Analyze and manipulate multimedia files.
+
Generate audio files from text using gTTS.
+
+ Command-line interface for easy integration into scripts and
+ workflows.
+
+
Supports Linux operating systems.
+
+ Encapsulates reputable multimedia elements for robust performance.
+
+
+
+
+
+
+
Installation
+
Install FileMAC using pip:
+
pip install filemac
+
+ Alternatively, install directly from the GitHub repository:
+
Detail-oriented Electrical and Electronics Technician with specialized training in power systems and hands-on experience in geothermal power plant operations. Skilled in electrical system maintenance, troubleshooting, and circuit analysis.
+
+
+
+
EDUCATION
+
+ 2021 - 2024
+ Ikutha Technical and Vocational College
+ Diploma in Electrical and Electronics (Power Option)
+ Completed: April 3, 2024
+
+
+
+
+
PROFESSIONAL EXPERIENCE
+
+ May 2023 - July 2023
+ KenGen - Olkaria Geothermal Power Plants
+ Electrical Maintenance Intern
+
+
+
Performed maintenance of electrical systems and power distribution equipment
+
Maintained turbine generators and auxiliary systems
+ Detail-oriented Electrical and Electronics Technician with specialized training in power systems
+ and hands-on experience in geothermal power plant operations. Skilled in electrical system maintenance,
+ troubleshooting, and circuit analysis. Seeking to leverage technical expertise and problem-solving
+ abilities in a challenging electrical engineering role.
+
+
+
+
+
Education
+
+
+
2021 - 2024
+
Ikutha Technical and Vocational College
+
Diploma in Electrical and Electronics (Power Option)
+
Completed: April 3, 2024
+
+
+
+
January 2016 - November 2019
+
Kea Secondary School
+
Kenya Certificate of Secondary Education (KCSE)
+
Mean Grade: C- (Minus)
+
+
+
+
+
Technical Skills
+
+
Electrical System Maintenance
+
Power System Operations
+
Circuit Analysis
+
PLC Programming
+
Solar Installation
+
Transformer Maintenance
+
Battery Systems
+
Technical Reporting
+
+
+
+
+ """
diff --git a/filemac/core/html/styles/__init__.py b/filemac/core/html/styles/__init__.py
new file mode 100644
index 0000000..208878b
--- /dev/null
+++ b/filemac/core/html/styles/__init__.py
@@ -0,0 +1,4 @@
+from .css_parser import CSSParser
+from .style_applier import StyleApplier
+
+__all__ = ["CSSParser", "StyleApplier"]
diff --git a/filemac/core/html/styles/css_parser.py b/filemac/core/html/styles/css_parser.py
new file mode 100644
index 0000000..cbbffda
--- /dev/null
+++ b/filemac/core/html/styles/css_parser.py
@@ -0,0 +1,69 @@
+"""
+Advanced CSS parsing functionality
+"""
+
+import re
+from typing import Dict, List
+
+
+class CSSParser:
+    """Small regex-based CSS parser mapping selectors to declarations.
+
+    Only flat ``selector { prop: value; }`` rules are recognised; nested
+    blocks such as ``@media`` are not interpreted specially.
+    """
+
+    def __init__(self):
+        # selector text -> {property name: value}; grows across parse_css calls.
+        self.styles = {}
+
+    def parse_css(self, css_content: str) -> Dict[str, Dict]:
+        """Parse CSS text and merge its rules into ``self.styles``.
+
+        Grouped selectors (``h1, h2 { ... }``) are registered once per
+        selector, and a selector that appears in several rules accumulates
+        declarations, with later values overriding earlier ones.
+
+        Args:
+            css_content: Raw CSS source.
+
+        Returns:
+            The parser's selector -> declarations mapping (the same object
+            as ``self.styles``).
+        """
+        # Strip /* ... */ comments, including multi-line ones.
+        css_content = re.sub(r"/\*.*?\*/", "", css_content, flags=re.DOTALL)
+
+        for selector_group, properties in re.findall(
+            r"([^{]+)\{([^}]+)\}", css_content
+        ):
+            style_dict = self._parse_properties(properties)
+            # Split "a, b" so each selector can be looked up on its own.
+            for selector in selector_group.split(","):
+                selector = selector.strip()
+                if selector:
+                    self.styles.setdefault(selector, {}).update(style_dict)
+
+        return self.styles
+
+    def _parse_properties(self, properties: str) -> Dict[str, str]:
+        """Turn a ``prop: value; prop: value`` string into a dictionary.
+
+        Property names are lower-cased; values are kept verbatim apart from
+        surrounding whitespace. Declarations without a colon are ignored.
+        """
+        style_dict = {}
+        for declaration in properties.split(";"):
+            # partition() keeps colons inside the value (e.g. URLs) intact.
+            prop, sep, value = declaration.strip().partition(":")
+            if sep:
+                style_dict[prop.strip().lower()] = value.strip()
+        return style_dict
+
+    def get_styles_for_element(
+        self, tag: str, classes: List[str] = None, element_id: str = None
+    ) -> Dict[str, str]:
+        """Combine the styles that apply to one element.
+
+        Styles are layered tag first, then each class in the given order,
+        then the ID, so later layers override earlier ones.
+
+        Args:
+            tag: Element tag name, looked up as-is.
+            classes: Class names without the leading dot.
+            element_id: Element ID without the leading ``#``.
+
+        Returns:
+            A new dictionary of merged declarations (possibly empty).
+        """
+        selectors = [tag]
+        selectors.extend(f".{class_name}" for class_name in classes or ())
+        if element_id:
+            selectors.append(f"#{element_id}")
+
+        combined_styles = {}
+        for selector in selectors:
+            combined_styles.update(self.styles.get(selector, {}))
+        return combined_styles
diff --git a/filemac/core/html/styles/style_applier.py b/filemac/core/html/styles/style_applier.py
new file mode 100644
index 0000000..feb7bb6
--- /dev/null
+++ b/filemac/core/html/styles/style_applier.py
@@ -0,0 +1,83 @@
+"""
+Style application logic for different CSS properties
+"""
+
+from docx.shared import Pt, RGBColor
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from typing import Dict
+import re
+
+from ..utils.color_utils import ColorConverter
+
+
+class StyleApplier:
+    """Applies CSS text styles to DOCX runs."""
+
+    # Point size that relative units (em, rem, %) are resolved against.
+    _BASE_FONT_PT = 11
+
+    def __init__(self):
+        self.color_converter = ColorConverter()
+
+    def apply_text_styles(self, run, styles: Dict[str, str]):
+        """Apply every property in ``styles`` to ``run``, in dict order."""
+        for prop, value in styles.items():
+            self._apply_text_style(run, prop, value)
+
+    def _apply_text_style(self, run, prop: str, value: str):
+        """Apply a single CSS text property to ``run``.
+
+        Unknown properties and unsupported values are ignored. Styling is
+        best effort: a failure on one property is logged at debug level and
+        never aborts the conversion.
+        """
+        try:
+            # Keyword comparisons are case- and whitespace-insensitive.
+            keyword = value.strip().lower()
+
+            if prop == "color":
+                color = self.color_converter.parse_color(value)
+                if color:
+                    run.font.color.rgb = color
+
+            elif prop == "font-size":
+                size = self._parse_font_size(value)
+                if size:
+                    run.font.size = Pt(size)
+
+            elif prop == "font-family":
+                # Only the first family of a fallback list is used.
+                run.font.name = value.split(",")[0].strip().strip("\"'")
+
+            elif prop == "font-weight":
+                if keyword in ("bold", "bolder", "700", "800", "900"):
+                    run.font.bold = True
+                elif keyword in ("normal", "lighter", "400"):
+                    run.font.bold = False
+
+            elif prop == "font-style":
+                if keyword == "italic":
+                    run.font.italic = True
+                elif keyword == "normal":
+                    run.font.italic = False
+
+            elif prop == "text-decoration":
+                if "underline" in keyword:
+                    run.font.underline = True
+                if "line-through" in keyword:
+                    run.font.strike = True
+
+            elif prop == "text-transform":
+                if keyword == "uppercase":
+                    run.text = run.text.upper()
+                elif keyword == "lowercase":
+                    run.text = run.text.lower()
+                elif keyword == "capitalize":
+                    # Upper-case only the first character of each word;
+                    # str.title() would also lower-case the remainder.
+                    run.text = re.sub(
+                        r"(^|\s)(\S)",
+                        lambda m: m.group(1) + m.group(2).upper(),
+                        run.text,
+                    )
+
+        except Exception as exc:
+            # Deliberate best effort, but leave a trace instead of hiding it.
+            import logging
+
+            logging.getLogger(__name__).debug(
+                "Could not apply %s=%r: %s", prop, value, exc
+            )
+
+    def _parse_font_size(self, size_str: str) -> float:
+        """Convert a CSS font size to points.
+
+        Supports ``px`` (x0.75), ``pt``, ``em``/``rem`` and ``%`` (relative
+        to an 11pt base) and bare numbers.
+
+        Returns:
+            The size in points, or ``None`` when the value cannot be parsed.
+        """
+        try:
+            text = size_str.strip().lower()
+            # "rem" must be tested before "em" because it also ends in "em".
+            for suffix, factor in (
+                ("px", 0.75),
+                ("pt", 1),
+                ("rem", self._BASE_FONT_PT),
+                ("em", self._BASE_FONT_PT),
+            ):
+                if text.endswith(suffix):
+                    return float(text[: -len(suffix)]) * factor
+            if text.endswith("%"):
+                return (float(text[:-1]) / 100) * self._BASE_FONT_PT
+            return float(text)
+        except (ValueError, TypeError, AttributeError):
+            return None
diff --git a/filemac/core/html/tests.py b/filemac/core/html/tests.py
new file mode 100644
index 0000000..92b4488
--- /dev/null
+++ b/filemac/core/html/tests.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""
+Test script for the CV Converter library
+"""
+
+import os
+import sys
+
+# Add the library to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "cv_converter"))
+
+from filemac.core.html import HTML2Word
+from filemac.core.html.examples.templates import Templates
+
+
+def test_basic_conversion():
+    """Convert the basic template and write ``test_basic_cv.docx``."""
+    print("Testing basic CV conversion...")
+
+    # Build the converter and feed it the template in a single expression.
+    HTML2Word().convert(Templates.get_basic_template(), "test_basic_cv.docx")
+    print("✓ Basic CV created: test_basic_cv.docx")
+
+
+def test_advanced_conversion():
+    """Convert the styled CV template and write ``test_advanced_cv.docx``."""
+    print("Testing advanced CV conversion...")
+
+    # Build the converter and feed it the template in a single expression.
+    HTML2Word().convert(Templates.get_advanced_cv(), "test_advanced_cv.docx")
+    print("✓ Advanced CV created: test_advanced_cv.docx")
+
+
+def test_file_conversion():
+    """Write the basic template to disk, then convert that file to DOCX."""
+    print("Testing file-based conversion...")
+
+    source_html = "test_cv.html"
+    target_docx = "test_file_cv.docx"
+
+    # Materialise the template so convert_file() has a real file to read.
+    with open(source_html, "w", encoding="utf-8") as handle:
+        handle.write(Templates.get_basic_template())
+
+    HTML2Word().convert_file(source_html, target_docx)
+    print("✓ File-based CV created: test_file_cv.docx")
+
+
+def main():
+    """Run the three conversion scenarios and list the files they produced.
+
+    Any exception stops the run; its message and traceback are printed
+    instead of being propagated.
+    """
+    print("CV Converter Library Test Suite")
+    print("=" * 40)
+
+    scenarios = (
+        test_basic_conversion,
+        test_advanced_conversion,
+        test_file_conversion,
+    )
+    expected_outputs = (
+        "test_basic_cv.docx",
+        "test_advanced_cv.docx",
+        "test_file_cv.docx",
+    )
+
+    try:
+        for scenario in scenarios:
+            scenario()
+
+        print("\n" + "=" * 40)
+        print("All tests completed successfully! 🎉")
+        print("\nGenerated files:")
+        for file in filter(os.path.exists, expected_outputs):
+            print(f" - {file}")
+
+    except Exception as e:
+        import traceback
+
+        print(f"\n❌ Error during testing: {e}")
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    # Run the bundled scenarios. The previous body converted a file from one
+    # developer's home directory, which cannot work on any other machine.
+    main()
diff --git a/filemac/core/html/utils/__init__.py b/filemac/core/html/utils/__init__.py
new file mode 100644
index 0000000..d779482
--- /dev/null
+++ b/filemac/core/html/utils/__init__.py
@@ -0,0 +1,9 @@
+from .color_utils import ColorConverter
+from .validation import validate_css, validate_html, validate_file_path
+
+__all__ = [
+ "ColorConverter",
+ "validate_css",
+ "validate_html",
+ "validate_file_path",
+]
diff --git a/filemac/core/html/utils/color_utils.py b/filemac/core/html/utils/color_utils.py
new file mode 100644
index 0000000..a13d09c
--- /dev/null
+++ b/filemac/core/html/utils/color_utils.py
@@ -0,0 +1,121 @@
+"""
+Color conversion and parsing utilities
+"""
+
+import re
+from docx.shared import RGBColor
+from typing import Optional
+
+
+class ColorConverter:
+    """Converts CSS colour strings to python-docx ``RGBColor`` values."""
+
+    # Patterns are applied with fullmatch() to the stripped, lower-cased
+    # input, so trailing garbage never yields a partial match.
+    _HEX_RE = re.compile(
+        r"#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})(?:[0-9a-f]{2})?"
+    )
+    _SHORT_HEX_RE = re.compile(r"#([0-9a-f])([0-9a-f])([0-9a-f])[0-9a-f]?")
+    _RGB_RE = re.compile(r"rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)")
+    _RGBA_RE = re.compile(
+        r"rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)"
+    )
+    _HSL_RE = re.compile(r"hsl\(\s*(\d+)\s*,\s*(\d+)%\s*,\s*(\d+)%\s*\)")
+
+    def __init__(self):
+        self.named_colors = {
+            "black": RGBColor(0, 0, 0),
+            "white": RGBColor(255, 255, 255),
+            "red": RGBColor(255, 0, 0),
+            "green": RGBColor(0, 128, 0),
+            "blue": RGBColor(0, 0, 255),
+            "yellow": RGBColor(255, 255, 0),
+            "cyan": RGBColor(0, 255, 255),
+            "magenta": RGBColor(255, 0, 255),
+            "gray": RGBColor(128, 128, 128),
+            "grey": RGBColor(128, 128, 128),
+            "orange": RGBColor(255, 165, 0),
+            "purple": RGBColor(128, 0, 128),
+            "brown": RGBColor(165, 42, 42),
+            "pink": RGBColor(255, 192, 203),
+            "navy": RGBColor(0, 0, 128),
+            "teal": RGBColor(0, 128, 128),
+            "olive": RGBColor(128, 128, 0),
+            "maroon": RGBColor(128, 0, 0),
+            "silver": RGBColor(192, 192, 192),
+            "lime": RGBColor(0, 255, 0),
+            "aqua": RGBColor(0, 255, 255),
+            "fuchsia": RGBColor(255, 0, 255),
+        }
+
+    @staticmethod
+    def _clamp_channel(value: int) -> int:
+        """Clamp a colour channel into the 0-255 range."""
+        return max(0, min(255, value))
+
+    def parse_color(self, color_str: str) -> Optional[RGBColor]:
+        """
+        Parse a colour string and return the matching RGBColor.
+
+        Supports:
+        - Named colors: red, blue, etc.
+        - Hex: #RRGGBB and #RGB, plus #RRGGBBAA / #RGBA (alpha ignored)
+        - RGB: rgb(r, g, b)
+        - RGBA: rgba(r, g, b, a) - alpha ignored
+        - HSL: hsl(h, s%, l%)
+
+        Returns:
+            The parsed colour, or None for empty or unrecognised input.
+        """
+        if not color_str:
+            return None
+
+        color_str = color_str.strip().lower()
+
+        # Named colors
+        if color_str in self.named_colors:
+            return self.named_colors[color_str]
+
+        # Hex colors; an optional trailing alpha pair is accepted and dropped.
+        hex_match = self._HEX_RE.fullmatch(color_str)
+        if hex_match:
+            r, g, b = [int(x, 16) for x in hex_match.groups()]
+            return RGBColor(r, g, b)
+
+        # Short hex colors: each digit is doubled (#abc -> #aabbcc).
+        short_hex_match = self._SHORT_HEX_RE.fullmatch(color_str)
+        if short_hex_match:
+            r, g, b = [int(x * 2, 16) for x in short_hex_match.groups()]
+            return RGBColor(r, g, b)
+
+        # RGB / RGBA colors (alpha ignored). Channels above 255 are clamped
+        # because RGBColor rejects values outside 0-255.
+        rgb_match = self._RGB_RE.fullmatch(color_str) or self._RGBA_RE.fullmatch(
+            color_str
+        )
+        if rgb_match:
+            r, g, b = [self._clamp_channel(int(x)) for x in rgb_match.groups()]
+            return RGBColor(r, g, b)
+
+        # HSL colors
+        hsl_match = self._HSL_RE.fullmatch(color_str)
+        if hsl_match:
+            h, s, l = [int(x) for x in hsl_match.groups()]
+            return self._hsl_to_rgb(h, s, l)
+
+        return None
+
+    def _hsl_to_rgb(self, h: int, s: int, l: int) -> RGBColor:
+        """Convert HSL (degrees, percent, percent) to an RGBColor.
+
+        Hue wraps modulo 360; saturation and lightness are clamped to 0-100.
+        """
+        h = h % 360
+        s = max(0, min(100, s)) / 100
+        l = max(0, min(100, l)) / 100
+
+        c = (1 - abs(2 * l - 1)) * s  # chroma
+        x = c * (1 - abs((h / 60) % 2 - 1))  # second-largest component
+        m = l - c / 2  # offset added to every channel
+
+        # One (r, g, b) arrangement per 60-degree hue sector.
+        sectors = (
+            (c, x, 0),
+            (x, c, 0),
+            (0, c, x),
+            (0, x, c),
+            (x, 0, c),
+            (c, 0, x),
+        )
+        r, g, b = sectors[h // 60]
+
+        # Round to nearest instead of truncating, so for example
+        # hsl(120, 100%, 25%) gives 128 rather than 127 for green.
+        r, g, b = (
+            self._clamp_channel(int((channel + m) * 255 + 0.5))
+            for channel in (r, g, b)
+        )
+        return RGBColor(r, g, b)
diff --git a/filemac/core/html/utils/validation.py b/filemac/core/html/utils/validation.py
new file mode 100644
index 0000000..4a0d2d6
--- /dev/null
+++ b/filemac/core/html/utils/validation.py
@@ -0,0 +1,93 @@
+"""
+Validation utilities for the converter
+"""
+
+import os
+import re
+from pathlib import Path
+
+
+def validate_html(html_content: str) -> bool:
+    """
+    Check that ``html_content`` looks like usable HTML.
+
+    Args:
+        html_content: HTML string to validate
+
+    Returns:
+        bool: Always True; failures are reported by raising
+
+    Raises:
+        ValueError: If the content is not a non-empty string, is only
+            whitespace, or contains nothing that looks like a tag
+    """
+    is_non_empty_str = html_content and isinstance(html_content, str)
+    if not is_non_empty_str:
+        raise ValueError("HTML content must be a non-empty string")
+
+    if not html_content.strip():
+        raise ValueError("HTML content cannot be empty or whitespace only")
+
+    # Cheap sanity check: at least one "<...>" sequence must be present.
+    if re.search(r"<[^>]+>", html_content) is None:
+        raise ValueError("HTML content must contain valid HTML tags")
+
+    return True
+
+
+def validate_file_path(file_path: str, file_type: str = "input") -> bool:
+    """
+    Validate a converter input or output path.
+
+    Args:
+        file_path: Path to validate, as a string or ``pathlib.Path``
+        file_type: Type of file ('input' or 'output')
+
+    Returns:
+        bool: True if valid
+
+    Raises:
+        ValueError: If the path is empty or of the wrong type, the input is
+            not a regular file, the output directory cannot be created, or
+            the extension is not .html, .htm or .docx
+        FileNotFoundError: If input file doesn't exist
+    """
+    if not file_path or not isinstance(file_path, (str, Path)):
+        raise ValueError(f"{file_type} file path must be a non-empty string")
+
+    # Path objects pass the type check above but have no .lower(); work on
+    # the plain string form from here on.
+    path_str = os.fspath(file_path)
+
+    if file_type == "input":
+        if not os.path.exists(path_str):
+            raise FileNotFoundError(f"Input file not found: {file_path}")
+
+        if not os.path.isfile(path_str):
+            raise ValueError(f"Input path is not a file: {file_path}")
+
+    elif file_type == "output":
+        output_dir = os.path.dirname(path_str)
+        if output_dir:
+            try:
+                # exist_ok avoids failing if the directory appears between
+                # a separate existence check and the creation.
+                os.makedirs(output_dir, exist_ok=True)
+            except OSError as e:
+                raise ValueError(f"Cannot create output directory: {e}") from e
+
+    # The extension check applies to inputs and outputs alike.
+    if not path_str.lower().endswith((".html", ".htm", ".docx")):
+        raise ValueError(f"File must have .html, .htm, or .docx extension: {file_path}")
+
+    return True
+
+
+def validate_css(css_content: str) -> bool:
+    """
+    Check that ``css_content`` is a non-empty string.
+
+    No CSS syntax is inspected; only the type and emptiness are checked.
+
+    Args:
+        css_content: CSS string to validate
+
+    Returns:
+        bool: Always True; failures are reported by raising
+
+    Raises:
+        ValueError: If the content is empty or not a string
+    """
+    if css_content and isinstance(css_content, str):
+        return True
+
+    raise ValueError("CSS content must be a non-empty string")
diff --git a/filemac/core/image/core.py b/filemac/core/image/core.py
new file mode 100644
index 0000000..c204d68
--- /dev/null
+++ b/filemac/core/image/core.py
@@ -0,0 +1,783 @@
+import shutil
+from reportlab.pdfgen import canvas
+from reportlab.lib.pagesizes import letter
+import re
+from pathlib import Path
+from docx.shared import Inches, Mm
+from docx import Document
+import os
+import sys
+from tqdm import tqdm
+from PIL import Image
+import cv2
+from typing import List, Tuple, Union, Optional
+from ...utils.simple import logger
+from ...utils.decorators import Decorators
+from ...utils.formats import SUPPORTED_IMAGE_FORMATS
+from ...utils.file_utils import modify_filename_if_exists, DirectoryScanner
+from ...utils.colors import fg, rs
+
+RESET = rs
+
+
+class ImageCompressor:
+    """Shrink an image so its estimated raw size fits a byte budget."""
+
+    def __init__(self, input_image_path):
+        self.input_image_path = input_image_path
+
+    def resize_image(self, target_size):
+        """Resize the image and save it next to the original.
+
+        The result is written to ``<name>_resized<ext>`` in the format of
+        the source image. Errors are printed rather than raised.
+
+        Args:
+            target_size: Size budget as a number followed by ``MB`` or
+                ``KB`` (case-insensitive), e.g. ``"2MB"`` or ``"500kb"``.
+        """
+        try:
+            input_image_path = self.input_image_path
+            # splitext keeps extensions of any length (".jpeg", ".webp").
+            stem, ext = os.path.splitext(input_image_path)
+            output_image_path = f"{stem}_resized{ext}"
+
+            size = os.path.getsize(input_image_path)
+            print(f"Original image size {fg.YELLOW}{size / 1000_000:.2f}MiB{RESET}")
+
+            # Convert the target size to bytes
+            amount = float(target_size[:-2])
+            unit = target_size[-2:].lower()
+            if unit == "mb":
+                target_size_bytes = int(amount * 1024 * 1024)
+            elif unit == "kb":
+                target_size_bytes = int(amount * 1024)
+            else:
+                logger.warning(
+                    f"Invalid units. Please use either {fg.BMAGENTA}'MB'{RESET} "
+                    f"or {fg.BMAGENTA}'KB'{RESET}"
+                )
+                # Without a budget there is nothing to compute.
+                return
+
+            with Image.open(input_image_path) as original_image:
+                original_size = original_image.size
+                aspect_ratio = original_size[0] / original_size[1]
+
+                # Calculate the new dimensions based on the target size
+                new_width, new_height = ImageCompressor.calculate_new_dimensions(
+                    original_size, aspect_ratio, target_size_bytes
+                )
+                print(f"{fg.BLUE}Processing ..{RESET}")
+                resized_image = original_image.resize((new_width, new_height))
+                # Keep the source format so the data matches the extension;
+                # fall back to PNG when Pillow could not identify it.
+                resized_image.save(
+                    output_image_path,
+                    optimize=True,
+                    format=original_image.format or "PNG",
+                )
+
+            t_size = os.path.getsize(output_image_path) / 1000_000
+            print(f"{fg.BGREEN}Ok{RESET}")
+            print(
+                f"Image resized to {fg.BYELLOW}{t_size:.2f}{RESET} and saved to {fg.BYELLOW}{output_image_path}"
+            )
+        except KeyboardInterrupt:
+            print("\nQuit⏹️")
+            sys.exit(1)
+        except KeyError:
+            print("KeyError")
+        except Exception as e:
+            print(f"{fg.RED}{e}{RESET}")
+
+    @staticmethod
+    def calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes):
+        """Scale ``original_size`` so its raw 24-bit size meets the budget.
+
+        Args:
+            original_size: ``(width, height)`` in pixels.
+            aspect_ratio: Unused; both sides are scaled by the same factor,
+                which already preserves the ratio.
+            target_size_bytes: Desired size in bytes.
+
+        Returns:
+            ``(new_width, new_height)``, each at least 1, or None when an
+            error was printed instead.
+        """
+        try:
+            original_size_bytes = (
+                original_size[0] * original_size[1] * 3
+            )  # Assuming 24-bit color depth
+            scale_factor = (target_size_bytes / original_size_bytes) ** 0.5
+
+            # A zero-sized dimension is not a usable resize target.
+            new_width = max(1, int(original_size[0] * scale_factor))
+            new_height = max(1, int(original_size[1] * scale_factor))
+
+            return new_width, new_height
+        except KeyboardInterrupt:
+            print("\nQuit⏹️")
+            sys.exit(1)
+        except KeyError:
+            print("KeyError")
+        except Exception as e:
+            print(f"{fg.RED}{e}{RESET}")
+
+
+class ImageConverter:
+    """Convert image files from one format to another."""
+
+    def __init__(self, input_file, out_format):
+        # input_file may be a single file or a directory of files.
+        self.input_file = input_file
+        self.out_format = out_format
+
+    def preprocess(self) -> list:
+        """Collect the candidate files named by ``self.input_file``.
+
+        Returns:
+            A one-element list for a file, the regular files directly
+            inside a directory, or an empty list when the path is neither.
+            Exits with status 1 for an empty directory.
+        """
+        try:
+            if os.path.isfile(self.input_file):
+                return [self.input_file]
+
+            files_to_process = []
+            if os.path.isdir(self.input_file):
+                entries = os.listdir(self.input_file)
+                # listdir returns an empty list, never None, for an empty dir.
+                if not entries:
+                    print("Cannot work with empty folder")
+                    sys.exit(1)
+                for file in entries:
+                    file_path = os.path.join(self.input_file, file)
+                    if os.path.isfile(file_path):
+                        files_to_process.append(file_path)
+
+            return files_to_process
+        except FileNotFoundError:
+            print("File not found❕")
+            sys.exit(1)
+
+    def convert_image(self) -> os.PathLike:
+        """Convert every supported input image to ``self.out_format``.
+
+        Each output is written next to its source with the new extension.
+
+        Returns:
+            The path of the last file written, or None when nothing was
+            converted. Exits with status 1 for an unsupported format.
+        """
+        out_f = self.out_format.upper()
+        try:
+            out_f = "JPEG" if out_f == "JPG" else out_f
+            # Reject the format once, up front, instead of per file.
+            if out_f not in SUPPORTED_IMAGE_FORMATS:
+                print("Unsupported output format")
+                sys.exit(1)
+            out_ext = SUPPORTED_IMAGE_FORMATS[out_f].lower()
+
+            known_exts = tuple(SUPPORTED_IMAGE_FORMATS.values())
+            input_list = [
+                item
+                for item in self.preprocess()
+                if item.lower().endswith(known_exts)
+            ]
+
+            output_filename = None
+            for file in tqdm(input_list):
+                # Load with OpenCV; imread signals failure by returning None.
+                img = cv2.imread(file)
+                if img is None:
+                    print(f"{fg.RED}Could not read image: {file}{RESET}")
+                    continue
+
+                output_filename = os.path.splitext(file)[0] + out_ext
+                # OpenCV delivers BGR; Pillow expects RGB.
+                pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+                pil_img.save(output_filename, out_f)
+
+                print(f"{fg.GREEN}Success{RESET}")
+
+            return output_filename
+        except KeyboardInterrupt:
+            print("\nQuit❕")
+            sys.exit(1)
+        except AssertionError:
+            print("Assertion failed.")
+        except KeyError:
+            print(
+                f"{fg.RED}ERROR:\tPending Implementation for{fg.ICYAN} {out_f} {fg.BWHITE}format{RESET}"
+            )
+        except Exception as e:
+            print(f"{fg.RED}{e}{RESET}")
+
+
+class GrayscaleConverter:
+    """
+    Convert images to grayscale, binarise them and save the result.
+
+    Each image is converted to grayscale and then thresholded with Otsu's
+    method, so the saved output contains only the values 0 and 255.
+
+    Attributes:
+        input_obj: Input file(s) or directory.
+        output_file: Output file path, or None to derive one per input.
+    """
+
+    def __init__(
+        self,
+        input_obj: Union[List[str], Tuple[str], str, os.PathLike],
+        output_file: Optional[Union[list[str], str, os.PathLike]] = None,
+    ):
+        """
+        Initializes the GrayscaleConverter object.
+
+        Args:
+            input_obj: Input file(s) or directory.
+            output_file: Output file path or directory.
+        """
+        self.input_obj = input_obj
+        self.output_file = output_file
+
+    def get_output_file(
+        self, image_path: Optional[Union[str, os.PathLike]] = None
+    ) -> Union[str, os.PathLike]:
+        """
+        Computes the output file path for a given input file.
+
+        Resolution order: ``output_file`` as given when it ends in a
+        supported image extension; ``output_file`` with its extension
+        replaced by ``.png``; the input's base name with ``.png`` in the
+        current directory; finally a fixed default name.
+
+        Args:
+            image_path: Path to the input file.
+
+        Returns:
+            The computed output file path.
+        """
+        logger.info(f"{fg.BWHITE}Obtaining output file name{RESET}")
+        if self.output_file and self.output_file.endswith(
+            tuple(SUPPORTED_IMAGE_FORMATS.values())
+        ):
+            return os.path.abspath(self.output_file)
+        if self.output_file:
+            return os.path.abspath(os.path.splitext(self.output_file)[0] + ".png")
+        if image_path:
+            return os.path.abspath(
+                os.path.splitext(os.path.basename(image_path))[0] + ".png"
+            )
+        # The fallback must be an image name; Pillow infers the format from
+        # the extension and cannot save to ".txt".
+        return "default_output.png"
+
+    def run(self):
+        """
+        Runs the conversion on every file found for ``input_obj``.
+
+        The file list comes from ``DirectoryScanner`` and the per-image
+        function is driven by ``for_loop_decorator``.
+        NOTE(review): presumably the decorator calls the function once per
+        list entry; confirm against ``Decorators``.
+        """
+        file_list = DirectoryScanner(self.input_obj).run()
+
+        @Decorators().for_loop_decorator(file_list)
+        def process_image(self, image_path):
+            """Binarise a single image and save it."""
+            try:
+                logger.info(f"{fg.YELLOW}Processing {fg.CYAN}{image_path}{RESET}")
+                img = cv2.imread(image_path)
+                if img is None:
+                    raise FileNotFoundError(f"Could not read image: {image_path}")
+                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+                # With THRESH_OTSU the threshold argument (0) is ignored and
+                # the value is computed from the image.
+                _, thresh = cv2.threshold(
+                    gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
+                )
+                self.save_pil_image(thresh, image_path)
+            except FileNotFoundError as e:
+                logger.error(f"{fg.RED}{e}{RESET}")
+            except Exception as e:
+                # Log before re-raising; the log call was previously placed
+                # after the raise and could never run.
+                logger.error(f"An unexpected error occurred: {fg.RED}{e}{RESET}")
+                raise
+
+        process_image(self)
+
+    def save_pil_image(self, thresh, image_path):
+        """
+        Saves a NumPy array as an image through Pillow.
+
+        Args:
+            thresh: The NumPy array holding the thresholded image.
+            image_path: The path of the original image, used to derive the output filename.
+
+        Raises:
+            Exception: Any error from Pillow or the filesystem, after it
+                has been logged.
+        """
+        try:
+            img_pil = Image.fromarray(thresh)
+            filename = self.get_output_file(image_path)
+            filename = modify_filename_if_exists(filename)
+            img_pil.save(filename)
+            logger.info(f"{fg.GREEN}Image saved as {fg.BLUE}{filename}{RESET}")
+        except Exception as e:
+            logger.error(f"Unable to save the image: {fg.RED}{e}{RESET}")
+            raise
+
+
+class ImageDocxConverter:
+    """
+    A class for converting images to DOCX documents.
+    """
+
+    def __init__(
+        self,
+        image_list: Union[Tuple[str], List[str]] = None,
+        input_dir: Union[str, os.PathLike] = None,
+        output_path: Union[str, os.PathLike] = None,
+        image_size: Tuple[float, float] = (6, 8),  # Default to 6x8 inches
+        margin_mm: float = 25,  # Default margin of 25mm (approx 1 inch)
+    ) -> None:
+        """
+        Initializes the ImageDocxConverter object.
+
+        Args:
+            image_list: Image files to place in the document.
+            input_dir: Directory to read images from when no list is given.
+            output_path: Path of the DOCX file to write, including its name,
+                e.g. ~/Document/output.docx. Derived from the inputs when omitted.
+            image_size: Tuple (width, height) in inches.
+            margin_mm: Margin in millimeters.
+        """
+        self.image_list = image_list
+        self.input_dir = input_dir
+        self.output_path = output_path if output_path else self.ensure_output_file()
+        self.image_size = image_size
+        self.margin_mm = margin_mm
+        self.document = Document()
+
+        # Apply the same margin to every side of every section.
+        for section in self.document.sections:
+            section.top_margin = Mm(self.margin_mm)
+            section.bottom_margin = Mm(self.margin_mm)
+            section.left_margin = Mm(self.margin_mm)
+            section.right_margin = Mm(self.margin_mm)
+        self.create_output_directory()
+
+    def ensure_output_file(self) -> os.PathLike:
+        """Return the default output path, ``filemac_image2docx.docx``.
+
+        The file is placed in ``input_dir``, else beside the first listed
+        image, else in the current directory.
+        """
+        file_name = "filemac_image2docx.docx"
+        if self.input_dir:
+            base_dir = self.input_dir
+        elif self.image_list:
+            base_dir = Path(self.image_list[0]).parent
+        else:
+            # No inputs at all: keep construction working so run() can
+            # report the problem instead of failing here with a TypeError.
+            base_dir = os.getcwd()
+
+        return os.path.join(base_dir, file_name)
+
+    def create_output_directory(self) -> None:
+        """
+        Creates the output directory if it does not exist.
+        """
+        Path(self.output_path).parent.mkdir(parents=True, exist_ok=True)
+
+    def get_valid_images(self, image_paths: List[str]) -> List[str]:
+        """
+        Filters the list of image paths, returning only those with supported formats.
+
+        Files that cannot be opened or have an unsupported format are
+        reported on stdout and skipped.
+
+        Args:
+            image_paths: A list of file paths to images.
+
+        Returns:
+            A list of file paths to valid images.
+        """
+        # Extensions are stored with a leading dot; Pillow reports bare names.
+        supported = {ext[1:] for ext in SUPPORTED_IMAGE_FORMATS.values()}
+        valid_images = []
+        for image_path in image_paths:
+            try:
+                # The context manager closes the handle after the probe.
+                with Image.open(image_path) as img:
+                    image_format = (img.format or "").lower()
+                if image_format in supported:
+                    valid_images.append(image_path)
+                else:
+                    print(
+                        f"{fg.MAGENTA}Skipping unsupported image format: {fg.CYAN}{image_path}{RESET}"
+                    )
+            except Exception as e:
+                print(
+                    f"{fg.RED}Error processing image {fg.YELLOW}{image_path} - {fg.RED} {e}{RESET}"
+                )
+        return valid_images
+
+    def _write_docx(self, valid_images: List[str], output_path) -> os.PathLike:
+        """Add one image per page to the document and save it.
+
+        Returns the path written; ``.docx`` is appended when ``output_path``
+        does not already end in ``docx`` or ``doc``.
+        """
+        last_index = len(valid_images) - 1
+        for index, image_path in enumerate(valid_images):
+            try:
+                # Add a paragraph for each image
+                run = self.document.add_paragraph().add_run()
+                run.add_picture(
+                    image_path,
+                    width=Inches(self.image_size[0]),
+                    height=Inches(self.image_size[1]),
+                )
+                # Page break after every image but the last. Comparing by
+                # position stays correct when the same path occurs twice.
+                if index != last_index:
+                    self.document.add_page_break()
+            except Exception as e:
+                print(
+                    f"{fg.RED}Error processing image {fg.YELLOW}{image_path}:{fg.RED} {e}{RESET}"
+                )
+
+        output_path = os.fspath(output_path)
+        docx_file_path = (
+            output_path
+            if output_path.endswith(("docx", "doc"))
+            else f"{output_path}.docx"
+        )
+        self.document.save(docx_file_path)
+        return docx_file_path
+
+    def convert_images_to_docx(self, image_paths: List[str]) -> os.PathLike:
+        """
+        Converts a list of images to a single DOCX document.
+
+        Args:
+            image_paths: List of image file paths.
+
+        Returns:
+            The path of the saved document, or None when no path was a
+            valid image.
+        """
+        valid_images = self.get_valid_images(image_paths)
+        if not valid_images:
+            print("No valid images to convert.")
+            return None
+
+        return self._write_docx(valid_images, self.output_path)
+
+    def convert_images_in_directory(self, input_dir, output_path) -> os.PathLike:
+        """
+        Converts all images in a directory to a DOCX document.
+
+        Args:
+            input_dir (str): The directory containing the images.
+            output_path (str): The path to save the generated Word File.
+
+        Returns:
+            The path of the saved document.
+
+        Raises:
+            FileNotFoundError: If ``input_dir`` does not exist.
+            ValueError: If the directory holds no valid image.
+        """
+        if not os.path.exists(input_dir):
+            raise FileNotFoundError(f"Directory not found: {input_dir}")
+
+        # Files only, in name order, so pages come out deterministically.
+        candidates = sorted(
+            path
+            for path in (os.path.join(input_dir, f) for f in os.listdir(input_dir))
+            if os.path.isfile(path)
+        )
+
+        image_paths = self.get_valid_images(candidates)
+        if not image_paths:
+            raise ValueError(f"No images found in directory: {input_dir}")
+
+        # This class has no PDF writer; build the DOCX it is meant to build.
+        return self._write_docx(image_paths, output_path)
+
+    def run(self) -> os.PathLike:
+        """
+        Runs the conversion process.
+
+        ``image_list`` takes precedence over ``input_dir``.
+
+        Returns:
+            The path of the created document, or None when nothing was
+            converted. Exits when neither input was provided.
+        """
+        if not any((self.image_list, self.input_dir)):
+            print("No image paths provided.")
+            sys.exit()
+
+        # Stays None on every path that produces no document.
+        docx_file_path = None
+
+        if self.image_list and self.output_path:
+            missing = [img for img in self.image_list if not os.path.exists(img)]
+            if missing:
+                print(f"{fg.RED}Image(s) not found: {', '.join(missing)}{RESET}")
+            else:
+                docx_file_path = self.convert_images_to_docx(self.image_list)
+        elif self.input_dir and self.output_path:
+            if os.path.exists(self.input_dir):
+                docx_file_path = self.convert_images_in_directory(
+                    self.input_dir, self.output_path
+                )
+            else:
+                print(f"{fg.RED}Directory not found: {self.input_dir}{RESET}")
+
+        if docx_file_path:
+            print(
+                f"{fg.GREEN_RG}Successfully created DOCX: {fg.BLUE}{docx_file_path}{RESET}"
+            )
+
+        return docx_file_path
+
+    def cli(self, args: List[str]) -> None:
+        """
+        Parses command line arguments and performs the conversion.
+
+        Args:
+            args: List of command line arguments; ``args[0]`` is skipped
+                as the program name.
+        """
+        if not args or "-h" in args or "--help" in args:
+            print(
+                """
+    Usage: python image_to_docx.py [options] image1 image2 ... imageN
+
+    Options:
+        -h, --help              show this help message and exit
+        -o, --output PATH       path to save the output DOCX file (default: current directory)
+        -n, --name FILENAME     name of the output DOCX file (default: output_document)
+        -s, --size WIDTHxHEIGHT size of images in inches (e.g., 6x8) (default: 6x8)
+        -m, --margin MARGIN_MM  margin in millimeters (default: 25)
+    """
+            )
+            sys.exit()
+
+        image_paths = []
+        output_path = "."  # Current directory
+        filename = "output_document"
+        image_size = (6, 8)  # Default 6x8 inches
+        margin_mm = 25
+        value_options = ("-o", "--output", "-n", "--name", "-s", "--size", "-m", "--margin")
+
+        i = 1
+        while i < len(args):
+            option = args[i]
+            if option in value_options:
+                # Every option takes a value; fail clearly when it is missing.
+                if i + 1 >= len(args):
+                    print(f"Missing value for argument: {option}")
+                    sys.exit(1)
+                value = args[i + 1]
+                if option in ("-o", "--output"):
+                    output_path = value
+                elif option in ("-n", "--name"):
+                    filename = value
+                elif option in ("-s", "--size"):
+                    try:
+                        width, height = map(float, value.split("x"))
+                        image_size = (width, height)
+                    except ValueError:
+                        print("Invalid size format. Please use WIDTHxHEIGHT (e.g., 6x8).")
+                        sys.exit(1)
+                else:
+                    try:
+                        margin_mm = float(value)
+                    except ValueError:
+                        print("Invalid margin format. Please provide a numeric value.")
+                        sys.exit(1)
+                i += 2
+            elif not option.startswith("-"):
+                image_paths.append(option)
+                i += 1
+            else:
+                print(f"Unknown argument: {option}")
+                sys.exit(1)
+
+        # Pass by keyword: the constructor's positional order is
+        # (image_list, input_dir, output_path, ...), not (path, name, ...).
+        converter = ImageDocxConverter(
+            image_list=image_paths,
+            output_path=os.path.join(output_path, filename),
+            image_size=image_size,
+            margin_mm=margin_mm,
+        )
+        converter.run()
+
+
+class ImagePdfConverter:
+ """
+ A class for converting images to PDF.
+ """
+
+    def __init__(
+        self,
+        image_list: Union[List[str], Tuple[str]] = None,
+        input_dir=None,
+        output_pdf_path=None,
+        page_size=letter,
+        order: bool = False,
+        base: bool = False,
+        walk: bool = False,
+        clean: bool = False,
+    ):
+        """Store the conversion options and resolve the output path.
+
+        Args:
+            image_list: Image files to convert.
+            input_dir: Directory to read images from.
+            output_pdf_path: Destination PDF; derived via
+                ``ensure_output_file`` when omitted.
+            page_size: Page size passed to the PDF canvas.
+            order: Stored as ``self.order``.
+            base: Stored as ``self.base``; read by ``ensure_output_file``.
+            walk: Stored as ``self.walk``.
+            clean: Stored as ``self.clean``.
+        """
+        self.image_list, self.input_dir = image_list, input_dir
+        self.page_size = page_size
+        self.order, self.base = order, base
+        self.walk, self.clean = walk, clean
+
+        # Resolved last: the fallback reads input_dir, base and image_list.
+        if output_pdf_path:
+            self.output_pdf_path = output_pdf_path
+        else:
+            self.output_pdf_path = self.ensure_output_file()
+
+    def ensure_output_file(self) -> os.PathLike:
+        """Derive an output PDF path when none was supplied.
+
+        With ``input_dir`` set, the file goes inside that directory. Its
+        name comes from the first directory entry (text before ``_img_``)
+        when ``base`` is true, otherwise from the directory path up to
+        ``_imgs``. Without ``input_dir`` the default name is placed beside
+        the first listed image, or in the current directory when there are
+        no inputs at all.
+
+        Returns:
+            The joined output path.
+        """
+        file_name = "filemac_image2pdf.pdf"
+        if self.input_dir:
+            base_dir = self.input_dir
+            if self.base:
+                entries = os.listdir(self.input_dir)
+                # An empty directory keeps the default name instead of
+                # raising IndexError.
+                if entries:
+                    base_name, _ext = os.path.splitext(entries[0])
+                    if "_img_" in base_name:
+                        base_name = base_name.split("_img_")[0]
+                    file_name = base_name + ".pdf"
+            else:
+                # NOTE(review): file_name is built from the whole input_dir
+                # path and then joined onto input_dir again; for a nested
+                # relative path this repeats the prefix. Confirm the intent.
+                file_name = os.fspath(self.input_dir).split("_imgs")[0] + ".pdf"
+        elif self.image_list:
+            base_dir = Path(self.image_list[0]).parent
+        else:
+            # Neither input given: avoid a TypeError on image_list[0].
+            base_dir = os.getcwd()
+
+        return os.path.join(base_dir, file_name)
+
+    def _clean(self, dirs: list):
+        """Recursively delete every existing directory listed in ``dirs``.
+
+        NOTE(review): no check restricts deletion to a safe root; the
+        home-directory guard is present only as disabled code.
+        """
+        print(f"{fg.UWHITE}{fg.BWHITE}Clean Images Host dir{fg.RESET}")
+        for target in dirs:
+            resolved = os.path.abspath(target)
+            print(f"{fg.BWHITE}Nuke: {fg.BYELLOW}{resolved}{fg.RESET}")
+            # Disabled guard: Path(target).is_relative_to(os.path.expanduser("~"))
+            if not (os.path.exists(target) and os.path.isdir(target)):
+                continue
+            shutil.rmtree(resolved)
+
+    def create_pdf_from_images(
+        self, image_paths, output_pdf_path, resize_to_fit=True
+    ) -> os.PathLike:
+        """
+        Creates a PDF from a list of image paths, one image per page.
+
+        Args:
+            image_paths (list): A list of image file paths.
+            output_pdf_path (str): The path to save the generated PDF.
+            resize_to_fit (bool, optional): Scale each image to fit the page
+                while keeping its aspect ratio. Defaults to True.
+
+        Returns:
+            ``output_pdf_path``.
+
+        Raises:
+            FileNotFoundError: If any image path does not exist.
+            ValueError: If image_paths is empty or a file cannot be opened
+                as an image.
+            Exception: If drawing or saving the PDF fails; the original
+                error is chained as the cause.
+        """
+        if not image_paths:
+            raise ValueError("Image paths list is empty.")
+
+        # Validate every input before anything is written.
+        for image_path in image_paths:
+            if not os.path.exists(image_path):
+                raise FileNotFoundError(f"Image not found: {image_path}")
+            try:
+                # Open and close immediately; this is only a readability probe.
+                with Image.open(image_path):
+                    pass
+            except Exception as e:
+                raise ValueError(f"Error opening image {image_path}: {e}") from e
+
+        try:
+            c = canvas.Canvas(output_pdf_path, pagesize=self.page_size)
+            width, height = self.page_size
+
+            for image_path in image_paths:
+                # Only the dimensions are needed; release the handle at once.
+                with Image.open(image_path) as img:
+                    img_width, img_height = img.size
+
+                if resize_to_fit:
+                    ratio = min(width / img_width, height / img_height)
+                    new_width = img_width * ratio
+                    new_height = img_height * ratio
+                else:
+                    new_width = img_width
+                    new_height = img_height
+
+                # Centre the image on the page.
+                x = (width - new_width) / 2
+                y = (height - new_height) / 2
+
+                c.drawImage(
+                    image_path,
+                    x,
+                    y,
+                    width=new_width,
+                    height=new_height,
+                    preserveAspectRatio=True,
+                )
+                c.showPage()
+
+            c.save()
+
+            return output_pdf_path
+        except Exception as e:
+            raise Exception(f"Error creating PDF: {e}") from e
+
+ @staticmethod
+ def ensure_format(input_image) -> os.PathLike:
+ from ..imagepy.converter import ImageConverter
+
+ converter = ImageConverter(input_image, "png")
+ output_image = converter.convert_image()
+ return output_image
+
+ def extract_img_number(self, filename):
+ match = re.search(r"_img_(\d+)", filename)
+ return int(match.group(1)) if match else float("inf")
+
+ def _sort(self, obj, ext):
+ if self.order:
+ if isinstance(obj, list):
+ return sorted(
+ obj,
+ key=lambda f: self.extract_img_number(f),
+ )
+ return sorted(
+ [
+ os.path.join(obj, f)
+ for f in os.listdir(obj)
+ if f.lower().endswith(ext)
+ ],
+ key=lambda f: self.extract_img_number(f),
+ )
+ else:
+ return sorted(
+ [
+ os.path.join(obj, f)
+ for f in os.listdir(obj)
+ if f.lower().endswith(ext)
+ ]
+ )
+
+ def convert_images_in_directory_recursive(
+ self, input_dir, output_pdf_path, file_extensions=(".jpg", ".jpeg", ".png")
+ ):
+ """
+ Recursively walks through a directory and its subdirectories,
+ converting images in each folder into a separate PDF.
+
+ Args:
+ input_dir (str): Root directory containing images.
+ output_root (str): Directory to save the generated PDFs.
+ file_extensions (tuple): Supported image extensions.
+ """
+ try:
+ if not os.path.exists(input_dir):
+ raise FileNotFoundError(f"Directory not found: {input_dir}")
+
+ # if not os.path.exists(output_root):
+ # os.makedirs(output_root)
+ dclean = []
+ for root, _, files in os.walk(input_dir):
+ image_paths = [
+ os.path.join(root, f)
+ for f in files
+ if f.lower().endswith(file_extensions)
+ ]
+
+ if not image_paths:
+ continue # No valid images in this directory
+
+ # Optional: sort images with your custom logic
+ image_paths = self._sort(image_paths, file_extensions)
+
+ # Ensure formats are valid
+ for index, image in enumerate(image_paths):
+ if not image.lower().endswith(file_extensions):
+ image_paths[index] = self.ensure_format(image)
+
+ # Create a relative PDF name based on the subdir structure
+ fname = os.path.split(root)[-1].split("_imgs")[0] + ".pdf"
+ relative_path = os.path.join(
+ os.path.dirname((os.path.relpath(root, input_dir))), fname
+ )
+ # Host dir for images to be cleaned is clean is on
+ dname = os.path.relpath(root, input_dir)
+ dclean.append(dname)
+
+ # pdf_name = relative_path.replace(os.sep, "_") + ".pdf"
+ # pdf_output_path = os.path.join(output_root, pdf_name)
+
+ # Create the PDF for this folder
+ self.create_pdf_from_images(image_paths, relative_path)
+ print(f"{fg.BWHITE}Created PDF{RESET}: {relative_path}")
+ if self.clean:
+ self._clean(dclean)
+ except Exception as e:
+ print(f"\033[31m{e}\033[0m")
+ sys.exit(1)
+
+ def convert_images_in_directory(
+ self, input_dir, output_pdf_path, file_extensions=(".jpg", ".jpeg", ".png")
+ ) -> os.PathLike:
+ try:
+ """
+ Converts all images in a directory to a PDF.
+
+ Args:
+ input_dir (str): The directory containing the images.
+ output_pdf_path (str): The path to save the generated PDF.
+ file_extensions (tuple, optional): Tuple of image file extensions to include.
+ """
+
+ if not os.path.exists(input_dir):
+ raise FileNotFoundError(f"Directory not found: {input_dir}")
+
+ image_paths = self._sort(input_dir, ext=file_extensions)
+
+ for index, image in enumerate(image_paths):
+ if not image.endswith(file_extensions):
+ image_paths[index] = self.ensure_format(image)
+
+ if not image_paths:
+ raise ValueError(
+ f"\033[31mNo images found in directory:\033[1m {input_dir}\033[0m"
+ )
+
+ self.create_pdf_from_images(image_paths, output_pdf_path)
+ return output_pdf_path
+ except ValueError as e:
+ print(e)
+ sys.exit(1)
+
+ def run(self) -> os.PathLike:
+ """
+ Runs the PDF creation based on the object's initialization parameters.
+ """
+ if self.image_list and self.output_pdf_path:
+ if all(os.path.exists(img) for img in self.image_list):
+ output_pdf_path = self.create_pdf_from_images(
+ self.image_list, self.output_pdf_path
+ )
+ print(f"{fg.GREEN}PDF created successfully from directory!{RESET}")
+ print(f"{fg.GREEN}Output:{RESET} {fg.BLUE}{output_pdf_path}{RESET}")
+ else:
+ print(f"{fg.RED}One or more images in the list do not exist.{RESET}")
+ elif self.input_dir and self.output_pdf_path:
+ if os.path.exists(self.input_dir):
+ if self.walk:
+ output_pdf_path = self.convert_images_in_directory_recursive(
+ self.input_dir, self.output_pdf_path
+ )
+ else:
+ output_pdf_path = self.convert_images_in_directory(
+ self.input_dir, self.output_pdf_path
+ )
+ print(f"{fg.GREEN}PDF created successfully from directory!{RESET}")
+ print(
+ f"{fg.BWHITE}Output:{RESET} {fg.BLUE}{output_pdf_path}{RESET}"
+ )
+ else:
+ print(f"Directory {fg.YELLOW}{self.input_dir}{RESET} does not exist.")
+ else:
+ print(
+ "Please provide either image_list and output_pdf_path or input_dir and output_pdf_path during object instantiation."
+ )
+ return
+ return output_pdf_path
diff --git a/filemac/core/image/extractor.py b/filemac/core/image/extractor.py
new file mode 100644
index 0000000..c12872c
--- /dev/null
+++ b/filemac/core/image/extractor.py
@@ -0,0 +1,269 @@
+import sys
+import fitz # PyMuPDF for PDF
+from docx import Document
+from PIL import Image
+from io import BytesIO
+from typing import List, Union, Tuple
+from pathlib import Path
+import os
+from ...utils.colors import fg, rs
+from ...utils.file_utils import dirbuster
+
+# Module-level alias for `rs` from utils.colors; used to terminate coloured output below.
+RESET = rs
+
+
+class ImageExtractor:
+    """
+    Base class for extracting images from document files.
+
+    Subclasses implement ``_extract_images``; this class filters the images
+    by size and writes them to ``<document dir>/<document stem>_imgs``.
+    """
+
+    def __init__(self, output_path: str = None, tsize: tuple = (20, 20)) -> None:
+        """
+        Initializes the ImageExtractor object.
+
+        Args:
+            output_path: Base directory for extracted images. Note that
+                extract_and_save_images() saves next to the source document
+                instead, so this is only used when no document directory is known.
+            tsize: Minimum (width, height); images not larger than this in
+                both dimensions are skipped.
+        """
+        base_path = (
+            os.path.join(output_path, "FilemacExctracts")
+            if output_path
+            else os.path.join(os.path.abspath(os.getcwd()), "FilemacExctracts")
+        )
+        self.output_path = base_path
+        self.tsize = tsize
+        self.output_base = None
+
+    def _extract_images(self, file_path: str) -> List[Image.Image]:
+        """
+        Extracts images from the given file; implemented by subclasses.
+
+        Args:
+            file_path: Path to the document file.
+
+        Returns:
+            A list of PIL Image objects.
+
+        Raises:
+            NotImplementedError: Always, in this base class.
+        """
+        raise NotImplementedError("Subclasses must implement this method")
+
+    def extract_and_save_images(self, file_path: str) -> None:
+        """
+        Extracts and saves images from the given file.
+
+        Args:
+            file_path: Path to the document file.
+        """
+        images = self._extract_images(file_path)
+        # Images are saved in a folder beside the source document.
+        self.output_base = os.path.split(file_path)[0]
+        if not images:
+            print(f"No images found in {file_path}")
+            return
+
+        base_filename = Path(file_path).stem
+        self._save_images(images, base_filename)
+
+    def is_page_sized_image(self, img, target_size=(595, 842), tolerance=1):
+        """
+        Check whether ``img`` is strictly larger than the size threshold.
+
+        Args:
+            img: Object exposing a ``size`` (width, height) attribute.
+            target_size: Threshold used when ``self.tsize`` is falsy.
+            tolerance: Currently unused; kept for signature compatibility.
+
+        Returns:
+            True when both width and height exceed the threshold.
+        """
+        img_width, img_height = img.size
+        target_width, target_height = self.tsize if self.tsize else target_size
+        return img_width > target_width and img_height > target_height
+
+    def _save_images(self, images: List[Image.Image], base_filename: str) -> None:
+        """
+        Saves the extracted images to the output directory.
+
+        Args:
+            images: A list of PIL Image objects.
+            base_filename: Prefix for the saved files and the "<prefix>_imgs" folder.
+
+        Raises:
+            Exception: Re-raises whatever saving an image raised, after
+                reporting which image failed.
+        """
+        # Fall back to the configured output path when no source document
+        # directory has been recorded yet (direct calls to this method).
+        base_dir = self.output_base if self.output_base is not None else self.output_path
+        self.output_path = os.path.join(base_dir, f"{base_filename}_imgs")
+        os.makedirs(self.output_path, exist_ok=True)  # Ensure directory exists
+
+        for i, img in enumerate(images):
+            try:
+                if self.tsize and not self.is_page_sized_image(img):
+                    print(
+                        f"Skipping image {i + 1}: ({fg.CYAN}{img.size}{RESET}) <= {fg.BLUE}{self.tsize}{RESET}"
+                    )
+                    continue
+
+                # The running index keeps file names unique within the folder.
+                img_format = img.format or "PNG"  # Default to PNG if format is None
+                safe_filename = f"{base_filename}_img_{i + 1}.{img_format.lower()}"
+
+                img_path = Path(self.output_path) / safe_filename
+                img.save(img_path)
+                print(f"Saved image: {fg.GREEN}{img_path}{RESET}")
+            except Exception as e:
+                # Report which image failed before propagating the error.
+                print(f"Error saving image {i + 1} from {base_filename}: {e}")
+                raise
+
+
+class PdfImageExtractor(ImageExtractor):
+    """
+    Extracts images from PDF files.
+    """
+
+    def __init__(self, output_path, size):
+        """Forward to ImageExtractor, using (20, 20) when ``size`` is falsy."""
+        super().__init__(output_path, size or (20, 20))
+
+    def _extract_images(self, file_path: str) -> List[Image.Image]:
+        """
+        Extracts images from a PDF file using PyMuPDF.
+
+        Errors are printed rather than raised; images collected before a
+        failure are still returned.
+
+        Args:
+            file_path: Path to the PDF file.
+
+        Returns:
+            A list of PIL Image objects (possibly empty).
+        """
+        print(f"{fg.BWHITE}File: {fg.BLUE}{file_path}{RESET}")
+        images: List[Image.Image] = []
+
+        try:
+            pdf_document = fitz.open(file_path)
+        except Exception as e:
+            print(f"Error processing PDF file: {file_path} - {e}")
+            return images
+
+        try:
+            for page_index in range(len(pdf_document)):
+                page = pdf_document.load_page(page_index)
+                image_list = page.get_images(full=True)  # Get detailed image info
+                for img_index, img_info in enumerate(image_list):
+                    # One unreadable image must not abort the remaining ones.
+                    try:
+                        xref = img_info[0]  # Get the XREF of the image
+                        base_image = pdf_document.extract_image(xref)
+                        images.append(Image.open(BytesIO(base_image["image"])))
+                    except Exception as e:
+                        print(
+                            f"Error processing image {img_index + 1} from PDF page {page_index + 1}: {e}"
+                        )
+        except Exception as e:
+            print(f"Error processing PDF file: {file_path} - {e}")
+        finally:
+            # Always release the document, even when a page fails to load.
+            pdf_document.close()
+        return images
+
+
+class DocxImageExtractor(ImageExtractor):
+    """
+    Image extractor for DOCX documents.
+    """
+
+    def __init__(self, output_path, size):
+        # A falsy size falls back to the (20, 20) threshold.
+        threshold = size or (20, 20)
+        super().__init__(output_path, threshold)
+
+    def _extract_images(self, file_path: str) -> List[Image.Image]:
+        """
+        Collect every image referenced by the document's relationships.
+
+        Failures are printed, never raised.
+
+        Args:
+            file_path: Path to the DOCX file.
+
+        Returns:
+            The PIL images that could be opened (possibly an empty list).
+        """
+        collected: List[Image.Image] = []
+        try:
+            relationships = Document(file_path).part.rels.values()
+            for rel in relationships:
+                if "image" not in rel.target_ref:
+                    continue
+                blob = rel.target_part.blob
+                try:
+                    opened = Image.open(BytesIO(blob))
+                    collected.append(opened)
+                except Exception as e:
+                    print(f"Error processing image from DOCX: {e}")
+        except Exception as e:
+            print(f"Error processing DOCX file: {file_path} - {e}")
+        return collected
+
+
+def process_files(
+    file_paths: Union[Tuple[str], List[str]],
+    output_path: str = os.getcwd(),
+    tsize: tuple = None,
+) -> None:
+    """
+    Processes the given files and extracts images from them.
+
+    Directories are expanded with ``dirbuster`` and processed recursively;
+    only ``.pdf`` and ``.docx`` files are handled, anything else is reported
+    and skipped. Ctrl-C prints "Quit" and exits the process.
+
+    Args:
+        file_paths: Paths of files and/or directories to process.
+        output_path: Path handed to the extractors. NOTE: the default is the
+            working directory at import time, not at call time.
+        tsize: Minimum (width, height) an image must exceed to be saved.
+    """
+    try:
+        for file_path in file_paths:
+            if os.path.isdir(file_path):
+                # Recurse with the same settings, then move on: a directory
+                # is not itself a document to extract from.
+                process_files(dirbuster(file_path), output_path, tsize=tsize)
+                continue
+
+            lowered = file_path.lower()
+            if lowered.endswith(".pdf"):
+                extractor = PdfImageExtractor(output_path, tsize)
+            elif lowered.endswith(".docx"):
+                extractor = DocxImageExtractor(output_path, tsize)
+            else:
+                print(f"Skipping unsupported file format: {file_path}")
+                continue
+            extractor.extract_and_save_images(file_path)
+    except KeyboardInterrupt:
+        print("\nQuit")
+        sys.exit()
+
+
+def main(args: List[str]) -> None:
+    """
+    Main function to parse command line arguments and perform image extraction.
+
+    Args:
+        args: Command line arguments WITHOUT the program name
+            (i.e. ``sys.argv[1:]``).
+
+    Exits with status 1 on an unknown option, a missing option value, or
+    when no input file is given.
+    """
+    if not args or "-h" in args or "--help" in args:
+        print(
+            """
+        Usage: python extract_images.py [options] file1 file2 ... fileN
+
+        Options:
+            -h, --help            show this help message and exit
+            -o, --output PATH     path to save the extracted images (default: extracted_images)
+        """
+        )
+        sys.exit()
+
+    file_paths = []
+    output_path = "extracted_images"  # Default output path
+
+    # The program name is already stripped by the caller, so start at 0.
+    i = 0
+    while i < len(args):
+        arg = args[i]
+        if arg in ("-o", "--output"):
+            if i + 1 >= len(args):
+                print(f"Missing value for argument: {arg}")
+                sys.exit(1)
+            output_path = args[i + 1]
+            i += 2
+        elif not arg.startswith("-"):
+            file_paths.append(arg)
+            i += 1
+        else:
+            print(f"Unknown argument: {arg}")
+            sys.exit(1)
+
+    if not file_paths:
+        print("No files provided for image extraction.")
+        sys.exit(1)
+
+    process_files(file_paths, output_path)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/filemac/core/ocr.py b/filemac/core/ocr.py
new file mode 100644
index 0000000..4fbe426
--- /dev/null
+++ b/filemac/core/ocr.py
@@ -0,0 +1,199 @@
+import logging
+import os
+import sys
+from typing import Union, List, Optional
+
+import cv2
+import pytesseract
+from PIL import Image
+from rich.progress import Progress
+from ..utils.colors import fg, bg, rs
+from ..utils.file_utils import modify_filename_if_exists, DirectoryScanner
+
+
+# Module-level alias for `rs` from utils.colors; appended after coloured output below.
+RESET = rs
+
+# Define constants for better readability and maintainability
+# Image extensions (without the dot) this module declares as supported.
+SUPPORTED_IMAGE_FORMATS = {"png", "jpg", "jpeg"}
+# Tesseract options passed as `config=` to pytesseract.image_to_string:
+# English language data, OCR engine mode 3, page segmentation mode 6.
+DEFAULT_CONFIG = "-l eng --oem 3 --psm 6"
+# Separator used to re-join the recognised lines when the caller gives none.
+DEFAULT_SEPARATOR = "\n"
+
+# Configure logging at the module level
+# NOTE(review): basicConfig runs on import and configures the root logger,
+# which affects any application importing this module - confirm intended.
+logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
+logger = logging.getLogger(__name__)
+
+
+class ExtractText:
+    """
+    Extracts text from images using OCR, with options for file/directory input,
+    output file naming, and text separation.
+    """
+
+    # Human-friendly separator names accepted in place of the literal characters.
+    _SEPARATOR_ALIASES = {
+        "newline": "\n",
+        "tab": "\t",
+        "space": " ",
+        "none": "",
+    }
+
+    def __init__(
+        self,
+        input_obj: Optional[Union[list[str], tuple[str], str, os.PathLike]],
+        sep: str = DEFAULT_SEPARATOR,
+    ):
+        """
+        Initializes the ExtractText object.
+
+        Args:
+            input_obj: Path to an image file or directory, or a list/tuple of paths.
+            sep: Separator used to join the recognised lines. The names
+                "newline", "tab", "space" and "none" are translated to the
+                corresponding characters; any other value is used as given.
+
+        Raises:
+            TypeError: If ``input_obj`` is not a str, os.PathLike, list or tuple.
+        """
+        # Tuples are accepted as well, matching the annotation above.
+        if not isinstance(input_obj, (str, list, tuple, os.PathLike)):
+            raise TypeError(
+                f"input_obj must be a string, os.PathLike, list or tuple, not {type(input_obj)}"
+            )
+        self.input_obj = input_obj
+        self.sep = self._SEPARATOR_ALIASES.get(sep, sep)
+
+    @staticmethod
+    def _normalize_separator(sep: str) -> str:
+        """Turn escaped ("\\\\n", "\\\\t") and carriage-return forms into real newline/tab characters."""
+        return (
+            sep.replace("\r\n", "\n")
+            .replace("\\n", "\n")
+            .replace("\r", "\n")
+            .replace("\\t", "\t")
+        )
+
+    def _process_image(self, image_path: str, output_file: str) -> str:
+        """
+        Extracts text from a single image and saves it to a file.
+
+        The image is converted to grayscale and binarised with Otsu's
+        threshold before being handed to Tesseract.
+
+        Args:
+            image_path: Path to the image file.
+            output_file: Path to the output text file.
+
+        Returns:
+            The extracted text. Returns an empty string on error (the error
+            is logged, not raised).
+        """
+        try:
+            # Load image using OpenCV
+            img = cv2.imread(image_path)
+            if img is None:
+                raise ValueError(f"Could not read image: {image_path}")
+
+            # Preprocess image for better OCR results
+            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+            img_pil = Image.fromarray(thresh)
+
+            # The normalised separator is stored back so run() joins with it too.
+            self.sep = self._normalize_separator(self.sep)
+
+            # Perform OCR using pytesseract
+            text = pytesseract.image_to_string(img_pil, config=DEFAULT_CONFIG)
+            text = self.sep.join(text.splitlines())
+            logger.info("")
+            logger.info(f"Extracted text from {image_path}")
+            print(f"{fg.YELLOW}{text}{RESET}")
+
+            # Save text to file
+            with open(output_file, "w", encoding="utf-8") as file:
+                file.write(text)
+            return text
+
+        except FileNotFoundError as e:
+            logger.error(f"File not found: {e}")
+        except IOError as e:
+            logger.error(f"IOError: {e}")
+        except pytesseract.TesseractError as e:
+            logger.error(f"Tesseract error: {e}")
+        except cv2.error as e:
+            logger.error(f"OpenCV error processing {image_path}: {e}")
+        except Exception as e:
+            logger.error(
+                f"An unexpected error occurred while processing {image_path}: {e}"
+            )
+
+        return ""  # Return empty string on error
+
+    def run(
+        self, output_file: Optional[Union[list[str], str, os.PathLike]] = None
+    ) -> Optional[List[str]]:
+        """
+        Runs the OCR extraction process on the input file(s) or directory.
+
+        Args:
+            output_file: Optional path to a single output file. If provided, all
+                extracted text will be written to this file. If None, output
+                files will be generated based on input image names.
+
+        Returns:
+            A list of extracted texts, or None if no images were processed or
+            an unexpected error occurred. If output_file is provided, returns
+            a list with a single string. Ctrl-C exits the process.
+        """
+        image_list = DirectoryScanner(self.input_obj).run()
+        num_images = len(image_list)
+
+        if num_images == 0:
+            logger.warning("No images found to process.")
+            return None
+
+        try:
+            if output_file:
+                # Process all images and concatenate text into one output file.
+                # Each image's text is also written to "<output stem>.txt" by
+                # _process_image and overwritten by the next image.
+                per_image_file = os.path.splitext(output_file)[0] + ".txt"
+                pieces = []
+                with Progress() as progress:
+                    task = progress.add_task(
+                        "[yellow]Extracting text...", total=num_images
+                    )
+                    for image_path in image_list:
+                        text = self._process_image(image_path, per_image_file)
+                        # self.sep is read after the call, once it is normalised.
+                        pieces.append(text + self.sep)
+                        progress.update(task, advance=1)
+                all_text = "".join(pieces)
+                with open(output_file, "w", encoding="utf-8") as f:
+                    f.write(all_text)
+                return [all_text]  # Return a list containing the combined text
+
+            # Process each image individually, creating separate output files
+            extracted_texts = []
+            with Progress() as progress:
+                task = progress.add_task(
+                    "[yellow]Extracting text...", total=num_images
+                )
+                for image_path in image_list:
+                    _output_file = (
+                        os.path.splitext(os.path.basename(image_path))[0] + ".txt"
+                    )
+                    _output_file = modify_filename_if_exists(_output_file)
+                    extracted_texts.append(
+                        self._process_image(image_path, _output_file)
+                    )
+                    progress.update(task, advance=1)
+            return extracted_texts
+
+        except KeyboardInterrupt:
+            print(
+                f"\n[{bg.YELLOW}X{RESET}]Operation interrupted by {fg.UBLUE}user{RESET}.[/]"
+            )
+            sys.exit(0)
+        except Exception as e:
+            logger.error(f"An unexpected error occurred: {bg.RED}{e}{RESET}")
+            return None  # Ensure None is returned on error
diff --git a/filemac/core/pdf/core.py b/filemac/core/pdf/core.py
new file mode 100644
index 0000000..e9f3197
--- /dev/null
+++ b/filemac/core/pdf/core.py
@@ -0,0 +1,415 @@
+import os
+import subprocess
+import sys
+
+import PyPDF2
+from pdf2image import convert_from_path
+from PIL import Image # ImageSequence
+from tqdm.auto import tqdm
+from ...utils.simple import logger
+from ..document import DocConverter
+from ..exceptions import FilemacError, FileSystemError
+from ...utils.colors import fg, bg, rs
+from ..ocr import ExtractText
+
+
+# Module-level alias for `rs` from utils.colors; appended after coloured output below.
+RESET = rs
+# Default value of Scanner's `sep` argument, which is forwarded to ExtractText.
+DEFAULT_SEPARATOR = "\n"
+
+
+class PDF2LongImageConverter:
+    """Render a document (pdf, doc, docx, odt) as one tall PNG image."""
+
+    def __init__(self, doc):
+        """
+        Args:
+            doc: Path of the document to convert.
+        """
+        self.document = doc
+
+    def preprocess(self):
+        """
+        Convert the document to a PDF if necessary, then to a long image.
+
+        Returns:
+            Path of the generated PNG, or None for unsupported extensions.
+        """
+        ext = self.document.split(".")[-1].lower()
+        if ext == "pdf":
+            return self.convert(self.document)
+        if ext in ("doc", "docx"):
+            path = DocConverter(self.document).word_to_pdf()
+            return self.convert(path)
+        if ext == "odt":
+            return self.subprocess_executor()
+        return None
+
+    def subprocess_executor(self):
+        """
+        Convert the document to PDF with ``soffice`` and render that PDF.
+
+        Returns:
+            Path of the generated PNG.
+        """
+        logger.info(f"{fg.DCYAN}Invoked soffice ..{RESET}")
+        # A bare file name has an empty dirname; use "." so the PDF is written
+        # to (and later looked up in) the current directory, not "/".
+        out_dir = os.path.dirname(self.document) or "."
+        subprocess.call(
+            [
+                "soffice",
+                "--convert-to",
+                "pdf",
+                self.document,
+                "--outdir",
+                out_dir,
+            ]
+        )
+        # splitext keeps dots that are part of the name ("a.b.odt" -> "a.b.pdf").
+        stem = os.path.splitext(os.path.basename(self.document))[0]
+        pdf_file = os.path.abspath(os.path.join(out_dir, stem + ".pdf"))
+        return self.convert(pdf_file)
+
+    @staticmethod
+    def convert(pdf_file):
+        """
+        Stack all pages of ``pdf_file`` vertically into a single PNG.
+
+        Args:
+            pdf_file: Path of the PDF to render.
+
+        Returns:
+            Path of the PNG, written next to the PDF with the same stem.
+
+        Raises:
+            FileSystemError: If the PDF cannot be found.
+            FilemacError: On any other failure.
+        """
+        # NOTE(review): the logging calls below assume `logger` follows the
+        # stdlib logging.Logger API (lower-case methods, no `end` keyword).
+        try:
+            logger.info(f"{fg.BYELLOW}Read pdf{RESET}")
+            images = convert_from_path(pdf_file)
+            out_img = os.path.splitext(pdf_file)[0] + ".png"
+            total_height = sum(img.size[1] for img in images)
+            max_width = max(img.size[0] for img in images)
+
+            logger.info(f"{fg.DCYAN}Draw image ..{RESET}")
+            new_im = Image.new("RGB", (max_width, total_height))
+
+            # Paste each page directly below the previous one.
+            y_offset = 0
+            for i, img in enumerate(images):
+                logger.info(f"{fg.BBLUE}{i}{RESET}")
+                new_im.paste(img, (0, y_offset))
+                y_offset += img.size[1]
+            logger.info(f"{fg.BYELLOW}Save dest: {fg.BMAGENTA}{out_img}{RESET}")
+            new_im.save(out_img)
+            logger.info(f"{fg.BGREEN}Success😇✅{RESET}")
+            return out_img
+        except FileNotFoundError as e:
+            raise FileSystemError(f"{fg.RED}File not found!{RESET}") from e
+        except KeyboardInterrupt:
+            logger.debug("\nQuit❕")
+            sys.exit()
+        except Exception as e:
+            raise FilemacError(f"{fg.RED}{e}{RESET}") from e
+
+
+class PageExtractor:
+    """
+    Extract a page range from a PDF file and save it as a new file.
+
+    Args:
+        pdf: PDF file to operate on.
+        Llimits: Lower limit, the first page to extract (1-based).
+        Ulimit: Upper limit, the last page to extract (inclusive). Only the
+            page ``Llimits`` is extracted when omitted; -1 means "up to the
+            last page".
+
+    Attributes:
+        start: Zero-based index of the first page.
+        stop: Exclusive end index (or -1 until getPages resolves it).
+        outf: Output file that will contain the extracted pages.
+    """
+
+    def __init__(
+        self,
+        pdf,
+        Llimits: int,
+        Ulimit: int = None,
+    ):
+        self.pdf = pdf
+        self.start = Llimits - 1
+        # splitext keeps dots in directories/names intact ("./a.b.pdf" -> "./a.b").
+        stem = os.path.splitext(pdf)[0]
+
+        if Ulimit is None:
+            self.stop = self.start + 1
+            self.outf = f"{stem}_{Llimits}_extract.pdf"
+        else:
+            self.stop = Ulimit
+            # Both numbers in the name are the 1-based page numbers given by the caller.
+            self.outf = f"{stem}_{Llimits}_{Ulimit}_extract.pdf"
+
+    def getPages(self):
+        """
+        Extract the page range and write the pages to a new PDF file.
+
+        If self.stop (Ulimit) == -1 all pages from the lower limit to the
+        last page are extracted.
+
+        Returns:
+            The output file path, or None when an error was reported.
+        """
+        try:
+            reader = PyPDF2.PdfReader(self.pdf)
+
+            if self.stop == -1:
+                self.stop = len(reader.pages)
+
+            pdf_writer = PyPDF2.PdfWriter()
+            print(f"{fg.BBLUE}[🤖]{fg.BBLUE} Extracting:{RESET}")
+            for page_num in range(self.start, self.stop):
+                print(
+                    f"{fg.BBLUE}[📄]{RESET}{fg.DCYAN}Page {page_num + 1}{RESET}"
+                )
+                pdf_writer.add_page(reader.pages[page_num])
+
+            with open(self.outf, "wb") as out_file:
+                pdf_writer.write(out_file)
+            print(
+                f"{fg.BBLUE}[+]{RESET} {fg.BWHITE}File {fg.BMAGENTA}{self.outf}{RESET}"
+            )
+            return self.outf
+        except KeyboardInterrupt:
+            print("\n [!] Quit")
+            # sys.exit is always available; the `exit` builtin is site-dependent.
+            sys.exit(2)
+        except FileNotFoundError as e:
+            print(f"[{bg.BRED}-{RESET}] {fg.RED}{e}{RESET}")
+        except Exception as e:
+            print(e)
+        return None
+
+    @staticmethod
+    def _entry_(kwargs):
+        """
+        Build a PageExtractor from a positional list and run it.
+
+        Args:
+            kwargs: Sequence ``[pdf, lower]`` or ``[pdf, lower, upper]``;
+                extra items are ignored, shorter sequences do nothing.
+
+        Returns:
+            None
+        """
+        if len(kwargs) >= 3:
+            pdf, lower, upper = kwargs[:3]
+            PageExtractor(pdf, int(lower), int(upper)).getPages()
+        elif len(kwargs) == 2:
+            pdf, lower = kwargs
+            PageExtractor(pdf, int(lower)).getPages()
+
+
+class PDFCombine:
+    """
+    Combine PDF files, either one after another or page-interleaved.
+
+    Args:
+        obj1: A PDF path, or a list/tuple of PDF paths.
+        obj2: Second PDF path, used when ``obj1`` is a single path.
+        outf: Output path; derived from the first two inputs when None.
+        order: "AB"/"BA"/"ABA"/"BAB" interleave pages; "AA"/"BB"/"AAB"/"BBA"
+            append the files in sequence.
+    """
+
+    def __init__(self, obj1, obj2=None, outf=None, order="AA"):
+        self.obj1 = obj1
+        self.obj2 = obj2
+        self.outf = outf
+        self.order = order
+
+        if self.outf is None:
+            self.outf = self._default_output()
+
+    def _inputs(self):
+        """Return the input PDFs as a list, whichever way they were supplied."""
+        if isinstance(self.obj1, (list, tuple)):
+            return list(self.obj1)
+        return [path for path in (self.obj1, self.obj2) if path is not None]
+
+    def _default_output(self):
+        """Name the output after the first two inputs, beside the first one."""
+        try:
+            first, second = self._inputs()[:2]
+            directory, first_name = os.path.split(first)
+            second_name = os.path.split(second)[1]
+            return os.path.join(
+                directory,
+                f"{first_name.split('.')[0]}_{second_name.split('.')[0]}_filemac.pdf",
+            )
+        except Exception:
+            # Fewer than two inputs or non-path values: use a generic name.
+            return "Filemac_pdfjoin.pdf"
+
+    def _write(self, pdf_writer):
+        """Write the assembled PDF to self.outf and report it."""
+        # Keep self.outf a path: binding the file object to it would leave a
+        # closed file in place of the name after the first write.
+        with open(self.outf, "wb") as out_file:
+            pdf_writer.write(out_file)
+        print(
+            f"\n{fg.FCYAN}PDFs combined with specified page order into{RESET}{fg.BBLUE} {self.outf}{RESET}"
+        )
+
+    def controller(self):
+        """Dispatch to the combine strategy selected by ``order``; unknown orders do nothing."""
+        if self.order in {"AB", "BA", "ABA", "BAB"}:
+            self.combine_pdfs_ABA_interleave()
+        elif self.order in {"AA", "BB", "AAB", "BBA"}:
+            if isinstance(self.obj1, (list, tuple)):
+                self.merge_All_AAB()
+            else:
+                self.combine_pdfs_AAB_order()
+
+    def combine_pdfs_ABA_interleave(self):
+        """Interleave pages: page 1 of every file, then page 2 of every file, and so on."""
+        try:
+            pdf_writer = PyPDF2.PdfWriter()
+            pdf_readers = [PyPDF2.PdfReader(file) for file in self._inputs()]
+
+            max_pages = max(len(reader.pages) for reader in pdf_readers)
+
+            for page_num in range(max_pages):
+                for reader in pdf_readers:
+                    # Shorter files simply run out of pages earlier.
+                    if page_num < len(reader.pages):
+                        print(
+                            f"{fg.CYAN}Page {fg.BBLUE}{page_num + 1}/{len(reader.pages)}{RESET}",
+                            end="\r",
+                        )
+                        pdf_writer.add_page(reader.pages[page_num])
+
+            self._write(pdf_writer)
+        except KeyboardInterrupt:
+            print("\nQuit!")
+            sys.exit(1)
+        except Exception as e:
+            print(f"{fg.RED}{e}{RESET}")
+
+    def combine_pdfs_AAB_order(self):
+        """Append all pages of ``obj2`` after all pages of ``obj1``."""
+        try:
+            pdf_writer = PyPDF2.PdfWriter()
+            reader1 = PyPDF2.PdfReader(self.obj1)
+            reader2 = PyPDF2.PdfReader(self.obj2)
+
+            print(f"{fg.CYAN}File A{RESET}")
+            for p1_num, p1 in enumerate(reader1.pages):
+                print(f"Page {p1_num + 1}/{len(reader1.pages)}", end="\r")
+                pdf_writer.add_page(p1)
+
+            print(f"\n{fg.CYAN}File B{RESET}")
+            for p2_num, p2 in enumerate(reader2.pages):
+                print(f"Page {p2_num + 1}/{len(reader2.pages)}", end="\r")
+                pdf_writer.add_page(p2)
+
+            self._write(pdf_writer)
+        except KeyboardInterrupt:
+            print("\nQuit!")
+            sys.exit(1)
+        except Exception as e:
+            print(f"{fg.RED}{e}{RESET}")
+
+    def merge_All_AAB(self):
+        """Append every file listed in ``obj1`` in sequence."""
+        try:
+            pdf_writer = PyPDF2.PdfWriter()
+            pdf_readers = [PyPDF2.PdfReader(file) for file in self.obj1]
+
+            for reader in pdf_readers:
+                for page_num, page in enumerate(reader.pages):
+                    print(
+                        f"{fg.BWHITE}Page {fg.CYAN}{page_num + 1}/{len(reader.pages)}{RESET}",
+                        end="\r",
+                    )
+                    pdf_writer.add_page(page)
+
+            self._write(pdf_writer)
+        except KeyboardInterrupt:
+            print("\nQuit!")
+            sys.exit(1)
+        except Exception as e:
+            print(f"{fg.RED}{e}{RESET}")
+
+
+class Scanner:
+    """
+    Extract text from PDF files, either directly or through OCR.
+
+    Args:
+        input_file: File (or, for the PDF methods, directory) to scan.
+        sep: Separator forwarded to ExtractText for joining recognised lines.
+    """
+
+    def __init__(self, input_file, sep: str = DEFAULT_SEPARATOR):
+        self.input_file = input_file
+        self.sep = sep
+
+    def preprocess(self):
+        """Return the input file, or the regular files directly inside the input directory."""
+        files_to_process = []
+
+        if os.path.isfile(self.input_file):
+            files_to_process.append(self.input_file)
+        elif os.path.isdir(self.input_file):
+            for file in os.listdir(self.input_file):
+                file_path = os.path.join(self.input_file, file)
+                if os.path.isfile(file_path):
+                    files_to_process.append(file_path)
+
+        return files_to_process
+
+    def scanPDF(self, obj=None):
+        """
+        Extract the embedded text of each PDF and write it to "<name>.txt".
+
+        Args:
+            obj: Optional single PDF path that overrides the configured input.
+        """
+        if obj:
+            pdf_list = [obj]
+        else:
+            pdf_list = [
+                item for item in self.preprocess() if item.lower().endswith("pdf")
+            ]
+
+        for pdf in pdf_list:
+            out_f = pdf[:-3] + "txt"
+            print(f"{fg.YELLOW}Read pdf ..{RESET}")
+
+            with open(pdf, "rb") as f:
+                reader = PyPDF2.PdfReader(f)
+                total = len(reader.pages)
+                parts = []
+                for pg, page in enumerate(reader.pages, start=1):
+                    print(f"{fg.BYELLOW}Progress:{RESET}", end="")
+                    print(f"{fg.CYAN}{pg}/{total}{RESET}", end="\r")
+                    # Guard against a page yielding no text object at all.
+                    parts.append(page.extract_text() or "")
+                text = "".join(parts)
+
+            print(f"\n{text}")
+            print(f"\n{fg.YELLOW}Write text to {fg.GREEN}{out_f}{RESET}")
+            with open(out_f, "w", encoding="utf-8") as f:
+                f.write(text)
+
+            print(f"\n{fg.BGREEN}Ok{RESET}")
+
+    def scanAsImgs(self):
+        """
+        Render the document to page images, OCR each one and collect the text.
+
+        The combined text is appended to "<input stem>_filemac.txt"; the
+        intermediate images (and same-named .txt files) are deleted.
+
+        Returns:
+            The combined text.
+        """
+        img_objs = DocConverter(self.input_file).doc2image()
+        out_txt = f"{os.path.splitext(self.input_file)[0]}_filemac.txt"
+
+        chunks = []
+        for i in tqdm(img_objs, desc="Extracting", leave=False):
+            # ExtractText exposes run(), which returns a list of texts or None.
+            result = ExtractText(i, self.sep).run()
+            if result:
+                chunks.extend(result)
+        text = "".join(chunks)
+
+        # Write once, after the loop, so earlier pages are not repeated in the file.
+        if text:
+            with open(out_txt, "a", encoding="utf-8") as _writer:
+                _writer.write(text)
+
+        def _cleaner_():
+            print(f"{fg.FMAGENTA}Clean")
+            for obj in img_objs:
+                if os.path.exists(obj):
+                    print(obj, end="\r")
+                    os.remove(obj)
+                txt_file = f"{obj[:-4]}.txt"
+                if os.path.exists(txt_file):
+                    print(f"{bg.CYAN_BG}{txt_file}{RESET}", end="\r")
+                    os.remove(txt_file)
+
+        _cleaner_()
+        from ...utils.screen import clear_screen
+
+        clear_screen()
+        print(f"{bg.GREEN}Full Text{RESET}")
+        print(text)
+        print(f"{fg.BWHITE}Text File ={fg.IGREEN}{out_txt}{RESET}")
+        print(f"{fg.GREEN}Ok✅{RESET}")
+        return text
+
+    def scanAsLongImg(self) -> bool:
+        """
+        Convert each PDF to one long image and run OCR on it.
+
+        Returns:
+            True when all files were processed, False if an error was reported.
+        """
+        try:
+            pdf_list = [
+                item for item in self.preprocess() if item.lower().endswith("pdf")
+            ]
+
+            for file in pdf_list:
+                long_image = PDF2LongImageConverter(file).preprocess()
+
+                # ExtractText exposes run(), which returns a list of texts or None.
+                text = ExtractText(long_image, self.sep).run()
+                if text is not None:
+                    print(f"{fg.GREEN}Ok{RESET}")
+            return True
+        except Exception as e:
+            print(e)
+            return False
diff --git a/filemac/core/recorder.py b/filemac/core/recorder.py
new file mode 100644
index 0000000..a8843db
--- /dev/null
+++ b/filemac/core/recorder.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python3
+import numpy as np
+import sounddevice as sd
+import wavio
+import time
+from pynput import keyboard
+import sys
+
+
+class SoundRecorder:
+    """Record audio through ``sd.InputStream`` with keyboard pause/stop control.
+
+    SPACE toggles pause, ENTER stops the recording. The captured samples are
+    written with ``wavio`` to a file whose name is requested from the user
+    when the recorder is constructed.
+    """
+
+    def __init__(self, frequency=44100, channels=2, dtype=np.int16):
+        """Store the stream settings and ask the user for the output file name.
+
+        Args:
+            frequency: sample rate in samples per second.
+            channels: number of audio channels passed to the input stream.
+            dtype: sample data type passed to the input stream.
+        """
+        self.fs = frequency  # Sample rate (samples per second)
+        self.channels = channels  # Honour the argument (was hard-coded to 2)
+        self.dtype = dtype  # Data type for the recording
+
+        self.paused = False  # Pause flag toggled by SPACE
+        self.recording = []  # Buffer for recorded chunks
+        self.start_time = 0  # Start time for elapsed time tracking
+        self.elapsed_time = 0  # Track elapsed time
+        self.running = True  # Cleared by ENTER to stop recording
+        self.filename = self.filename_prober()
+
+    def format_time(self, seconds):
+        """Return *seconds* as an ANSI-coloured HH:MM:SS string."""
+        hours = int(seconds // 3600)
+        minutes = int((seconds % 3600) // 60)
+        sec = int(seconds % 60)
+        return f"\033[34m{hours:02d}\033[35m:{minutes:02d}\033[32m:{sec:02d} \033[0m"
+
+    def on_press(self, key):
+        """Keyboard callback: SPACE toggles pause, ENTER stops the listener."""
+        try:
+            if key == keyboard.Key.space:
+                self.paused = not self.paused  # Toggle pause/resume
+                if self.paused:
+                    print("\nPaused... Press SPACE to resume.")
+                else:
+                    print("\nRecording resumed... Press SPACE to pause.")
+            elif key == keyboard.Key.enter:
+                self.running = False  # Stop recording
+                print("\nRecording finished.")
+                return False  # Returning False stops the listener
+        except Exception as e:
+            print(f"Error: {e}")
+
+    def record_audio(self):
+        """Record until ENTER is pressed and return the samples as one array.
+
+        Returns:
+            The recorded chunks concatenated along axis 0, or an empty array
+            of ``self.dtype`` when nothing was captured.
+        """
+        print("Press SPACE to pause/resume, ENTER to stop and save.")
+        start_time = time.time()
+
+        def callback(indata, frames, callback_time, status):
+            # Chunks arriving while paused are dropped
+            if not self.paused:
+                self.recording.append(indata.copy())
+                self.elapsed_time = time.time() - start_time
+                print(f"Elapsed Time: {self.format_time(self.elapsed_time)}", end="\r")
+
+        with sd.InputStream(
+            samplerate=self.fs,
+            channels=self.channels,
+            dtype=self.dtype,
+            callback=callback,
+        ):
+            with keyboard.Listener(on_press=self.on_press) as listener:
+                while self.running:
+                    time.sleep(0.1)  # Prevents high CPU usage
+                listener.stop()
+
+        if not self.recording:
+            return np.array([], dtype=self.dtype)
+        return np.concatenate(self.recording, axis=0)
+
+    def run(self):
+        """Record, save and return the output file name; exit on Ctrl-C."""
+        try:
+            r_data = self.record_audio()
+            self.save_audio(r_data)
+            return self.filename
+        except KeyboardInterrupt:
+            sys.exit()
+
+    def save_audio(self, recording):
+        """Write *recording* to ``self.filename`` unless it is empty."""
+        if recording.size == 0:
+            print("No audio recorded.")
+        else:
+            # NOTE(review): sampwidth=2 matches the default int16 dtype only
+            wavio.write(self.filename, recording, self.fs, sampwidth=2)
+            print(f"Recording saved as {self.filename}")
+
+    @staticmethod
+    def _with_wav_extension(name):
+        """Append ".wav" when *name* contains no dot, else return it unchanged."""
+        return name if "." in name else f"{name}.wav"
+
+    @staticmethod
+    def filename_prober():
+        """Prompt until a non-empty file name is entered and return it."""
+        _filename = None
+
+        while not _filename:
+            _filename = input("\033[94mEnter Desired File Name\033[0;1;89m:")
+
+        return SoundRecorder._with_wav_extension(_filename)
+
+
+if __name__ == "__main__":
+    try:
+        # SoundRecorder() prompts for the file name itself, so no separate
+        # prompt is needed here (its result was never used).
+        recorder = SoundRecorder()
+        file = recorder.run()
+    except KeyboardInterrupt:
+        print("\nQuit!")
+        sys.exit(1)
diff --git a/filemac/core/svg/core.py b/filemac/core/svg/core.py
new file mode 100644
index 0000000..fcbede0
--- /dev/null
+++ b/filemac/core/svg/core.py
@@ -0,0 +1,47 @@
+import cairosvg
+
+
+class SVGConverter:
+    """
+    Static helpers that render SVG input with CairoSVG.
+    Output formats: PNG, PDF and SVG.
+    """
+
+    @staticmethod
+    def _source(input_svg, is_string):
+        """Return the CairoSVG keyword that selects raw data or a location."""
+        if is_string:
+            return {"bytestring": input_svg.encode()}
+        return {"url": input_svg}
+
+    @staticmethod
+    def to_png(input_svg: str, output_path: str, is_string: bool = False):
+        """
+        Render SVG to a PNG file.
+        :param input_svg: Path to SVG file or raw SVG string.
+        :param output_path: Output PNG file path.
+        :param is_string: Set True if input_svg is raw SVG data.
+        """
+        source = SVGConverter._source(input_svg, is_string)
+        cairosvg.svg2png(write_to=output_path, **source)
+
+    @staticmethod
+    def to_pdf(input_svg: str, output_path: str, is_string: bool = False):
+        """
+        Render SVG to a PDF file.
+        :param input_svg: Path to SVG file or raw SVG string.
+        :param output_path: Output PDF file path.
+        :param is_string: Set True if input_svg is raw SVG data.
+        """
+        source = SVGConverter._source(input_svg, is_string)
+        cairosvg.svg2pdf(write_to=output_path, **source)
+
+    @staticmethod
+    def to_svg(input_svg: str, output_path: str, is_string: bool = False):
+        """
+        Re-emit SVG as an SVG file through CairoSVG.
+        :param input_svg: Path to SVG file or raw SVG string.
+        :param output_path: Output SVG file path.
+        :param is_string: Set True if input_svg is raw SVG data.
+        """
+        source = SVGConverter._source(input_svg, is_string)
+        cairosvg.svg2svg(write_to=output_path, **source)
diff --git a/filemac/core/text/core.py b/filemac/core/text/core.py
new file mode 100644
index 0000000..b09aa2e
--- /dev/null
+++ b/filemac/core/text/core.py
@@ -0,0 +1,111 @@
+"""Create a word document directly from a text file."""
+
+from docx import Document
+from docx.shared import Pt, RGBColor
+
+from ...utils.colors import fg, rs
+
+RESET = rs
+
+
+class StyledText:
+    """Build a Word document from a lightly marked-up text file.
+
+    Lines that start with '#' become headings: '#' -> Heading 1,
+    '##' -> Heading 2, '###' -> Heading 3 (longer markers are capped at
+    level 3). Every other line becomes a body paragraph.
+
+    Args:
+        obj: path of the input text file.
+        out_obj: output file name; defaults to the input path without its
+            extension followed by "_filemac.docx".
+        fsize: body font size in points (default 12).
+        fstyle: font name for headings and body text (default Times New Roman).
+    """
+
+    def __init__(
+        self, obj, out_obj=None, fsize: int = 12, fstyle: str = "Times New Roman"
+    ):
+        self.obj = obj
+        self.out_obj = out_obj
+        self.fsize = fsize
+        self.fstyle = fstyle
+        if self.out_obj is None:
+            from os.path import splitext
+
+            # splitext drops only the final extension, so dots in directory
+            # names no longer truncate the output path
+            self.out_obj = f"{splitext(self.obj)[0]}_filemac.docx"
+
+    @staticmethod
+    def _split_heading(line, max_level=3):
+        """Return ``(level, text)`` for *line*; level 0 means body text.
+
+        Only the leading run of '#' marks the heading level, so '#'
+        characters later in the line stay part of the text.
+        """
+        if not line.startswith("#"):
+            return 0, line.strip()
+        title = line.lstrip("#")
+        level = min(len(line) - len(title), max_level)
+        return level, title.strip()
+
+    def text_to_word(self):
+        """Convert the text file to a Word document saved at ``self.out_obj``.
+
+        The file is read line by line; heading lines lose their '#' marker
+        and get the heading size/colour plus the matching "Heading N"
+        style, all other lines are written in black at ``fsize`` points.
+        Both headings and body text use the ``fstyle`` font.
+        """
+        print(f"{fg.BWHITE}Set Font: {fg.CYAN}{self.fsize}{RESET}")
+        print(f"{fg.BWHITE}Set Style: {fg.CYAN}{self.fstyle}{RESET}")
+        doc = Document()
+
+        # Size and colour per supported heading level
+        heading_styles = {
+            1: {"font_size": Pt(18), "font_color": RGBColor(126, 153, 184)},
+            2: {"font_size": Pt(16), "font_color": RGBColor(0, 120, 212)},
+            3: {"font_size": Pt(14), "font_color": RGBColor(0, 120, 212)},
+        }
+        body_font_size = Pt(self.fsize)
+        body_font_color = RGBColor(0, 0, 0)  # Black
+
+        with open(self.obj, "r") as file:
+            lines = file.readlines()
+
+        total = len(lines)
+        for number, line in enumerate(lines, start=1):
+            print(
+                f"{fg.BWHITE}Line: {fg.DCYAN}{number}{fg.YELLOW} of {fg.BLUE}{total}{RESET}",
+                end="\r",
+            )
+            level, content = self._split_heading(line)
+            p = doc.add_paragraph()
+            run = p.add_run(content)
+            run.font.name = self.fstyle
+            if level:
+                style = heading_styles[level]
+                run.font.size = style["font_size"]
+                run.font.color.rgb = style["font_color"]
+                # Styles are looked up by name, which contains a space
+                p.style = f"Heading {level}"
+            else:
+                run.font.size = body_font_size
+                run.font.color.rgb = body_font_color
+
+        # Save the document
+        print("\n")
+        doc.save(self.out_obj)
+        print(
+            f"{fg.BWHITE}Text file converted to Word document: {fg.MAGENTA}{self.out_obj}{RESET}"
+        )
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Take the input (and optional output) path from the command line instead
+    # of a path that only exists on one developer's machine.
+    if len(sys.argv) < 2:
+        sys.exit("usage: python -m filemac.core.text.core TEXT_FILE [OUTPUT_DOCX]")
+    init = StyledText(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
+
+    # Call the function
+    init.text_to_word()
diff --git a/filemac/core/tts/core.py b/filemac/core/tts/core.py
new file mode 100644
index 0000000..e69de29
diff --git a/filemac/core/tts/gtts.py b/filemac/core/tts/gtts.py
new file mode 100644
index 0000000..efa38a4
--- /dev/null
+++ b/filemac/core/tts/gtts.py
@@ -0,0 +1,562 @@
+import json
+import math
+import os
+import PyPDF2
+import shutil
+import sys
+from docx import Document
+from threading import Lock, Thread
+from typing import List, Union
+import requests
+from gtts import gTTS
+from pydub import AudioSegment
+from rich.errors import MarkupError
+from ..document import DocConverter
+from ...utils.colors import fg, rs
+from ...utils.simple import logger
+
+RESET = rs
+
+_ext_word = ["doc", "docx"]
+
+
+class GoogleTTS:
+ """Definition of audiofying class"""
+
+ def __init__(
+     self,
+     obj: Union[os.PathLike, str, List[Union[os.PathLike, str]]],
+     resume: bool = True,
+ ):
+     """Keep the input and the resume flag for later use.
+
+     Args:
+         obj: a file path, a directory path, or a list of file paths.
+         resume: stored as ``self.resume``; Synthesise removes an existing
+             temporary folder before starting only when this is False.
+     """
+     self.obj, self.resume = obj, resume
+
+ @staticmethod
+ def join_audios(files, output_file):
+     """Concatenate audio files, in order, into "{output_file}_master.ogg".
+
+     Args:
+         files: sequence of audio file paths readable by ``AudioSegment``.
+         output_file: path prefix of the master file.
+
+     Returns:
+         The path of the written master file, or None when *files* is empty.
+     """
+     if not files:
+         # Nothing to join; indexing the first segment would raise IndexError
+         print("No audio files to join")
+         return None
+
+     # Announce the file that is really written (ogg, not mp3)
+     masterfile = output_file + "_master.ogg"
+     print(
+         f"{fg.BBLUE}Create a master file {fg.BMAGENTA}{masterfile}{RESET}",
+         end="\r",
+     )
+     print(f"Join {fg.BBLUE}{len(files)}{RESET} files")
+
+     # Fold each file into the running result instead of keeping every
+     # decoded segment in a list first
+     combined = AudioSegment.from_file(files[0])
+     for filename in files[1:]:
+         combined += AudioSegment.from_file(filename)
+
+     combined.export(masterfile, format="ogg")
+     print(
+         f"{fg.BGREEN}Master file:Ok {RESET}"
+     )
+     return masterfile
+
+ def Synthesise(
+ self,
+ text: str,
+ output_file: str,
+ CHUNK_SIZE: int = 1_000,
+ _tmp_folder_: str = "tmp_dir",
+ thread_name: str = None,
+ max_retries: int = 30,
+ ) -> None:
+ """Convert text to speech with gTTS, one chunk at a time.
+
+ The text is cut into CHUNK_SIZE-character slices. Each slice is saved as
+ an ".ogg" file in a temporary folder next to output_file, and the chunk
+ counter is written to the ConfigManager entry of this thread after every
+ saved chunk. When the chunk loop ends without an exception and the
+ temporary folder holds more than two entries, the chunks are merged
+ through JoinAudios and the temporary folder is removed.
+
+ Args:
+ text: the text to synthesise.
+ output_file: target file; its directory hosts the temporary folder and
+ its name prefixes the chunk files.
+ CHUNK_SIZE: number of characters per gTTS request.
+ _tmp_folder_: name of the temporary folder created in the output directory.
+ thread_name: currently overwritten with a name derived from output_file.
+ max_retries: the retry loop runs while attempt <= max_retries.
+
+ Exits the process with status 2 once the retry loop is exhausted.
+ """
+ # from rich.progress import (BarColumn, Progress, SpinnerColumn,TextColumn)
+
+ config = ConfigManager()
+ # Define directories and other useful variables for generating output_file and checkpoint_file
+ out_dir = os.path.split(output_file)[0]
+
+ # The thread_name argument is replaced here; the config key is always derived from output_file
+ thread_name = f"thread_{os.path.split(output_file.split('.')[0])[-1]}"
+ _file_ = os.path.split(output_file)[1]
+
+ _tmp_folder_ = os.path.join(out_dir, _tmp_folder_)
+
+ # Remove temporary dir if it exists, rare-cases since file names are mostly unique
+ if os.path.exists(_tmp_folder_) and self.resume is False:
+ # query = input(f"{fg.BBLUE}Remove the {os.path.join(out_dir, _tmp_folder_)} directory (y/n)?{RESET} ").lower() in ('y', 'yes')
+ shutil.rmtree(_tmp_folder_)
+
+ # Create temporary folder to house chunks
+ if not os.path.exists(_tmp_folder_):
+ logger.info(
+ f"{fg.BYELLOW}Create temporary directory = {fg.BBLUE}{_tmp_folder_}{RESET}"
+ )
+ os.mkdir(_tmp_folder_)
+
+ _full_output_path_ = os.path.join(_tmp_folder_, _file_)
+
+ # Read resume chunk from the configuration file
+ # NOTE(review): read_config_file returns None when the file or the entry is
+ # missing, so int(...) raises TypeError before the None check below can run
+ start_chunk = int(config.read_config_file(thread_name)) * 1_000
+ start_chunk = 0 if start_chunk is None else start_chunk
+
+ """ If chunk is not 0 multiply the chunk by the highest decimal value of the chunk size
+ else set it to 0 meaning file is being operated on for the first time
+ """
+ # NOTE(review): start_chunk was already multiplied by 1_000 above, so this
+ # scales the stored chunk number by 1_000_000 - confirm the intended offset
+ resume_chunk_pos = start_chunk * 1_000 if start_chunk != 0 else start_chunk
+
+ try:
+ print(f"{fg.BYELLOW}Start thread:: {thread_name}{RESET}")
+
+ total_chunks = math.ceil(len(text) / CHUNK_SIZE)
+
+ counter = (
+ math.ceil(resume_chunk_pos / CHUNK_SIZE) if resume_chunk_pos != 0 else 0
+ )
+
+ attempt = 0
+
+ # Each pass re-runs the chunk loop from resume_chunk_pos; the handlers below
+ # refresh resume_chunk_pos from the config but leave counter untouched
+ while attempt <= max_retries:
+ try:
+ # Initialize progress bar for the overall process
+
+ for i in range(resume_chunk_pos, len(text), CHUNK_SIZE):
+ print(
+ f"Processing: chunk {fg.BMAGENTA}{counter}/{total_chunks} {fg.DCYAN}{counter / total_chunks * 100:.2f}%{RESET}\n",
+ end="\r",
+ )
+ chunk = text[i : i + CHUNK_SIZE]
+ # print(chunk)
+ if os.path.exists(f"{_full_output_path_}_{counter}.ogg"):
+ if counter == start_chunk:
+ print(
+ f"{fg.CYAN}Chunk vs file confict: {fg.BLUE}Resolving{RESET}"
+ )
+ os.remove(f"{_full_output_path_}_{counter}.ogg")
+ output_filename = f"{_full_output_path_}_{counter}.ogg"
+
+ # Remove empty file
+ # NOTE(review): the condition below matches NON-empty files, which
+ # contradicts the comment above - confirm which one is intended
+ elif (
+ os.path.getsize(f"{_full_output_path_}_{counter}.ogg")
+ != 0
+ ):
+ os.remove(f"{_full_output_path_}_{counter}.ogg")
+ output_filename = f"{_full_output_path_}_{counter}.ogg"
+
+ else:
+ output_filename = (
+ f"{_full_output_path_}_{counter + 1}.ogg"
+ )
+
+ else:
+ output_filename = f"{_full_output_path_}_{counter}.ogg"
+
+ tts = gTTS(text=chunk, lang="en", slow=False)
+
+ tts.save(output_filename)
+
+ # Update current_chunk in the configuration
+ config.update_config_entry(thread_name, current_chunk=counter)
+
+ counter += 1
+
+ # NOTE(review): this handler does not increment attempt, so a persistent
+ # FileNotFoundError repeats the loop without bound
+ except FileNotFoundError as e:
+ logger.error(f"{fg.RED}{e}{RESET}")
+
+ except (
+ requests.exceptions.ConnectionError
+ ): # Handle connectivity/network error
+ logger.error(f"{fg.RED}ConnectionError{RESET}")
+
+ # Exponential backoff for retries
+ # NOTE(review): the countdown is only printed; nothing sleeps between retries
+ for _sec_ in range(2**attempt, 0, -1):
+ print(
+ # Countdown message
+ f"{fg.BWHITE}Resume in {fg.BBLUE}{_sec_}{RESET}",
+ end="\r",
+ )
+
+ attempt += 1
+
+ # Read chunk from configuration
+ resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000
+
+ except (
+ requests.exceptions.HTTPError
+ ) as e: # Exponential backoff for retries
+ # NOTE(review): confirm the exception exposes status_code/reason directly;
+ # requests usually carries them on e.response
+ logger.error(f"HTTP error: {e.status_code} - {e.reason}")
+ for _sec_ in range(2**attempt, 0, -1):
+ print(
+ f"{fg.BWHITE}Resume in {fg.BBLUE}{_sec_}{RESET}",
+ end="\r",
+ )
+
+ attempt += 1
+
+ resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000
+
+ except requests.exceptions.RequestException as e:
+ logger.error(f"{fg.RED}{e}{RESET}")
+
+ for _sec_ in range(2**attempt, 0, -1):
+ print(
+ f"{fg.BWHITE}Resume in {fg.BBLUE}{_sec_}{RESET}",
+ end="\r",
+ )
+ attempt += 1
+
+ resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000
+
+ except (
+ ConnectionError,
+ ConnectionAbortedError,
+ ConnectionRefusedError,
+ ConnectionResetError,
+ ):
+ logger.error(f"{fg.RED}Connection at attempt{RESET}")
+
+ for _sec_ in range(2**attempt, 0, -1):
+ print(
+ f"{fg.BWHITE}Resume in {fg.BLUE}{_sec_}{RESET}",
+ end="\r",
+ )
+
+ attempt += 1
+
+ resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000
+
+ # NOTE(review): like FileNotFoundError above, this handler does not
+ # increment attempt
+ except MarkupError as e:
+ logger.error(f"{fg.RED}{e}{RESET}")
+ except Exception as e: # Handle all other types of exceptions
+ logger.error(
+ f"{fg.BMAGENTA}{attempt + 1}/{max_retries}:{fg.RED}{e}{RESET}"
+ )
+
+ for _sec_ in range(2**attempt, 0, -1):
+ pass
+
+ attempt += 1
+
+ resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000
+
+ # Runs only when the chunk loop finished without raising
+ else:
+ print(
+ f"{fg.BMAGENTA}Conversion success✅. \n {fg.CYAN}INFO\t Create masterfile{RESET}"
+ )
+
+ if (
+ len(os.listdir(_tmp_folder_)) > 2
+ ): # Combine generated gTTS objects
+ from .JoinAudios import JoinAudios
+
+ joiner = JoinAudios(_tmp_folder_, masterfile=output_file)
+ joiner.worker()
+ # Remove temporary files
+ shutil.rmtree(_tmp_folder_)
+
+ break # Exit the retry loop if successful
+
+ # Reached only when the while condition fails, i.e. no pass hit the break above
+ else:
+ print(
+ f"{fg.RED}Maximum retries reached. Unable to complete the operation after {fg.BMAGENTA} {max_retries} attempts.{RESET}"
+ )
+ sys.exit(2)
+
+ finally:
+ pass
+
+ @staticmethod
+ def pdf_to_text(pdf_path):
+     """Return the text of every page of the PDF at *pdf_path*, concatenated.
+
+     Progress is logged page by page. Any exception is logged and swallowed,
+     in which case the function returns None.
+     """
+     logger.info(f"{fg.GREEN} Initializing pdf to text conversion{RESET}")
+     try:
+         with open(pdf_path, "rb") as stream:
+             reader = PyPDF2.PdfReader(stream)
+             collected = ""
+             print(f"{fg.YELLOW}Convert pages..{RESET}")
+             page_total = len(reader.pages)
+             # Page numbers shown to the user are 1-based
+             for number in range(1, page_total + 1):
+                 logger.info(f"Page {fg.BBLUE}{number}{RESET}/{page_total}")
+                 collected += reader.pages[number - 1].extract_text()
+             print(f"{fg.BGREEN}Ok{RESET}\n")
+             return collected
+     except Exception as e:
+         logger.error(
+             f"{fg.RED}Failed to extract text from '{fg.YELLOW}{pdf_path}'{RESET}:\n {e}"
+         )
+
+ @staticmethod
+ def text_file(input_file):
+     """Read *input_file* and return its content with newlines turned into spaces.
+
+     Undecodable bytes are ignored. A missing file or any other error is
+     logged and the function returns None.
+     """
+     try:
+         with open(input_file, "r", errors="ignore") as handle:
+             content = handle.read()
+         return content.replace("\n", " ")
+     except FileNotFoundError:
+         logger.error("File '{}' was not found.📁".format(input_file))
+     except Exception as e:
+         logger.error(f"{fg.RED}{str(e)}{RESET}")
+
+ @staticmethod
+ def docx_to_text(docx_path):
+     """Return the paragraphs of the Word file at *docx_path* joined by newlines.
+
+     A missing file or any other error is logged and the function returns None.
+     """
+     try:
+         logger.info(f"{fg.BLUE} Converting {docx_path} to text{RESET}")
+         document = Document(docx_path)
+         return "\n".join(par.text for par in document.paragraphs)
+     except FileNotFoundError:
+         logger.error(f"File '{docx_path}' was not found.📁")
+     except Exception as e:
+         logger.error(
+             f"{fg.RED}Error converting {docx_path} to text: {e} {RESET}"
+         )
+
+ class ThreadClient:
+     """Run GoogleTTS conversions for one or many input files on worker threads.
+
+     Args:
+         instance: the GoogleTTS object whose ``obj`` names the input (a file,
+             a directory or a list of files) and whose ``Synthesise`` method
+             produces the audio.
+     """
+
+     def __init__(self, instance):
+         self.instance = instance
+         self.lock = Lock()
+         self.config = ConfigManager()
+
+     @staticmethod
+     def _stem(path):
+         """Return the file name of *path* up to its first dot.
+
+         Only the base name is inspected, so dots in directory names do not
+         leak into thread or temp-folder names.
+         """
+         return os.path.basename(path).split(".")[0]
+
+     def _create_thread(self, item):
+         """Register a config entry for *item* and return its (unstarted) thread."""
+         stem = self._stem(item)
+         thread_name = f"thread_{stem}"
+         # One temp dir per input file
+         temp_dir = f"tmp_dir_{stem}"
+
+         # Config entries live in one shared JSON file; serialise the update
+         with self.lock:
+             self.config.add_config_entry(
+                 thread_name, os.path.join(os.path.dirname(item), stem), temp_dir, 0
+             )
+
+         return Thread(
+             target=self.worker,
+             args=(item, temp_dir, thread_name),
+             name=thread_name,
+         )
+
+     @staticmethod
+     def _run_batch(threads):
+         """Start every thread, wait for all of them, then empty the list."""
+         for thread in threads:
+             thread.start()
+         for thread in threads:
+             thread.join()
+         threads.clear()
+
+     def audiofy(self, num_threads=3):
+         """Convert every supported input file, *num_threads* files at a time.
+
+         ``self.instance.obj`` may be a list of paths, a single file or a
+         directory. Only existing files ending in pdf, docx, doc, txt, ppt or
+         pptx are processed.
+         """
+         ls = ("pdf", "docx", "doc", "txt", "ppt", "pptx")
+         target = self.instance.obj
+
+         if isinstance(target, list):
+             candidates = [os.path.abspath(item) for item in target]
+         elif os.path.isfile(target):
+             candidates = [os.path.abspath(target)]
+         elif os.path.isdir(target):
+             # listdir yields bare names: resolve them against the directory,
+             # not the current working directory
+             candidates = [
+                 os.path.abspath(os.path.join(target, name))
+                 for name in os.listdir(target)
+             ]
+         else:
+             candidates = []
+
+         threads = []
+         for item in candidates:
+             if not (os.path.isfile(item) and item.endswith(ls)):
+                 continue
+             threads.append(self._create_thread(item))
+             # Run a batch as soon as it is full
+             if len(threads) == num_threads:
+                 self._run_batch(threads)
+
+         # Run whatever is left when the count is not a multiple of num_threads
+         if threads:
+             self._run_batch(threads)
+
+     def worker(self, input_file, _temp_dir_, thread_name):
+         """Extract the text of *input_file* and synthesise it to an .ogg file.
+
+         Errors are printed rather than raised so one failing file does not
+         stop the other threads.
+         """
+         output_file = self._stem(input_file) + ".ogg"
+         print(f"Thread {thread_name} processing file: {input_file}")
+
+         try:
+             # Extract text based on file type
+             if input_file.endswith(".pdf"):
+                 text = GoogleTTS.pdf_to_text(input_file)
+             elif input_file.lower().endswith(tuple(_ext_word)):
+                 text = GoogleTTS.docx_to_text(input_file)
+             elif input_file.endswith(".txt"):
+                 text = GoogleTTS.text_file(input_file)
+             elif input_file.split(".")[-1] in ("ppt", "pptx"):
+                 conv = DocConverter(input_file)
+                 word = conv.ppt_to_word()
+                 conv = DocConverter(word)
+                 text = GoogleTTS.text_file(conv.word_to_txt())
+             else:
+                 raise ValueError(
+                     "Unsupported file format. Please provide a PDF, txt, or Word document."
+                 )
+
+             # The extractors return None after logging a failure
+             if text is None:
+                 raise ValueError(f"No text could be extracted from {input_file}")
+
+             # Synthesize audio using the extracted text
+             self.instance.Synthesise(
+                 text, output_file, _tmp_folder_=_temp_dir_, thread_name=thread_name
+             )
+             print(f"Thread {thread_name} completed processing {input_file}")
+
+         except Exception as e:
+             print(f"Error in thread {thread_name}: {e}")
+         except KeyboardInterrupt:
+             print(f"Thread {thread_name} interrupted.")
+             sys.exit(1)
+
+
+class ConfigManager:
+    """JSON-file store of per-thread progress records.
+
+    Each record is a dict with the keys "thread_name", "associated_file",
+    "tmp_dir" and "current_chunk". Errors are printed, never raised.
+    """
+
+    def __init__(self, config_path="filemac_config.json"):
+        self.config_path = config_path
+
+    def create_config_file(self, config_data):
+        """
+        Write *config_data* to the configuration file, replacing its content.
+
+        Args:
+            config_data(list): list of record dicts to store.
+        """
+        try:
+            # Create the parent folder first when the path has one that is missing
+            parent = os.path.dirname(self.config_path)
+            if parent and not os.path.exists(parent):
+                os.makedirs(parent)
+
+            with open(self.config_path, "w") as handle:
+                json.dump(config_data, handle, indent=4)
+
+            print(f"Configuration file '{self.config_path}' created successfully.")
+        except Exception as e:
+            print(f"Error creating configuration file: {e}")
+
+    def read_config_file(self, thread=None):
+        """
+        Load the configuration, or the current chunk of a single thread.
+
+        Args:
+            thread (str): thread name to look up; None returns every record.
+
+        Returns:
+            The full record list when *thread* is None, otherwise the
+            "current_chunk" value of the first matching record. None when the
+            file is missing, the thread is unknown or reading fails.
+        """
+        try:
+            if not os.path.exists(self.config_path):
+                print(f"Configuration file '{self.config_path}' not found.")
+                return None
+
+            with open(self.config_path, "r") as handle:
+                records = json.load(handle)
+
+            if thread is None:
+                return records
+
+            # First record whose name matches, if any
+            match = next(
+                (record for record in records if record["thread_name"] == thread),
+                None,
+            )
+            if match is not None:
+                return match.get("current_chunk", None)
+
+            print(f"Entry for thread '{thread}' not found.")
+            return None
+
+        except Exception as e:
+            print(f"Error reading configuration file: {e}")
+            return None
+
+    def add_config_entry(self, thread_name, associated_file, tmp_dir, current_chunk):
+        """
+        Append a record for *thread_name* unless one already exists.
+
+        Args:
+            thread_name (str): name of the thread.
+            associated_file (str): file handled by the thread.
+            tmp_dir (str): temporary directory of the thread.
+            current_chunk (int): chunk number to start from.
+        """
+        try:
+            # A missing or empty configuration starts as an empty list
+            records = self.read_config_file() or []
+
+            already_known = any(
+                record["thread_name"] == thread_name for record in records
+            )
+            if already_known:
+                print(
+                    f"Thread '{thread_name}' already exists. Use 'update_config_entry' to update it."
+                )
+                return
+
+            new_record = {
+                "thread_name": thread_name,
+                "associated_file": associated_file,
+                "tmp_dir": tmp_dir,
+                "current_chunk": current_chunk,
+            }
+            records.append(new_record)
+            self.create_config_file(records)
+
+        except Exception as e:
+            print(f"Error adding config entry: {e}")
+
+    def update_config_entry(
+        self, thread_name, associated_file=None, tmp_dir=None, current_chunk=None
+    ):
+        """
+        Change fields of the first record named *thread_name* and save.
+
+        Args:
+            thread_name (str): name of the thread to update.
+            associated_file (str, optional): new file name; ignored when falsy.
+            tmp_dir (str, optional): new temporary directory; ignored when falsy.
+            current_chunk (int, optional): new chunk number; ignored when None.
+
+        Returns:
+            True when a record was updated, otherwise None.
+        """
+        try:
+            records = self.read_config_file() or []
+
+            target = next(
+                (record for record in records if record["thread_name"] == thread_name),
+                None,
+            )
+            if target is None:
+                print(f"Thread '{thread_name}' not found in the configuration.")
+                return None
+
+            if associated_file:
+                target["associated_file"] = associated_file
+            if tmp_dir:
+                target["tmp_dir"] = tmp_dir
+            if current_chunk is not None:
+                target["current_chunk"] = current_chunk
+
+            self.create_config_file(records)
+            print(f"Thread '{thread_name}' updated successfully.")
+            return True
+
+        except Exception as e:
+            print(f"Error updating config entry: {e}")
diff --git a/filemac/core/validator.py b/filemac/core/validator.py
new file mode 100644
index 0000000..51b70be
--- /dev/null
+++ b/filemac/core/validator.py
@@ -0,0 +1,19 @@
+from typing import Tuple
+from pathlib import Path
+
+
+class SystemValidator:
+    """Validates system requirements and dependencies."""
+
+    @staticmethod
+    def validate_file_permissions(temp_dir: Path) -> Tuple[bool, str]:
+        """Check that a file can be created in *temp_dir*.
+
+        Args:
+            temp_dir: directory to probe; when it is a file, its parent
+                directory is probed instead.
+
+        Returns:
+            ``(True, "Write permissions verified")`` on success, otherwise
+            ``(False, "Insufficient permissions: ...")`` with the OS error.
+        """
+        import tempfile
+
+        try:
+            if temp_dir.is_file():
+                temp_dir = temp_dir.parent
+            # A uniquely named probe cannot overwrite or delete a real file
+            # that happens to be called "permission_test.txt"; it is removed
+            # automatically when the context exits.
+            with tempfile.NamedTemporaryFile(
+                mode="w", dir=temp_dir, prefix="permission_test_", suffix=".txt"
+            ) as probe:
+                probe.write("test")
+            return True, "Write permissions verified"
+        except OSError as e:
+            return False, f"Insufficient permissions: {str(e)}"
diff --git a/filemac/core/video/core.py b/filemac/core/video/core.py
new file mode 100644
index 0000000..a705599
--- /dev/null
+++ b/filemac/core/video/core.py
@@ -0,0 +1,185 @@
+"""
+Convert video files from one format to another
+"""
+
+import os
+import subprocess
+import sys
+
+import cv2
+from moviepy import VideoFileClip
+from pydub import AudioSegment
+from tqdm import tqdm
+
+from ...utils.colors import fg, bg, rs
+from ...utils.formats import SUPPORTED_VIDEO_FORMATS, Video_codecs
+
+
+RESET = rs
+
+
+class VideoConverter:
+    """Convert videos to another format and merge an audio track into them.
+
+    Args:
+        input_file: a video file or a directory of video files.
+        out_format: target format name; ``preprocess`` returns None without it.
+    """
+
+    def __init__(self, input_file, out_format=None):
+        self.input_file = input_file
+        self.out_format = out_format
+
+    def preprocess(self):
+        """Collect the files to work on.
+
+        Returns:
+            None when no output format is set; otherwise a list holding the
+            input file, or the regular files directly inside the input
+            directory (empty when the input is neither). Exits with status 1
+            when the input directory is empty.
+        """
+        if self.out_format is None:
+            return None
+        files_to_process = []
+
+        if os.path.isfile(self.input_file):
+            files_to_process.append(self.input_file)
+        elif os.path.isdir(self.input_file):
+            entries = os.listdir(self.input_file)
+            # listdir returns a list, never None: test for emptiness
+            if not entries:
+                print(f"{bg.RED}Cannot work with empty folder{RESET}")
+                sys.exit(1)
+            for name in entries:
+                file_path = os.path.join(self.input_file, name)
+                if os.path.isfile(file_path):
+                    files_to_process.append(file_path)
+
+        return files_to_process
+
+    @staticmethod
+    def _output_path(input_video):
+        """Return *input_video* with "_new_" inserted before its extension."""
+        root, ext = os.path.splitext(input_video)
+        return f"{root}_new_{ext}"
+
+    def _video_list(self, obj):
+        """Return the preprocessed files, falling back to *obj*, then to []."""
+        return self.preprocess() or obj or []
+
+    def ffmpeg_merger(self, obj: list = None, audio_file: str = "audio.mp3"):
+        """Mux *audio_file* into every video with FFmpeg, copying the video stream.
+
+        Args:
+            obj: videos to use when ``preprocess`` yields nothing.
+            audio_file: audio track to merge (default "audio.mp3").
+        """
+        for input_video in self._video_list(obj):
+            output_file = self._output_path(input_video)
+
+            # -c:v copy keeps the original video quality by avoiding re-encoding
+            subprocess.run(
+                [
+                    "ffmpeg",
+                    "-i",
+                    input_video,
+                    "-i",
+                    audio_file,
+                    "-c:v",
+                    "copy",
+                    "-c:a",
+                    "aac",
+                    "-strict",
+                    "experimental",
+                    output_file,
+                ]
+            )
+
+    def pydub_merger(self, obj: list = None, audio_file: str = "audio.mp3"):
+        """Boost *audio_file* by 6 dB with Pydub, then mux it into every video.
+
+        Args:
+            obj: videos to use when ``preprocess`` yields nothing.
+            audio_file: audio track to process and merge (default "audio.mp3").
+        """
+        video_list = self._video_list(obj)
+        if not video_list:
+            return
+
+        # The processed track is the same for every video: build it once
+        audio = AudioSegment.from_file(audio_file)
+        audio = audio + 6  # Increase volume by 6 dB
+        audio.export("processed_audio.mp3", format="mp3")
+
+        for input_video in video_list:
+            output_file = self._output_path(input_video)
+            subprocess.run(
+                [
+                    "ffmpeg",
+                    "-i",
+                    input_video,
+                    "-i",
+                    "processed_audio.mp3",
+                    "-c:v",
+                    "copy",
+                    "-c:a",
+                    "aac",
+                    output_file,
+                ]
+            )
+
+    def cv2_merger(self, obj: list = None, audio_file: str = "audio.mp3"):
+        """Re-write every video frame by frame with OpenCV, then mux *audio_file* in.
+
+        The frames go to the intermediate file "video_no_audio.mp4", which
+        FFmpeg then combines with the audio track into the output file.
+
+        Args:
+            obj: videos to use when ``preprocess`` yields nothing.
+            audio_file: audio track to merge (default "audio.mp3").
+        """
+        silent_file = "video_no_audio.mp4"
+        for input_video in self._video_list(obj):
+            output_file = self._output_path(input_video)
+            cap = cv2.VideoCapture(input_video)
+
+            # Keep the dimensions and frame rate of the source video
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = int(cap.get(cv2.CAP_PROP_FPS))
+
+            # Frames must land in the file FFmpeg reads below, not in the
+            # final output file
+            out = cv2.VideoWriter(
+                silent_file, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
+            )
+            try:
+                while cap.isOpened():
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+                    out.write(frame)
+            finally:
+                # Release resources even when reading or writing fails
+                cap.release()
+                out.release()
+
+            # Merge with audio using FFmpeg
+            subprocess.run(
+                [
+                    "ffmpeg",
+                    "-i",
+                    silent_file,
+                    "-i",
+                    audio_file,
+                    "-c:v",
+                    "copy",
+                    "-c:a",
+                    "aac",
+                    output_file,
+                ]
+            )
+
+    def CONVERT_VIDEO(self):
+        """Convert every supported input video to ``self.out_format``.
+
+        Files whose name does not end with an entry of
+        SUPPORTED_VIDEO_FORMATS are skipped. Exits with status 1 when the
+        output format has no codec in Video_codecs or on Ctrl-C; any other
+        exception is printed.
+        """
+        try:
+            input_list = self.preprocess()
+            out_f = self.out_format.upper()
+            input_list = [
+                item
+                for item in input_list
+                if any(item.upper().endswith(ext) for ext in SUPPORTED_VIDEO_FORMATS)
+            ]
+
+            for file in tqdm(input_list):
+                if out_f in Video_codecs:
+                    root, _ = os.path.splitext(file)
+                    output_filename = root + "." + out_f.lower()
+                elif out_f in SUPPORTED_VIDEO_FORMATS:
+                    print(
+                        f"{fg.RED}Unsupported output format --> Pending Implementation{RESET}"
+                    )
+                    sys.exit(1)
+                else:
+                    print(f"{fg.RED}Unsupported output format{RESET}")
+                    sys.exit(1)
+
+                # Load the video file
+                video = VideoFileClip(file)
+                try:
+                    # Export the video to a different format
+                    print(f"To: {fg.IWHITE}{output_filename}{RESET}")
+                    video.write_videofile(output_filename, codec=Video_codecs[out_f])
+                    print(f"{fg.BGREEN}success{RESET}")
+                finally:
+                    # Close the clip even when the export fails
+                    video.close()
+        except KeyboardInterrupt:
+            print("\nQuit❕")
+            sys.exit(1)
+        except Exception as e:
+            print(e)
diff --git a/filemac/core/warning.py b/filemac/core/warning.py
new file mode 100644
index 0000000..eece477
--- /dev/null
+++ b/filemac/core/warning.py
@@ -0,0 +1,13 @@
+import warnings
+
+
+def default_supressor():
+    """Install the package's default warning filters and return True.
+
+    All RuntimeWarning messages are ignored, including the pygame notice
+    about missing AVX2 support.
+    """
+    # Filters added inside a `warnings.catch_warnings()` block are discarded
+    # when the block exits, so they are registered directly.
+    warnings.simplefilter("ignore", RuntimeWarning)
+    warnings.filterwarnings(
+        "ignore",
+        message="Your system is avx2 capable but pygame was not built with support for it.",
+        category=RuntimeWarning,
+    )
+    return True
diff --git a/filemac/dd.py b/filemac/dd.py
deleted file mode 100644
index 90fbe1f..0000000
--- a/filemac/dd.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from OCRTextExtractor import ExtractText
-img_objs = ['/home/skye/Software Engineering/Y2/SEM2/RV/SPE 2210 Client Side Programming Year II Semester II_1.png']
-text = ''
-for i in img_objs:
- extract = ExtractText(i)
- tx = extract.OCR()
- print(tx)
- if tx is not None:
- text += tx
-print(text)
diff --git a/filemac/fmac.py b/filemac/fmac.py
deleted file mode 100644
index 91b28ba..0000000
--- a/filemac/fmac.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env python3.11.7
-# multimedia_cli/main.py
-import argparse
-import logging
-import logging.handlers
-import sys
-
-from . import handle_warnings
-from .AudioExtractor import ExtractAudio
-from .colors import (RESET, DYELLOW)
-from .converter import (AudioConverter, FileSynthesis, ImageConverter,
- MakeConversion, Scanner, VideoConverter)
-from .formats import (SUPPORTED_AUDIO_FORMATS_SHOW, SUPPORTED_DOC_FORMATS,
- SUPPORTED_IMAGE_FORMATS_SHOW,
- SUPPORTED_VIDEO_FORMATS_SHOW)
-from .image_op import Compress_Size
-from .OCRTextExtractor import ExtractText
-from .Simple_v_Analyzer import SA
-
-# from .formats import SUPPORTED_INPUT_FORMATS, SUPPORTED_OUTPUT_FORMATS
-handle_warnings
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class Eval:
-
- def __init__(self, file, outf):
- self.file = file
- self.outf = outf
-
- def document_eval(self):
- ls = ["docx", "doc"]
- sheetls = ["xlsx", "xls"]
- try:
- conv = MakeConversion(self.file)
- if self.file.lower().endswith(tuple(sheetls)):
- if self.outf.lower() == "csv":
- conv.convert_xlsx_to_csv()
- elif self.outf.lower() == "txt":
- conv.convert_xls_to_text()
- elif self.outf.lower() == "doc" or self.outf == "docx":
- conv.convert_xls_to_word()
- elif self.outf.lower() == "db":
- conv.convert_xlsx_to_database()
-
- elif self.file.lower().endswith(tuple(ls)):
- if self.outf.lower() == "txt":
- conv.word_to_txt()
- elif self.outf.lower() == "pdf":
- conv.word_to_pdf()
- elif self.outf.lower() == "pptx":
- conv.word_to_pptx()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- elif self.file.endswith('txt'):
- if self.outf.lower() == "pdf":
- conv.txt_to_pdf()
- elif self.outf.lower() == "doc" or self.outf == "docx" or self.outf == "word":
- conv.text_to_word()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- elif self.file.lower().endswith('ppt') or self.file.lower().endswith('pptx'):
- if self.outf.lower() == "doc" or self.outf.lower() == "docx" or self.outf == "word":
- conv.ppt_to_word()
-
- elif self.file.lower().endswith('pdf'):
- if self.outf.lower() == "doc" or self.outf.lower() == "docx" or self.outf == "word":
- conv.pdf_to_word()
- elif self.outf.lower() == "txt":
- conv.pdf_to_txt()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- else:
- print(f"{DYELLOW}Unsupported Conversion type{RESET}")
- except Exception as e:
- logger.error(e)
-
-
-def main():
- parser = argparse.ArgumentParser(
- description="Multimedia Element Operations")
-
- parser.add_argument(
- "--convert_doc", help=f"Converter document file(s) to different format ie pdf_to_docx.\
- example {DYELLOW}filemac --convert_doc example.docx -t pdf{RESET}")
-
- parser.add_argument(
- "--convert_audio", help=f"Convert audio file(s) to and from different format ie mp3 to wav\
- example {DYELLOW}filemac --convert_audio example.mp3 -t wav{RESET}")
-
- parser.add_argument(
- "--convert_video", help=f"Convert video file(s) to and from different format ie mp4 to mkv.\
- example {DYELLOW}filemac --convert_video example.mp4 -t mkv{RESET}")
-
- parser.add_argument(
- "--convert_image", help=f"Convert image file(s) to and from different format ie png to jpg.\
- example {DYELLOW}filemac --convert_image example.jpg -t png{RESET}")
-
- parser.add_argument(
-
- "--convert_doc2image", help=f"Convert documents to images ie png to jpg.\
- example {DYELLOW}filemac --convert_doc2image example.pdf -t png{RESET}")
-
- parser.add_argument("-xA", "--extract_audio",
- help=f"Extract audio from a video.\
- example {DYELLOW}filemac -xA example.mp4 {RESET}")
-
- parser.add_argument(
- "-Av", "--Analyze_video", help=f"Analyze a given video.\
- example {DYELLOW}filemac --analyze_video example.mp4 {RESET}")
-
- parser.add_argument("-t", "--target_format",
- help="Target format for conversion (optional)")
-
- parser.add_argument(
- "--resize_image", help=f"change size of an image compress/decompress \
- example {DYELLOW}filemac --resize_image example.png -t png {RESET}")
-
- parser.add_argument("-t_size", help="used in combination with resize_image \
- to specify target image size")
-
- parser.add_argument(
- "-S", "--scan", help=f"Scan pdf file and extract text\
- example {DYELLOW}filemac --scan example.pdf {RESET}")
-
- parser.add_argument(
- "-SA", "--scanAsImg", help=f"Scan pdf file and extract text\
- example {DYELLOW}filemac --scanAsImg example.pdf {RESET}")
-
- parser.add_argument("--OCR", help=f"Extract text from an image.\
- example {DYELLOW}filemac --OCR image.png{RESET}")
-
- args = parser.parse_args()
-
-
-# Call function to handle document conversion inputs before begining conversion
- if args.convert_doc == 'help':
- print(SUPPORTED_DOC_FORMATS)
- sys.exit(1)
- if args.convert_doc:
- ev = Eval(args.convert_doc, args.target_format)
- ev.document_eval()
-
-
-# Call function to handle video conversion inputs before begining conversion
- elif args.convert_video:
- if args.convert_video == 'help' or args.convert_video is None:
- print(SUPPORTED_VIDEO_FORMATS_SHOW)
- sys.exit(1)
- ev = VideoConverter(args.convert_video, args.target_format)
- ev.CONVERT_VIDEO()
-# Call function to handle image conversion inputs before begining conversion
-
- elif args.convert_image:
- if args.convert_image == 'help' or args.convert_image is None:
- print(SUPPORTED_IMAGE_FORMATS_SHOW)
- sys.exit(1)
- conv = ImageConverter(args.convert_image, args.target_format)
- conv.convert_image()
-
-# Handle image resizing
- elif args.resize_image:
- res = Compress_Size(args.resize_image)
- res.resize_image(args.t_size)
-
-# Handle documents to images conversion
- elif args.convert_doc2image:
- conv = MakeConversion(args.convert_doc2image)
- conv.doc2image(args.target_format)
-
-# Call function to handle audio conversion inputs before begining conversion
- elif args.convert_audio:
- if args.convert_audio == 'help' or args.convert_audio is None:
- print(SUPPORTED_AUDIO_FORMATS_SHOW)
- sys.exit(1)
- ev = AudioConverter(args.convert_audio, args.target_format)
- ev.pydub_conv()
-
-
-# Call module to evaluate audio files before making audio extraction from input video files conversion
- elif args.extract_audio:
- vi = ExtractAudio(args.extract_audio)
- vi.moviepyextract()
-
-# Call module to scan the input and extract text
- elif args.scan:
- sc = Scanner(args.scan)
- sc.scanPDF()
-
-# Call module to scan the input FILE as image object and extract text
- elif args.scanAsImg:
- sc = Scanner(args.scanAsImg)
- tx = sc.scanAsImgs()
-# Call module to handle Candidate images for text extraction inputs before begining conversion
- elif args.OCR:
- conv = ExtractText(args.OCR)
- conv.OCR()
-
- elif args.Analyze_video:
- analyzer = SA(args.Analyze_video)
- analyzer.SimpleAnalyzer()
-
-
-if __name__ == "__main__":
- main()
diff --git a/filemac/formats.py b/filemac/formats.py
deleted file mode 100644
index 6490294..0000000
--- a/filemac/formats.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# multimedia_cli/formats.py
-from .colors import CYAN, DBLUE, DMAGENTA, DYELLOW, RESET
-
-SUPPORTED_DOC_FORMATS = f"""
-|---------------------------------------------------------------------------
-|{DBLUE}Input format{RESET} |{DBLUE}Output format{RESET} |
-|________________________________|__________________________________________|
-| xlsx {DYELLOW}-------------------->{RESET}|csv txt doc/docx db(sql) |
-| | |
-| doc/docx{DYELLOW}-------------------->{RESET}|txt pdf ppt/pptx audio(ogg) |
-| | |
-| txt {DYELLOW}-------------------->{RESET}|pdf docx/doc audio(ogg) |
-| | |
-| pdf {DYELLOW}-------------------->{RESET}|doc/docx txt audio(ogg) |
-| | |
-| pptx/ppt{DYELLOW}-------------------->{RESET}|doc/docx |
-| |
-|___________________________________________________________________________|
-"""
-
-
-def p():
- print(SUPPORTED_DOC_FORMATS)
-
-
-# Add supported input and output formats for each media type
-SUPPORTED_AUDIO_FORMATS = ["wav", # Waveform Audio File Format
- "mp3", # MPEG Audio Layer III
- "ogg",
- "flv",
- "ogv",
- "webm",
- "aac", # Advanced Audio Codec
- "bpf",
- "aiff",
- "flac"] # Free Lossless Audio Codec)
-
-SUPPORTED_AUDIO_FORMATS_SHOW = f'''
-|==============================|
-| {DBLUE}Supported I/O formats {RESET} |
-|==============================|
-| {CYAN} wav {DYELLOW} |
-| {CYAN} mp3 {DYELLOW} |
-| {CYAN} ogg {DYELLOW} |
-| {CYAN} flv {DYELLOW} |
-| {CYAN} ogv {DYELLOW} |
-| {CYAN} matroska {DYELLOW} |
-| {CYAN} mov {DYELLOW} |
-| {CYAN} webm {DYELLOW} |
-| {CYAN} aac {DYELLOW} |
-| {CYAN} bpf {DYELLOW} |
---------------------------------
-
-'''
-
-SUPPORTED_VIDEO_FORMATS = ["MP4", # MPEG-4 part 14
- "AVI", # Audio Video Interleave
- "OGV",
- "WEBM",
- "MOV", # QuickTime Movie
- "MKV", # Matroska Multimedia Container - MKV is known for its support of high-quality content.
- "FLV", #
- "WMV"]
-
-SUPPORTED_VIDEO_FORMATS_SHOW = f'''
-,_______________________________________,
-|x| {DBLUE}Supported I/O formats{RESET} |x|
-|x|-----------------------------------{DYELLOW}|x|
-|x| {DMAGENTA} MP4 {DYELLOW} |x|
-|x| {DMAGENTA} AVI {DYELLOW} |x|
-|x| {DMAGENTA} OGV {DYELLOW} |x|
-|x| {DMAGENTA} WEBM{DYELLOW} |x|
-|x| {DMAGENTA} MOV {DYELLOW} |x|
-|x| {DMAGENTA} MKV {DYELLOW} |x|
-|x| {DMAGENTA} FLV {DYELLOW} |x|
-|x| {DMAGENTA} WMV {DYELLOW} |x|
-|,|___________________________________|,|{DYELLOW}
-'''
-
-SUPPORTED_IMAGE_FORMATS = {
- "JPEG": ".jpg", # Joint Photographic Experts Group -Lossy compression
- "PNG": ".png", # Joint Photographic Experts Group - not lossy
- "GIF": ".gif", # Graphics Interchange Format
- "BM": ".bmp",
- "BMP": ".dib",
- "DXF": ".dxf", # Autocad format 2D
- "TIFF": ".tiff", # Tagged Image File Format A flexible and high-quality image format that supports lossless compression
- "EXR": ".exr",
- "pic": ".pic",
- "pict": "pct",
- "PDF": ".pdf",
- "WebP": ".webp",
- "ICNS": ".icns",
- "PSD": ".psd",
- "SVG": ".svg", # Scalable vector Graphics
- "EPS": ".eps",
- "PostSciript": ".ps",
- "PS": ".ps"}
-
-SUPPORTED_IMAGE_FORMATS_SHOW = f'''
-__________________________________________
-|x|{DBLUE}Supported I/O formats{RESET} |x|
-|x|_____________________________________{DYELLOW}|x|
-|x| {DMAGENTA} JPEG {DYELLOW} |x|
-|x| {DMAGENTA} PNG {DYELLOW} |x|
-|x| {DMAGENTA} GIF {DYELLOW} |x|
-|x| {DMAGENTA} BM {DYELLOW} |x|
-|x| {DMAGENTA} TIFF {DYELLOW} |x|
-|x| {DMAGENTA} EXR {DYELLOW} |x|
-|x| {DMAGENTA} PDF {DYELLOW} |x|
-|x| {DMAGENTA} WebP{DYELLOW} |x|
-|x| {DMAGENTA} ICNS {DYELLOW} |x|
-|x| {DMAGENTA} PSD {DYELLOW} |x|
-|x| {DMAGENTA} SVG {DYELLOW} |x|
-|x| {DMAGENTA} EPS {DYELLOW} |x|
-|x| {DMAGENTA} Postscript {DYELLOW} |x|
-|_|_____________________________________|x|
-'''
-
-SUPPORTED_DOCUMENT_FORMATS = ['pdf', 'doc', 'docx', 'csv', 'xlsx', 'xls',
- 'ppt', 'pptx', 'txt', 'ogg', 'mp3', 'audio']
diff --git a/filemac/handle_warnings.py b/filemac/handle_warnings.py
deleted file mode 100644
index 3e592d1..0000000
--- a/filemac/handle_warnings.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import warnings
-
-warnings.simplefilter("ignore", RuntimeWarning)
-with warnings.catch_warnings():
- warnings.filterwarnings(
- "ignore", message="Your system is avx2 capable but pygame was not built with support for it.", category=RuntimeWarning)
diff --git a/filemac/image_op.py b/filemac/image_op.py
deleted file mode 100644
index 61cfe6d..0000000
--- a/filemac/image_op.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from PIL import Image
-import os
-import logging
-import logging.handlers
-
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class Compress_Size:
-
- def __init__(self, input_image_path):
- self.input_image_path = input_image_path
-
- def resize_image(self, target_size):
- ext = input_image_path[-3:]
- output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}"
-
- original_image = Image.open(input_image_path)
- original_size = original_image.size
- size = os.path.getsize(input_image_path)
- print(f"Original image size \033[93m{size/1000_000:.2f}MiB")
-
- # Calculate the aspect ratio of the original image
- aspect_ratio = original_size[0] / original_size[1]
-
- # Convert the target sixze to bytes
- tz = int(target_size[:-2])
- if target_size[-2:].lower() == 'mb':
- target_size_bytes = tz * 1024 * 1024
- elif target_size[-2:].lower() == 'kb':
- target_size_bytes = tz * 1024
- else:
- logger.warning("Invalid units. Please use either \033[1;95m'MB'\033[0m\
- or \033[1;95m'KB'\033[0m")
-
- # Calculate the new dimensions based on the target size
- new_width, new_height = Compress_Size.calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes)
- print("\033[94mProcessing ..\033[0m")
- resized_image = original_image.resize((new_width, new_height))
- resized_image.save(output_image_path)
- t_size = os.path.getsize(output_image_path)/1000_000
- print("\033[1;92mOk\033[0m")
- print(f"Image resized to \033[1;93m{t_size:.2f}\033[0m and saved to \033[1;93m{output_image_path}")
-
- def calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes):
- # Calculate the new dimensions based on the target size in bytes
- original_size_bytes = original_size[0] * original_size[1] * 3 # Assuming 24-bit color depth
- scale_factor = (target_size_bytes / original_size_bytes) ** 0.5
-
- new_width = int(original_size[0] * scale_factor)
- new_height = int(original_size[1] * scale_factor)
-
- return new_width, new_height
-
-
-if __name__ == "__main__":
- input_image_path = input("Enter the path to the input image: ")
- target_size = input("Enter the target output size (MB or KB): ")
- ext = input_image_path[-3:]
- output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}"
-
- init = Compress_Size(input_image_path)
- init.resize_image(target_size)
diff --git a/filemac/miscellaneous/VKITPro.py b/filemac/miscellaneous/VKITPro.py
new file mode 100644
index 0000000..77b7c9c
--- /dev/null
+++ b/filemac/miscellaneous/VKITPro.py
@@ -0,0 +1,135 @@
+#!/usr/bin/python3
+import logging
+import os
+
+import cv2
+from colorama import Fore, Style, init
+from moviepy import AudioFileClip, VideoFileClip
+
+# import numpy as np
+from tqdm import tqdm
+
+# Initialize colorama
+init(autoreset=True)
+
+# Custom formatter class to add colors
+
+
+class CustomFormatter(logging.Formatter):
+ COLORS = {
+ logging.DEBUG: Fore.BLUE,
+ logging.INFO: Fore.GREEN,
+ logging.WARNING: Fore.YELLOW,
+ logging.ERROR: Fore.RED,
+ logging.CRITICAL: Fore.MAGENTA,
+ }
+
+ def format(self, record):
+ log_color = self.COLORS.get(record.levelno, Fore.WHITE)
+ log_message = super().format(record)
+ return f"{log_color}{log_message}{Style.RESET_ALL}"
+
+
+# Set up logging
+logger = logging.getLogger("colored_logger")
+handler = logging.StreamHandler()
+handler.setFormatter(CustomFormatter("- %(levelname)s - %(message)s"))
+
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+class AudioMan:
+    """Extract a video's audio track, or mux a replacement track into it."""
+    def __init__(self, obj):
+        self.obj = obj
+        self.video = VideoFileClip(self.obj)
+        basename, _ = os.path.splitext(self.obj)
+        self.outfile = basename + ".wav"
+
+    def Extract_audio(self):
+        """Write the clip's audio track to ``self.outfile`` (<basename>.wav)."""
+        self.video.audio.write_audiofile(self.outfile)
+
+    def Write_audio(self, outfile):
+        """Attach the audio file ``outfile`` and export to output_@vkitpro.mp4."""
+        # moviepy 2.x renamed set_audio -> with_audio; keep the 1.x name as fallback
+        attach = getattr(self.video, "with_audio", None) or self.video.set_audio
+        new = attach(AudioFileClip(outfile))
+        return new.write_videofile(
+            "output_@vkitpro.mp4", codec="libx264", audio_codec="aac", bitrate="125.4k"
+        )
+
+
+class VideoRepair:
+    """Open a video with OpenCV and collect its frames for later repair."""
+    def __init__(self, obj):
+        self.obj = obj
+        self.frames = []  # filled by Repair(); None marks a frame that failed to read
+        self.frame_count = 0  # stays 0 when the file cannot be opened
+        logger.info("Open the file")
+        self.cap = cv2.VideoCapture(obj)
+        if not self.cap.isOpened():
+            logger.error("Could not open video file.")
+            return
+
+        # Collect file metadata
+        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = self.cap.get(cv2.CAP_PROP_FPS)
+
+        logger.info(
+            "File info:\n"
+            f"\tFrames: \033[95m{self.frame_count}\033[0;32m\n"
+            f"\tFrame Width: \033[0;95m{width}\033[0;32m\n"
+            f"\tFrame Height: \033[0;95m{height}\033[0;32m\n"
+            f"\tFPS: \033[0;95m{fps}\033[0m"
+        )
+
+    @staticmethod
+    def get_frame_size_in_bytes(frame):
+        return frame.nbytes  # Get the size of the frame in bytes
+
+    def Repair(self, batch: int = 2):
+        """Read every frame into self.frames, storing None for failed reads.
+
+        ``batch`` is accepted but currently unused.
+        """
+        logger.info("Find missing frames and index them")
+        for _ in tqdm(range(self.frame_count), desc="Progress"):
+            ret, frame = self.cap.read()
+            if not ret:
+                self.frames.append(None)
+            else:
+                self.frames.append(frame)
+
+        self.cap.release()
+
+
+class cv2Repair:
+ def __init__(self):
+ self = self
+
+ def preprocessor(input_video_path):
+ cap = cv2.VideoCapture(input_video_path)
+
+ while cap.isOpened():
+ ret, frame = cap.read()
+ if not ret:
+ pass
+ else:
+ yield frame # Yield frame one by one (lazy loading)
+
+ cap.release()
+
+ def repair(self):
+ # Process the frames using the generator
+ for frame in tqdm(self.preprocessor("/home/skye/Videos/FixedSupercar.mp4")):
+ run = AudioMan()
+ run.Write_audio()
+
+
+if __name__ == "__main__":
+ run = AudioMan("/home/skye/Videos/FixedSupercar.mp4")
+ run.Write_audio("/home/skye/Videos/supercar.wav")
diff --git a/filemac/miscellaneous/VRKit.py b/filemac/miscellaneous/VRKit.py
new file mode 100644
index 0000000..07e511f
--- /dev/null
+++ b/filemac/miscellaneous/VRKit.py
@@ -0,0 +1,138 @@
+#!/usr/bin/python3
+import logging
+import cv2
+from colorama import Fore, Style, init
+
+# import numpy as np
+from tqdm import tqdm
+
+# Initialize colorama
+init(autoreset=True)
+
+# Custom formatter class to add colors
+
+
+class CustomFormatter(logging.Formatter):
+ COLORS = {
+ logging.DEBUG: Fore.BLUE,
+ logging.INFO: Fore.GREEN,
+ logging.WARNING: Fore.YELLOW,
+ logging.ERROR: Fore.RED,
+ logging.CRITICAL: Fore.MAGENTA,
+ }
+
+ def format(self, record):
+ log_color = self.COLORS.get(record.levelno, Fore.WHITE)
+ log_message = super().format(record)
+ return f"{log_color}{log_message}{Style.RESET_ALL}"
+
+
+# Set up logging
+logger = logging.getLogger("colored_logger")
+handler = logging.StreamHandler()
+handler.setFormatter(CustomFormatter("- %(levelname)s - %(message)s"))
+
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+def detect_missing_frames(frames):
+ """Implementation for missing frame detection and index them, append index
+ of missing frames to a list"""
+ missing_frames = []
+ logger.info("Index missing frames")
+ for i in tqdm(range(1, len(frames) - 1), desc="Progress"):
+ if frames[i] is None:
+ missing_frames.append(i)
+
+ # Exit when no missing frames are found
+ if not missing_frames:
+ exit(0)
+ return missing_frames
+
+
+def interpolate_frame(prev_frame, next_frame):
+ """Based on number and size of missing frames use this logic to create a
+ dummy frame by interpolating.
+ combine the frame before and after the missing frame and find the missing
+ frame by calculating middle weight."""
+ logger.info("Interpolating")
+ return cv2.addWeighted(prev_frame, 0.5, next_frame, 0.5, 0)
+
+
+def repair_video(input_path, output_path):
+ logger.info("Open the file")
+ cap = cv2.VideoCapture(input_path)
+ if not cap.isOpened():
+ logger.error("Could not open video file.")
+ return
+
+ # Collect file metadata
+ frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ fps = cap.get(cv2.CAP_PROP_FPS)
+
+ logger.info(
+ "File info:\n"
+ f"\tFrames: \033[95m{frame_count}\033[0;32m\n"
+ f"\tFrame Width: \033[0;95m{width}\033[0;32m\n"
+ f"\tFPS: \033[0;95m{fps}\033[0m"
+ )
+
+ frames = []
+    # Read every frame, recording None for each one that fails to decode
+ logger.info("Find missing frames and index them")
+ for _ in tqdm(range(frame_count), desc="Progress"):
+ ret, frame = cap.read()
+ if not ret:
+ frames.append(None)
+ else:
+ frames.append(frame)
+
+ cap.release()
+
+ """ Call function to detect missing frames and decide on the method to apply
+ depending on number of missing frames. If number is larger than frame_count * 0.1
+remove the missing frames else interpolate."""
+
+ missing_frames = detect_missing_frames(frames)
+ if (
+ len(missing_frames) > frame_count * 0.1
+ ): # Arbitrary threshold for many missing frames
+ frames = [f for f in frames if f is not None]
+ else:
+ for i in missing_frames:
+ """ Based on missing frame `i` find previous frame `frames[i-1]` and preceeding frame `frames[i+1]` wher both previous and preceeding are not missing. Use them to create the middle frame."""
+ if (
+ i > 0
+ and i < frame_count - 1
+ and frames[i - 1] is not None
+ and frames[i + 1] is not None
+ ):
+ frames[i] = interpolate_frame(frames[i - 1], frames[i + 1])
+ else:
+ """Where ..."""
+ frames[i] = (
+ frames[i - 1] if frames[i - 1] is not None else frames[i + 1]
+ )
+
+    # Create the writer object for the frames
+ out = cv2.VideoWriter(
+ output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
+ )
+
+ # Write the new video to file
+ for frame in frames:
+ "Don't write empty frames"
+ if frame is not None:
+ out.write(frame)
+
+ out.release()
+ print("Video repair complete and saved to:", output_path)
+
+
+# Usage
+input_video_path = "/home/skye/Videos/supercar.mp4"
+output_video_path = "output_video.mp4"
+repair_video(input_video_path, output_video_path)
diff --git a/filemac/miscellaneous/video_analyzer.py b/filemac/miscellaneous/video_analyzer.py
new file mode 100644
index 0000000..49561c6
--- /dev/null
+++ b/filemac/miscellaneous/video_analyzer.py
@@ -0,0 +1,121 @@
+"""A basic/simple file analyzer"""
+
+import sys
+import cv2
+import numpy as np
+from ..utils.colors import fg, rs
+import ffmpeg
+
+RESET = rs
+
+
+class SimpleAnalyzer:
+ """Video - video object subject for analysis
+    analyze() returns the video's frame_count, total_area and duration"""
+
+ def __init__(self, video):
+ self.video = video
+
+ @staticmethod
+ def get_metadata(input_file):
+ """Fetch the original bitrate of the video file using ffmpeg."""
+ try:
+ probe = ffmpeg.probe(input_file)
+ print(probe.get("streams")[1])
+ bitrate = None
+            # Only the first stream is inspected: the loop breaks after one pass
+ for stream in probe["streams"]:
+ bitrate = (
+ stream.get("bit_rate", None)
+ if stream["codec_type"] == "video"
+ else None
+ )
+ aspect_ratio = (
+ stream.get("sample_aspect_ratio")
+ if stream["sample_aspect_ratio"]
+ else None
+ )
+ codec_name = stream.get("codec_name") if stream["codec_name"] else None
+ channels = stream.get("channels")
+
+ encoder = stream.get("encoder") if stream.get("encoder") else None
+ break
+ return bitrate, aspect_ratio, codec_name, channels, encoder
+ except ffmpeg.Error as e:
+ raise
+ print(f"Error: {e}")
+ except Exception as e:
+ raise
+ print(f"Error: {e}")
+
+ def analyze(self):
+ """Read the video file/obj
+ Increase frame count and accumulate area
+ Calculate current frame duration
+ Display the resulting frame"""
+
+ try:
+ # Read the video file
+ cap = cv2.VideoCapture(self.video)
+ print(f"{fg.BYELLOW}Initializing..{RESET}")
+ # Initialize variables
+            # Stream metadata from ffmpeg; fps is queried inside the loop below
+ bitrate, aspect_ratio, codec_name, channels, encoder = self.get_metadata(
+ self.video
+ )
+ frame_count = 0
+ total_area = 0
+ duration = 0
+
+ print(f"{fg.DCYAN}Working on it{RESET}")
+ while True:
+ ret, frame = cap.read()
+
+ if not ret:
+ break
+ # Increase frame count and accumulate area
+ frame_count += 1
+ total_area += np.prod(frame.shape[:2])
+
+ # Calculate current frame duration
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ duration += 1 / fps
+
+ # Display the resulting frame
+ cv2.imshow("Frame", frame)
+
+ # Break the loop after pressing 'q'
+ if cv2.waitKey(1) == ord("q"):
+ break
+
+ # Release the video capture object and close all windows
+ cap.release()
+ cv2.destroyAllWindows()
+
+ # Print results
+ # print(f"Size {fg.BGREEN}{size}{RESET}Kb")
+ print(f"Channels: {fg.BGREEN}{channels}{RESET}")
+ print(f"Encoder {fg.BGREEN}{encoder}{RESET}")
+ print(f"Bitrate {fg.BGREEN}{bitrate}{RESET}")
+ print(f"Aspect ratio{fg.BGREEN}{aspect_ratio}{RESET}")
+ print(f"Codec name {fg.BGREEN}{codec_name}{RESET}")
+ print(f"Total Frames: {fg.BGREEN}{frame_count}{RESET}")
+ print(
+ f"Average Frame Area: {fg.BGREEN}{total_area / frame_count}{RESET}"
+ )
+ print(f"Duration: {fg.BGREEN}{duration:.2f}{RESET} seconds")
+ return frame_count, total_area, duration
+ except KeyboardInterrupt:
+ print("\nExiting")
+ sys.exit(1)
+ except TypeError:
+ pass
+ except Exception as e:
+ print(e)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ vi = SimpleAnalyzer("/home/skye/Videos/demo.mkv")
+ # SimpleAnalyzer.get_metadata("/home/skye/Videos/demo.mkv")
+ vi.analyze()
diff --git a/filemac/utils/__init__.py b/filemac/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/filemac/utils/colors.py b/filemac/utils/colors.py
new file mode 100644
index 0000000..def6fc7
--- /dev/null
+++ b/filemac/utils/colors.py
@@ -0,0 +1,271 @@
+import os
+
+from colorama import Fore, Style, init
+
+init(autoreset=True)
+
+
+class ForegroundColor:
+ if os.name == "posix":
+ RESET = "\033[0m" # Reset to default text color
+
+ # Red Variants
+ RED = "\033[91m" # Normal RED
+ BRED = "\033[1;91m" # Deep RED
+ FRED = "\033[2;91m" # Faint red
+ IRED = "\033[3;91m" # Indented RED
+ LRED = "\033[4;91m" # Underlined RED
+ URED = "\033[5;91m" # Blinking RED
+
+ # Green Variants
+ GREEN = "\033[92m" # Normal green
+ BGREEN = "\033[1;92m" # Deep green
+ FGREEN = "\033[2;92m" # Faint green
+ IGREEN = "\033[3;92m" # Indented GREEN
+ LGREEN = "\033[4;92m" # Underlined GREEN
+ UGREEN = "\033[5;92m" # Blinking GREEN
+
+ # Yellow Variants
+ YELLOW = "\033[93m" # Normal yellow
+ BYELLOW = "\033[1;93m" # Deep YELLOW
+ FYELLOW = "\033[2;93m" # Faint YELLOW
+ IYELLOW = "\033[3;93m" # Indented YELLOW
+ LYELLOW = "\033[4;93m" # Underlined YELLOW
+ UYELLOW = "\033[5;93m" # Blinking YELLOW
+
+ # Blue Variants
+ BLUE = "\033[94m" # Normal BLUE
+ BBLUE = "\033[1;94m" # Deep BLUE
+ FBLUE = "\033[2;94m" # Faint Blue
+ IBLUE = "\033[3;94m" # Indented BLUE
+ LBLUE = "\033[4;94m" # Underlined BLUE
+ UBLUE = "\033[5;94m" # Blinking BLUE
+
+ # Magenta Variants
+ MAGENTA = "\033[95m" # Normal MAGENTA
+ BMAGENTA = "\033[1;95m" # Deep MAGENTA
+ FMAGENTA = "\033[2;95m" # Faint MAGENTA
+ IMAGENTA = "\033[3;95m" # Indented MAGENTA
+ LMAGENTA = "\033[4;95m" # Underlined MAGENTA
+ UMAGENTA = "\033[5;95m" # Blinking MAGENTA
+
+ # Cyan Variants
+ CYAN = "\033[96m" # Normal cyan
+ DCYAN = "\033[1;96m" # Deep CYAN
+ FCYAN = "\033[2;96m" # Faint cyan
+ ICYAN = "\033[3;96m" # Indented CYAN
+ LCYAN = "\033[4;96m" # Underlined CYAN
+ UCYAN = "\033[5;96m" # Blinking CYAN
+
+ # White Variants
+ BWHITE = "\033[1m" # Bold white
+ BBWHITE = "\033[5;97;1m" # Bold Blinking white
+ WHITE = "\033[97m" # Normal white
+ DWHITE = "\033[1;97m" # Deep white
+ FWHITE = "\033[2;97m" # Faint white
+ IWHITE = "\033[3;97m" # Indented white
+ LWHITE = "\033[4;97m" # Underlined white
+ UWHITE = "\033[5;97m" # Blinking white
+
+ if os.name == "nt":
+ RESET = Style.RESET_ALL
+
+ # Red Variants
+ RED = Fore.LIGHTRED_EX
+ BRED = Fore.RED
+ FRED = Fore.RED
+ IRED = Fore.RED
+ LRED = Fore.LIGHTRED_EX # Underlined RED
+ URED = Fore.RED # Blinking not directly supported, using RED
+
+ # Green Variants
+ GREEN = Fore.LIGHTGREEN_EX
+ BGREEN = Fore.GREEN
+ FGREEN = Fore.GREEN
+ IGREEN = Fore.GREEN
+ LGREEN = Fore.LIGHTGREEN_EX # Underlined GREEN
+ UGREEN = Fore.GREEN # Blinking not directly supported, using GREEN
+
+ # Yellow Variants
+ YELLOW = Fore.LIGHTYELLOW_EX
+ BYELLOW = Fore.YELLOW
+ FYELLOW = Fore.YELLOW
+ IYELLOW = Fore.YELLOW
+ LYELLOW = Fore.LIGHTYELLOW_EX # Underlined YELLOW
+ UYELLOW = Fore.YELLOW # Blinking not directly supported, using YELLOW
+
+ # Blue Variants
+ BLUE = Fore.LIGHTBLUE_EX
+ BBLUE = Fore.BLUE
+ FBLUE = Fore.BLUE
+ IBLUE = Fore.BLUE
+ LBLUE = Fore.LIGHTBLUE_EX # Underlined BLUE
+ UBLUE = Fore.BLUE # Blinking not directly supported, using BLUE
+
+ # Magenta Variants
+ MAGENTA = Fore.LIGHTMAGENTA_EX
+ BMAGENTA = Fore.MAGENTA
+ FMAGENTA = Fore.MAGENTA
+ IMAGENTA = Fore.LIGHTMAGENTA_EX
+ LMAGENTA = Fore.LIGHTMAGENTA_EX # Underlined MAGENTA
+ UMAGENTA = Fore.MAGENTA # Blinking not directly supported, using MAGENTA
+
+ # Cyan Variants
+ CYAN = Fore.LIGHTCYAN_EX
+ DCYAN = Fore.CYAN
+ ICYAN = Fore.WHITE # Indented CYAN
+ FCYAN = Fore.CYAN
+ LCYAN = Fore.LIGHTCYAN_EX # Underlined CYAN
+ UCYAN = Fore.CYAN # Blinking not directly supported, using CYAN
+
+ # White Variants
+ BWHITE = Fore.WHITE
+ BBWHITE = Fore.WHITE # Blinking not directly supported, using WHITE
+ WHITE = Fore.WHITE
+ DWHITE = Fore.WHITE # Deep white (not distinct in colorama)
+ FWHITE = Fore.WHITE # Faint white (not distinct in colorama)
+ IWHITE = Fore.WHITE # Indented white (not distinct in colorama)
+ LWHITE = Fore.WHITE # Underlined white (not distinct in colorama)
+ UWHITE = Fore.WHITE # Blinking not directly supported, using WHITE
+
+
+class BackgroundColor:
+ if os.name == "posix":
+ RESET = "\033[0m" # Reset to default text color
+
+ # Red Variants
+ RED = "\033[91m" # Normal RED
+ BRED = "\033[1;41m" # Deep RED
+ FRED = "\033[2;41m" # Faint red
+ IRED = "\033[3;41m" # Indented RED
+ LRED = "\033[4;41m" # Underlined RED
+ URED = "\033[5;41m" # Blinking RED
+
+ # Green Variants
+ GREEN = "\033[42m" # Normal green
+ BGREEN = "\033[1;42m" # Deep green
+ FGREEN = "\033[2;42m" # Faint green
+ IGREEN = "\033[3;42m" # Indented GREEN
+ LGREEN = "\033[4;42m" # Underlined GREEN
+ UGREEN = "\033[5;42m" # Blinking GREEN
+
+ # Yellow Variants
+ YELLOW = "\033[43m" # Normal yellow
+ BYELLOW = "\033[1;43m" # Deep YELLOW
+ FYELLOW = "\033[2;43m" # Faint YELLOW
+ IYELLOW = "\033[3;43m" # Indented YELLOW
+ LYELLOW = "\033[4;43m" # Underlined YELLOW
+ UYELLOW = "\033[5;43m" # Blinking YELLOW
+
+ # Blue Variants
+ BLUE = "\033[44m" # Normal BLUE
+ BBLUE = "\033[1;44m" # Deep BLUE
+ FBLUE = "\033[2;44m" # Faint Blue
+ IBLUE = "\033[3;44m" # Indented BLUE
+ LBLUE = "\033[4;44m" # Underlined BLUE
+ UBLUE = "\033[5;44m" # Blinking BLUE
+
+ # Magenta Variants
+ MAGENTA = "\033[45m" # Normal MAGENTA
+ BMAGENTA = "\033[1;45m" # Deep MAGENTA
+ FMAGENTA = "\033[2;45m" # Faint MAGENTA
+ IMAGENTA = "\033[3;45m" # Indented MAGENTA
+ LMAGENTA = "\033[4;45m" # Underlined MAGENTA
+ UMAGENTA = "\033[5;45m" # Blinking MAGENTA
+
+ # Cyan Variants
+ CYAN = "\033[46m" # Normal cyan
+ DCYAN = "\033[1;46m" # Deep CYAN
+ FCYAN = "\033[2;46m" # Faint cyan
+ ICYAN = "\033[3;46m" # Indented CYAN
+ LCYAN = "\033[4;46m" # Underlined CYAN
+ UCYAN = "\033[5;46m" # Blinking CYAN
+
+ # White Variants
+ BWHITE = "\033[1m" # Bold white
+ BBWHITE = "\033[5;47;1m" # Bold Blinking white
+ WHITE = "\033[47m" # Normal white
+ DWHITE = "\033[1;47m" # Deep white
+ FWHITE = "\033[2;47m" # Faint white
+ IWHITE = "\033[3;47m" # Indented white
+ LWHITE = "\033[4;47m" # Underlined white
+ UWHITE = "\033[5;47m" # Blinking white
+
+ BLACK = "\033[40m" # Black Background
+
+ if os.name == "nt":
+ RESET = Style.RESET_ALL
+
+ # Red Variants
+ RED = Fore.LIGHTRED_EX
+ BRED = Fore.RED
+ FRED = Fore.RED
+ IRED = Fore.RED
+ LRED = Fore.LIGHTRED_EX # Underlined RED
+ URED = Fore.RED # Blinking not directly supported, using RED
+
+ # Green Variants
+ GREEN = Fore.LIGHTGREEN_EX
+ BGREEN = Fore.GREEN
+ FGREEN = Fore.GREEN
+ IGREEN = Fore.GREEN
+ LGREEN = Fore.LIGHTGREEN_EX # Underlined GREEN
+ UGREEN = Fore.GREEN # Blinking not directly supported, using GREEN
+
+ # Yellow Variants
+ YELLOW = Fore.LIGHTYELLOW_EX
+ BYELLOW = Fore.YELLOW
+ FYELLOW = Fore.YELLOW
+ IYELLOW = Fore.YELLOW
+ LYELLOW = Fore.LIGHTYELLOW_EX # Underlined YELLOW
+ UYELLOW = Fore.YELLOW # Blinking not directly supported, using YELLOW
+
+ # Blue Variants
+ BLUE = Fore.LIGHTBLUE_EX
+ BBLUE = Fore.BLUE
+ FBLUE = Fore.BLUE
+ IBLUE = Fore.BLUE
+ LBLUE = Fore.LIGHTBLUE_EX # Underlined BLUE
+ UBLUE = Fore.BLUE # Blinking not directly supported, using BLUE
+
+ # Magenta Variants
+ MAGENTA = Fore.LIGHTMAGENTA_EX
+ BMAGENTA = Fore.MAGENTA
+ FMAGENTA = Fore.MAGENTA
+ IMAGENTA = Fore.LIGHTMAGENTA_EX
+ LMAGENTA = Fore.LIGHTMAGENTA_EX # Underlined MAGENTA
+        UMAGENTA = Fore.MAGENTA  # Blinking not directly supported, using MAGENTA
+
+ # Cyan Variants
+ CYAN = Fore.LIGHTCYAN_EX
+ DCYAN = Fore.CYAN
+ ICYAN = Fore.WHITE # Indented CYAN
+ FCYAN = Fore.CYAN
+ LCYAN = Fore.LIGHTCYAN_EX # Underlined CYAN
+ UCYAN = Fore.CYAN # Blinking not directly supported, using CYAN
+
+ # White Variants
+ BWHITE = Fore.WHITE
+ BBWHITE = Fore.WHITE # Blinking not directly supported, using WHITE
+ WHITE = Fore.WHITE
+ DWHITE = Fore.WHITE # Deep white (not distinct in colorama)
+ FWHITE = Fore.WHITE # Faint white (not distinct in colorama)
+ IWHITE = Fore.WHITE # Indented white (not distinct in colorama)
+ LWHITE = Fore.WHITE # Underlined white (not distinct in colorama)
+ UWHITE = Fore.WHITE # Blinking not directly supported, using WHITE
+
+
+fg = ForegroundColor()
+bg = BackgroundColor()
+rs = fg.RESET
+
+
+class OutputFormater:
+ """ANSI styles for output display"""
+
+ INFO = f"{fg.BLUE}[i]{rs}"
+ WARN = f"{fg.YELLOW}[!]{rs}"
+ ERR = f"{fg.RED}[x]{rs}"
+    EXP = f"{fg.MAGENTA}[⁉️]{rs}"  # For exceptions
+ OK = f"{fg.GREEN}[✓]{rs}"
+ RESET = rs
diff --git a/filemac/utils/config.py b/filemac/utils/config.py
new file mode 100644
index 0000000..f5d7bd5
--- /dev/null
+++ b/filemac/utils/config.py
@@ -0,0 +1,11 @@
+from pathlib import Path
+import os
+
+# NOTE: despite the name, BASE_DIR is the current user's home directory.
+# ``home()`` is a classmethod, so the module's own path never influenced it.
+BASE_DIR = Path.home()
+
+# Default destination for generated files.
+OUTPUT_DIR = BASE_DIR.joinpath("Documents")
+
+# Scratch location for intermediate artefacts.
+CACHE_DIR = BASE_DIR.joinpath("tmp/filemac")
+
+# Ensure cache dir exists (created, with parents, at import time)
+CACHE_DIR.mkdir(parents=True, exist_ok=True)
diff --git a/filemac/utils/decorators.py b/filemac/utils/decorators.py
new file mode 100644
index 0000000..cf9dc7f
--- /dev/null
+++ b/filemac/utils/decorators.py
@@ -0,0 +1,46 @@
+class Decorators:
+    """Decorator factories that call the wrapped function once per element.
+
+    In both factories the element is passed as the FIRST positional argument,
+    followed by whatever arguments the caller supplied. The wrapper discards
+    the wrapped function's return values and returns ``None``.
+    """
+
+    @staticmethod
+    def for_loop(iterable: list | tuple | str):
+        """
+        A for loop decorator that calls the decorated function with each element
+        from the provided list or tuple.
+
+        Args:
+            iterable: A list, str or tuple of data to iterate over.
+        """
+        from functools import wraps
+
+        def decorator(func):
+            @wraps(func)  # keep the wrapped function's name and docstring
+            def wrapper(*args, **kwargs):
+                for item in iterable:
+                    func(item, *args, **kwargs)
+
+            return wrapper
+
+        return decorator
+
+    @staticmethod
+    def while_loop(iterable: list | tuple | str):
+        """
+        A while loop decorator that calls the decorated function with each element
+        from the provided list or tuple.
+
+        Args:
+            iterable: A list, str or tuple of data to iterate over (must support
+                ``len()`` and integer indexing).
+        """
+        from functools import wraps
+
+        def decorator(func):
+            @wraps(func)  # keep the wrapped function's name and docstring
+            def wrapper(*args, **kwargs):
+                index = 0
+                # Strictly less than: `<=` stepped one past the end and always
+                # finished with an IndexError.
+                while index < len(iterable):
+                    func(iterable[index], *args, **kwargs)
+                    index += 1
+
+            return wrapper
+
+        return decorator
+
+    def threading(self):
+        """Placeholder; not implemented yet."""
+        ...
+
+
+dcr = Decorators()
diff --git a/filemac/utils/file_utils.py b/filemac/utils/file_utils.py
new file mode 100644
index 0000000..5bc5481
--- /dev/null
+++ b/filemac/utils/file_utils.py
@@ -0,0 +1,360 @@
+"""
+File utility functions for filemac.
+"""
+
+import fnmatch
+import os
+import shutil
+import tempfile
+import uuid
+from pathlib import Path
+from typing import Iterator, List, Optional, Union
+
+from tqdm.auto import tqdm
+
+# from .colors import fg, rs
+from ..core.exceptions import FileSystemError
+from .colors import OutputFormater as OF
+from .config import OUTPUT_DIR
+from .formats import SUPPORTED_IMAGE_FORMATS, SUPPORTED_AUDIO_FORMATS, SUPPORTED_VIDEO_FORMATS
+from .simple import logger
+
+
+def map_ext_from_format(fmt: str) -> tuple:
+    """Return ``(fmt, family)`` for the first format family that lists ``fmt``.
+
+    Audio is checked first, then video (both against lower-cased entries),
+    then the values of the image mapping. ``(None, None)`` means no match.
+    """
+    for family in (SUPPORTED_AUDIO_FORMATS, SUPPORTED_VIDEO_FORMATS):
+        if any(fmt == name.lower() for name in family):
+            return fmt, family
+
+    if fmt in SUPPORTED_IMAGE_FORMATS.values():
+        return fmt, SUPPORTED_IMAGE_FORMATS
+
+    return None, None
+
+
+def dirbuster(_dir_, ext: list | tuple = ("pdf", "doc", "docx")) -> list:
+    """Recursively collect files under ``_dir_`` whose extension is in ``ext``.
+
+    Args:
+        _dir_: Directory to walk.
+        ext: Extensions to keep, without the leading dot. The file's extension
+            is lower-cased before the lookup, so entries should be lower case.
+
+    Returns:
+        Matching paths in walk order. A list is always returned (possibly
+        empty or partial), including when the walk fails or is interrupted.
+    """
+    target = []
+    try:
+        for root, _dirs, files in os.walk(_dir_):
+            for file in files:
+                # Require a real "." so an extension-less file that happens to
+                # be named e.g. "pdf" is not treated as a PDF.
+                _, dot, fext = file.rpartition(".")
+                if not dot or fext.lower() not in ext:
+                    continue
+
+                _path_ = os.path.join(root, file)
+                if os.path.exists(_path_):
+                    target.append(_path_)
+    except FileNotFoundError as e:
+        print(e)
+    except KeyboardInterrupt:
+        print("\nQuit!")
+    return target
+
+
+def generate_filename(ext, basedir=OUTPUT_DIR, postfix="filemac") -> Path:
+    """
+    Generate a unique filename for the given extension.
+
+    Args:
+        ext: File extension without the leading dot.
+        basedir: Directory the filename is placed in (defaults to OUTPUT_DIR).
+        postfix: Text appended after the random hex identifier.
+
+    Returns:
+        ``<basedir>/<uuid4 hex>-<postfix>.<ext>`` as a Path. Nothing is created
+        on disk.
+    """
+    # Honour the caller's basedir; it was previously ignored in favour of OUTPUT_DIR.
+    return Path(basedir) / f"{uuid.uuid4().hex}-{postfix}.{ext}"
+
+
+class FileSystemHandler:
+    """
+    Encapsulates file handling utilities required by cleaner.
+
+    Args:
+        ignore: Path component names (compared case-insensitively) whose
+            presence anywhere in a candidate path excludes that candidate from
+            the results of ``find_files`` and ``find_directories``.
+    """
+
+    def __init__(self, ignore: list | tuple = None):
+        self.ignore = ignore
+
+    def find_files(self, paths, patterns, recursive=True) -> list:
+        """Return files under ``paths`` whose name matches any glob in ``patterns``.
+
+        Args:
+            paths: Directories to search; ``~`` is expanded, missing ones are skipped.
+            patterns: ``fnmatch``-style patterns tested against the bare file name.
+            recursive: Descend into sub-directories when True.
+
+        Raises:
+            FileSystemError: Wraps any exception raised during the search.
+        """
+        try:
+            candidates = []
+            for path in paths:
+                path_obj = Path(path).expanduser().resolve()
+                if not path_obj.exists():
+                    continue
+                # Both modes apply the same filter; only the walker differs.
+                walker = path_obj.rglob("*") if recursive else path_obj.glob("*")
+                for file in tqdm(walker, desc="Searching", leave=False):
+                    if file.is_file() and any(
+                        fnmatch.fnmatch(file.name, pat) for pat in patterns
+                    ):
+                        candidates.append(file)
+            return self.ignore_pattern(candidates)
+        except Exception as e:
+            raise FileSystemError(e) from e
+
+    def find_directories(self, paths, patterns, recursive=True, empty=True) -> list:
+        """Return the empty directories found under ``paths``.
+
+        ``patterns`` and ``empty`` are accepted for interface compatibility but
+        are not consulted: every empty directory is reported.
+
+        Raises:
+            FileSystemError: Wraps any exception raised during the search.
+        """
+        try:
+            candidates = []
+            for path in paths:
+                path_obj = Path(path).expanduser().resolve()
+                if not path_obj.exists():
+                    continue
+                if recursive:
+                    # followlinks=True: symlinked directories are traversed too.
+                    for root, dirs, _files in tqdm(
+                        os.walk(path_obj, followlinks=True),
+                        desc="Searching",
+                        leave=False,
+                    ):
+                        for name in dirs:
+                            full = Path(root) / name
+                            if not os.listdir(full):
+                                candidates.append(full)
+                else:
+                    for item in tqdm(
+                        os.listdir(path_obj), desc="Searching", leave=False
+                    ):
+                        # Join with the parent first: a bare entry name would be
+                        # resolved against the current working directory.
+                        full = path_obj / item
+                        if full.is_dir() and not os.listdir(full):
+                            candidates.append(full)
+
+            return self.ignore_pattern(candidates)
+        except Exception as e:
+            raise FileSystemError(e) from e
+
+    def ignore_pattern(self, items: list | tuple, ignore: list | tuple = None) -> list:
+        """Drop every item whose path contains an ignored component.
+
+        Args:
+            items: Paths to filter.
+            ignore: Component names to exclude; falls back to ``self.ignore``.
+
+        Returns:
+            The surviving items in their original order (all of them when
+            there is nothing to ignore).
+        """
+        ignore = self.ignore if not ignore else ignore
+        if not ignore:
+            return list(items)
+
+        blocked = {str(ig).lower() for ig in ignore}
+        return [
+            item
+            for item in items
+            if blocked.isdisjoint(part.lower() for part in Path(item).parts)
+        ]
+
+    @staticmethod
+    def _find_files(pattern: str, recursive: bool = True) -> Iterator[Path]:
+        """Yield ``pattern`` itself if it is an existing file, else glob for it from the cwd."""
+        path = Path(pattern)
+
+        if path.exists() and path.is_file():
+            yield path
+            return
+
+        # Handle glob patterns
+        if recursive:
+            yield from Path(".").rglob(pattern)
+        else:
+            yield from Path(".").glob(pattern)
+
+    @staticmethod
+    def delete_files(files) -> bool:
+        """Unlink every existing file in ``files``.
+
+        Returns:
+            True when the whole batch was processed, False on an unexpected
+            non-OS error.
+
+        Raises:
+            FileSystemError: On any OSError (including PermissionError).
+        """
+        f = None  # keeps the failure message valid even if iteration itself fails
+        try:
+            for f in files:
+                if f.exists():
+                    f.unlink()
+                    print(f"{OF.OK} Deleted: {f}")
+            return True
+        except OSError as e:
+            raise FileSystemError(e) from e
+        except Exception as e:
+            print(f"{OF.ERR} Failed to delete {f}: {e}")
+            return False
+
+    @staticmethod
+    def delete_folders(files) -> bool:
+        """Remove every existing directory in ``files`` with ``rmdir`` (directories must be empty).
+
+        Returns:
+            True when the whole batch was processed, False on an unexpected
+            non-OS error.
+
+        Raises:
+            FileSystemError: On any OSError (including PermissionError).
+        """
+        f = None  # keeps the failure message valid even if iteration itself fails
+        try:
+            for f in files:
+                if f.exists():
+                    f.rmdir()
+                    print(f"{OF.OK} Deleted: {f}")
+            return True
+        except OSError as e:
+            raise FileSystemError(e) from e
+        except Exception as e:
+            print(f"{OF.ERR} Failed to delete {f}: {e}")
+            return False
+
+    @staticmethod
+    def ensure_directory(path: Path) -> Path:
+        """Ensure directory exists, create if necessary."""
+        try:
+            path.mkdir(parents=True, exist_ok=True)
+            return path
+        except OSError as e:
+            raise FileSystemError(f"Failed to create directory {path}: {str(e)}") from e
+
+    @staticmethod
+    def safe_filename(name: str, max_length: int = 255) -> str:
+        """Convert string to safe filename.
+
+        Characters other than alphanumerics and ``._-`` become ``_``, runs of
+        whitespace collapse to a single ``_``, and over-long names are cut and
+        suffixed with ``_`` plus an 8-character digest of the full name.
+        """
+        import hashlib
+
+        # Replace unsafe characters
+        safe_name = "".join(c if c.isalnum() or c in "._- " else "_" for c in name)
+
+        # Collapse whitespace runs into single underscores
+        safe_name = "_".join(safe_name.split())
+
+        # Trim to max length
+        if len(safe_name) > max_length:
+            # A content digest is stable across runs; the builtin hash() of a
+            # str is randomised per process.
+            name_hash = hashlib.sha256(safe_name.encode("utf-8")).hexdigest()[:8]
+            safe_name = safe_name[: max_length - 9] + "_" + name_hash
+
+        return safe_name
+
+
+class TemporaryFileManager:
+    """Creates temporary files and directories and remembers them for later removal."""
+
+    def __init__(self, prefix: str = "kcleaner_"):
+        self.prefix = prefix
+        self.temp_files = []
+        self.temp_dirs = []
+
+    def create_temp_file(self, suffix: str, content: str = "") -> Path:
+        """Write ``content`` (if any) to a new UTF-8 temp file and return its path.
+
+        The file is kept on disk after closing and is tracked for ``cleanup``.
+        """
+        try:
+            handle = tempfile.NamedTemporaryFile(
+                mode="w",
+                suffix=suffix,
+                prefix=self.prefix,
+                encoding="utf-8",
+                delete=False,
+            )
+            with handle:
+                if content:
+                    handle.write(content)
+                created = Path(handle.name)
+        except (OSError, IOError) as e:
+            raise FileSystemError(f"Failed to create temporary file: {str(e)}")
+
+        self.temp_files.append(created)
+        return created
+
+    def create_temp_dir(self) -> Path:
+        """Make a new temporary directory, track it for ``cleanup`` and return its path."""
+        try:
+            created = Path(tempfile.mkdtemp(prefix=self.prefix))
+        except OSError as e:
+            raise FileSystemError(f"Failed to create temporary directory: {str(e)}")
+
+        self.temp_dirs.append(created)
+        return created
+
+    def cleanup(self):
+        """Remove every tracked file, then every tracked directory tree.
+
+        Failures are logged as warnings and do not stop the sweep; both
+        tracking lists are emptied afterwards.
+        """
+        for tracked_file in self.temp_files:
+            try:
+                if not tracked_file.exists():
+                    continue
+                tracked_file.unlink()
+            except OSError as e:
+                logger.warning(f"Failed to delete temporary file {tracked_file}: {e}")
+
+        for tracked_dir in self.temp_dirs:
+            try:
+                if not tracked_dir.exists():
+                    continue
+                shutil.rmtree(tracked_dir)
+            except OSError as e:
+                logger.warning(f"Failed to delete temporary directory {tracked_dir}: {e}")
+
+        self.temp_files.clear()
+        self.temp_dirs.clear()
+
+
+class DirectoryScanner:
+    """Resolve a file, a directory, or a list of either into image file paths."""
+
+    def __init__(self, input_obj: Optional[Union[str, list[str], os.PathLike]]):
+        self.input_obj = input_obj
+
+    def get_dir_files(self, path=None):
+        """
+        Get the supported image files directly inside a directory.
+
+        Args:
+            path: Directory to list. Defaults to ``self.input_obj`` so existing
+                zero-argument calls keep working.
+
+        Returns:
+            list of file paths (directory joined with each matching entry).
+
+        Raises:
+            FileNotFoundError: If the directory holds no supported image file.
+        """
+        directory = self.input_obj if path is None else path
+        files = [
+            os.path.join(directory, f)
+            for f in os.listdir(directory)
+            if os.path.isfile(os.path.join(directory, f))
+            and self._is_supported_image(f)
+        ]
+        if not files:  # Check for empty directory *after* filtering
+            raise FileNotFoundError(f"No supported image files found in: {directory}")
+        return files
+
+    def _is_supported_image(self, filename: str) -> bool:
+        """Checks if a file has a supported image extension."""
+        # os.fspath lets path-like list entries through as well as plain strings.
+        return (
+            os.fspath(filename)
+            .lower()
+            .endswith(tuple(SUPPORTED_IMAGE_FORMATS.values()))
+        )
+
+    def _get_image_files(self, files: list = None) -> List[str]:
+        """
+        Identifies image files to process, handling both single files and directories.
+
+        Args:
+            files: A path or list of paths; falls back to ``self.input_obj``.
+
+        Returns:
+            A list of paths to image files. A single existing file is returned
+            as-is without an extension check.
+
+        Raises:
+            FileNotFoundError: If an entry is neither a file nor a directory,
+                or a directory contains no supported image.
+        """
+        files = self.input_obj if not files else files
+
+        if isinstance(files, (str, os.PathLike)):
+            if os.path.isfile(files):
+                return [files]
+            return self.get_dir_files(files)
+
+        files_to_process = []
+        for obj in files:
+            if os.path.isfile(obj):
+                if self._is_supported_image(obj):
+                    files_to_process.append(obj)
+                else:
+                    logger.warning(f"Skipping unsupported file: {obj}")
+            elif os.path.isdir(obj):
+                # get_dir_files raises FileNotFoundError itself when nothing matches.
+                files_to_process.extend(self.get_dir_files(obj))
+            else:
+                raise FileNotFoundError(
+                    f"Input is not a valid file or directory: {obj}"
+                )
+        return files_to_process
+
+    def run(self):
+        """Return the image files resolved from ``self.input_obj``."""
+        supported_files = self._get_image_files(self.input_obj)
+        return supported_files
+
+
+def modify_filename_if_exists(filename):
+    """
+    Modifies the filename by adding "_filemac" before the extension if the original filename exists.
+
+    Args:
+        filename (str | os.PathLike): The filename to modify.
+
+    Returns:
+        The original ``filename`` object unchanged when nothing exists at that
+        path; otherwise a ``str`` with "_filemac" inserted before the extension
+        (or appended when there is no extension). The returned name is not
+        itself checked for existence.
+    """
+    if not os.path.exists(filename):
+        return filename
+
+    # splitext only inspects the final path component, so dots in directory
+    # names (e.g. "v1.2/report") are left alone.
+    base, ext = os.path.splitext(os.fspath(filename))
+    return f"{base}_filemac{ext}"
diff --git a/filemac/utils/formats.py b/filemac/utils/formats.py
new file mode 100644
index 0000000..35e472e
--- /dev/null
+++ b/filemac/utils/formats.py
@@ -0,0 +1,170 @@
+# multimedia_cli/formats
+from .colors import fg, bg, rs
+
+
+RESET = rs
+
+SUPPORTED_DOC_FORMATS = ["pdf", 'ppt', 'pptx', 'doc', 'docx', 'xls', 'xlsx', 'txt']
+
+SUPPORTED_DOC_FORMATS_HELP = f"""
+|---------------------------------------------------------------------------
+|{bg.BBLUE}Input format{RESET} |{bg.BBLUE}Output format{RESET} |
+|________________________________|__________________________________________|
+| xlsx {fg.BYELLOW}-------------------->{RESET}|csv txt doc/docx db(sql) |
+| | |
+| doc/docx{fg.BYELLOW}-------------------->{RESET}|txt pdf ppt/pptx audio(ogg) |
+| | |
+| txt {fg.BYELLOW}-------------------->{RESET}|pdf docx/doc audio(ogg) |
+| | |
+| pdf {fg.BYELLOW}-------------------->{RESET}|doc/docx txt audio(ogg) |
+| | |
+| pptx/ppt{fg.BYELLOW}-------------------->{RESET}|doc/docx |
+| |
+|___________________________________________________________________________|
+"""
+
+
+# Add supported input and output formats for each media type
+SUPPORTED_AUDIO_FORMATS = [
+ "wav", # Waveform Audio File Format
+ "mp3", # MPEG Audio Layer III
+ "ogg",
+ "flv",
+ "ogv",
+ "webm",
+ "aiff",
+ "flac", # Free Lossless Audio Codec
+ "m4a",
+ "raw",
+ "bpf",
+ "aac", # Advanced Audio Coding
+]
+
+SUPPORTED_AUDIO_FORMATS_DIRECT = [
+ "mp3",
+ "wav",
+ "raw",
+ "ogg",
+ "aiff",
+ "flac",
+ # The remaining entries are video container formats.
+ "flv", # Flash Video
+ "webm",
+ "ogv",
+]
+SUPPORTED_AUDIO_FORMATS_SHOW = f"""
+|==============================|
+| {bg.BBLUE}Supported I/O formats {RESET} |
+|==============================|
+| {fg.CYAN} wav {fg.BYELLOW} |
+| {fg.CYAN} mp3 {fg.BYELLOW} |
+| {fg.CYAN} ogg {fg.BYELLOW} |
+| {fg.CYAN} flv {fg.BYELLOW} |
+| {fg.CYAN} ogv {fg.BYELLOW} |
+| {fg.CYAN} mov {fg.BYELLOW} |
+| {fg.CYAN} webm {fg.BYELLOW} |
+| {fg.CYAN} aac {fg.BYELLOW}-------------->|{bg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW}
+| {fg.CYAN} bpf {fg.BYELLOW}-------------->|{bg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW}
+| {fg.CYAN} m4a {fg.BYELLOW} |
+| {fg.CYAN} raw {fg.BYELLOW} |
+| {fg.CYAN} aiff {fg.BYELLOW} |
+--------------------------------
+
+"""
+
+SUPPORTED_VIDEO_FORMATS = [
+ "MP4", # MPEG-4 part 14 Bitrate - 860kb/s
+ "AVI", # Audio Video Interleave
+ "OGV",
+ "WEBM",
+ "MOV", # QuickTime video Bitrate - 1.01mb/s
+ "MKV", # Matroska video - MKV is known for its support of high-quality content. Bitrate-1.01mb/s
+ "FLV", # Flash video Bitrate
+ "WMV",
+]
+
+
+Video_codecs = {
+ "MP4": "mpeg4",
+ "AVI": "rawvideo",
+ # "OGV": "avc",
+ "WEBM": "libvpx",
+ "MOV": "mpeg4", # QuickTime video
+ "MKV": "mpeg4", # Matroska video
+ "FLV": "flv",
+ # "WMV": "WMV"
+}
+# Help table of video formats. The bottom border previously contained the
+# stray literal text ".BMAGENTA" (a leftover colour reference), which was
+# printed verbatim; it is replaced with border characters.
+SUPPORTED_VIDEO_FORMATS_SHOW = f"""
+,_______________________________________,
+|x| {bg.BBLUE}Supported I/O formats{RESET} |x|
+|x|-----------------------------------{fg.BYELLOW}|x|
+|x| {fg.BMAGENTA} MP4 {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} AVI {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} OGV {fg.BYELLOW}-------------->|x|{fg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} WEBM{fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} MOV {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} MKV {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} FLV {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} WMV {fg.BYELLOW}-------------->|x|{fg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW}
+|,|____________________________________________|,|{fg.BYELLOW}
+"""
+
+# Maps an upper-case format name to its file extension (with leading dot).
+SUPPORTED_IMAGE_FORMATS = {
+ "JPEG": ".jpeg", # Joint Photographic Experts Group - lossy compression
+ "JPG": ".jpg", # Same JPEG format under the three-letter extension
+ "PNG": ".png",
+ "GIF": ".gif", # Graphics Interchange Format
+ "BMP": ".bmp", # Windows BMP image
+ "DIB": ".dib", # Windows BMP image
+ "TIFF": ".tiff", # Tagged Image File Format: flexible, high quality, supports lossless compression
+ "PIC": ".pic",
+ "PDF": ".pdf",
+ "WEBP": ".webp",
+ "EPS": ".eps",
+ "ICNS": ".icns", # MacOS X icon
+ # Waiting Implementation 👇
+ "PSD": ".psd",
+ "SVG": ".svg", # Scalable Vector Graphics
+ "EXR": ".exr",
+ "DXF": ".dxf", # AutoCAD 2D format
+ "PICT": ".pct",
+ "PS": ".ps", # PostScript
+ "POSTSCRIPT": ".ps",
+}
+
+SUPPORTED_IMAGE_FORMATS_SHOW = f"""
+__________________________________________
+|x|{bg.BBLUE}Supported I/O formats{RESET} |x|
+|x|_____________________________________{fg.BYELLOW}|x|
+|x| {fg.BMAGENTA} JPEG {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} PNG {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} GIF {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} BMP {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} DIB {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} TIFF {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} PIC {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} EXR {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} PDF {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} WebP {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} ICNS {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} PSD {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} SVG {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} EPS {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} Postscript {fg.FMAGENTA}---------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} PICT {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|_|_____________________________________|x|
+"""
+
+SUPPORTED_DOCUMENT_FORMATS = [
+ "pdf",
+ "doc",
+ "docx",
+ "csv",
+ "xlsx",
+ "xls",
+ "ppt",
+ "pptx",
+ "txt",
+ "ogg",
+ "mp3",
+ "audio",
+]
diff --git a/build/lib/filemac/handle_warnings.py b/filemac/utils/handle_warnings.py
similarity index 100%
rename from build/lib/filemac/handle_warnings.py
rename to filemac/utils/handle_warnings.py
diff --git a/filemac/utils/helpmaster.py b/filemac/utils/helpmaster.py
new file mode 100644
index 0000000..4b17d4a
--- /dev/null
+++ b/filemac/utils/helpmaster.py
@@ -0,0 +1,25 @@
+# colors.py is a sibling of this module inside filemac/utils/, so the import
+# is ".colors"; ".utils.colors" would point at filemac/utils/utils/colors.
+from .colors import fg, rs
+
+
+RESET = rs
+
+
+def pdf_combine_help():
+ """Return the (options, helper, ex) help strings describing the page orderings for --pdfjoin."""
+ # Table of the accepted order codes.
+ options = f"""
+ _________________________
+ {fg.BWHITE}|Linear: {fg.YELLOW}AA/BB/AAB/BBA{RESET} |
+ {fg.BWHITE}|Shifted: {fg.YELLOW}AB/BA/ABA/BAB{RESET} |
+ _________________________"""
+
+ # Explanation of the two joining methods.
+ helper = f"""\n\t---------------------------------------------------------------------------------------------
+ {fg.BWHITE}|Currently There are 2 supported methods: {fg.FCYAN}Linear and Alternating/shifting.{RESET}\t\t |
+ |-------------------------------------------------------------------------------------------|
+ {fg.BWHITE}|->Linear pages are ordered in form of: {fg.CYAN}File1Page1,...Fil1Pagen{RESET} then {fg.CYAN}File2Page1,...Fil2Pagen{RESET}|\n\t{fg.BWHITE}|File2 is joined at the end of the file1.\t\t\t\t\t\t |
+ |-------------------------------------------------------------------------------------------|
+ {fg.BWHITE}|->Shifting method Picks: {fg.CYAN}File1Page1, File2Page1...File1pagen,File2Pagen{RESET}\t\t |
+ |--------------------------------------------------------------------------------------------"""
+
+ # Example command line.
+ ex = f"""\t_____________________________________________________
+ \t|->{fg.BBLUE}filemac --pdfjoin file1.pdf file2.pdf --order AAB{RESET}|
+ \t-----------------------------------------------------"""
+ return options, helper, ex
diff --git a/filemac/utils/logging_utils.py b/filemac/utils/logging_utils.py
new file mode 100644
index 0000000..b7162f6
--- /dev/null
+++ b/filemac/utils/logging_utils.py
@@ -0,0 +1,73 @@
+"""
+Logging configuration for Filemac.
+"""
+
+import logging
+import sys
+from typing import Optional
+
+
+def setup_logging(
+    level: int = logging.INFO,
+    format_string: Optional[str] = None,
+    log_file: Optional[str] = None,
+) -> logging.Logger:
+    """
+    Configure and return the "filemac" package logger.
+
+    Handlers already attached to that logger are removed and closed, then a
+    stdout handler (and optionally a UTF-8 file handler) is installed.
+
+    Args:
+        level: Logging level
+        format_string: Custom format string
+        log_file: Optional log file path
+
+    Returns:
+        Configured logger
+    """
+    if format_string is None:
+        format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+    formatter = logging.Formatter(format_string)
+
+    # Package logger (named "filemac", not the root logger)
+    logger = logging.getLogger("filemac")
+    logger.setLevel(level)
+
+    # Clear existing handlers. Close them as well: removeHandler alone leaves
+    # a replaced FileHandler holding its log file open.
+    for handler in logger.handlers[:]:
+        logger.removeHandler(handler)
+        handler.close()
+
+    # Console handler
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+
+    # File handler if specified
+    if log_file:
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+
+    # Prevent propagation to root logger
+    logger.propagate = False
+
+    return logger
+
+
+class LoggingContext:
+    """Context manager for temporary logging configuration.
+
+    On entry the "filemac" logger is reconfigured through ``setup_logging``;
+    on exit the temporary handlers are closed and the logger's previous
+    level, handlers and ``propagate`` flag are put back.
+    """
+
+    def __init__(self, level: int = logging.INFO, log_file: Optional[str] = None):
+        self.level = level
+        self.log_file = log_file
+        self.original_level = None
+        self.file_handler = None  # temporary FileHandler while the context is active
+        self._original_handlers = []
+        self._original_propagate = True
+
+    def __enter__(self):
+        logger = logging.getLogger("filemac")
+        self.original_level = logger.level
+        self._original_propagate = logger.propagate
+
+        # Detach the current handlers before reconfiguring so they survive
+        # untouched and can be re-attached on exit.
+        self._original_handlers = logger.handlers[:]
+        for handler in self._original_handlers:
+            logger.removeHandler(handler)
+
+        setup_logging(level=self.level, log_file=self.log_file)
+        self.file_handler = next(
+            (h for h in logger.handlers if isinstance(h, logging.FileHandler)),
+            None,
+        )
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        logger = logging.getLogger("filemac")
+
+        # Drop and close everything the temporary configuration installed.
+        for handler in logger.handlers[:]:
+            logger.removeHandler(handler)
+            handler.close()
+
+        for handler in self._original_handlers:
+            logger.addHandler(handler)
+        logger.setLevel(self.original_level)
+        logger.propagate = self._original_propagate
+
+        self.file_handler = None
+        self._original_handlers = []
diff --git a/filemac/utils/screen.py b/filemac/utils/screen.py
new file mode 100644
index 0000000..3c6d4de
--- /dev/null
+++ b/filemac/utils/screen.py
@@ -0,0 +1,19 @@
+"""Provides screen actions like clearing screen etc."""
+
+import os
+import ctypes
+
+
+def clear_screen():
+    """
+    Clear the screen with the platform's shell command: ``cls`` on Windows,
+    ``clear`` on unix systems.
+    """
+    # The earlier ctypes path handed plain tuples to Win32 functions that take
+    # COORD structures and left out the required "characters written" output
+    # argument, so it raised instead of clearing the console.
+    os.system("cls" if os.name == "nt" else "clear")
diff --git a/filemac/utils/security/vul_mitigate.py b/filemac/utils/security/vul_mitigate.py
new file mode 100644
index 0000000..398fb6e
--- /dev/null
+++ b/filemac/utils/security/vul_mitigate.py
@@ -0,0 +1,177 @@
+import os
+import subprocess
+import sqlite3
+
+# import shlex
+import json
+import tempfile
+import logging
+import html
+import requests
+from dotenv import load_dotenv
+from ...core.exceptions import ValidationError
+
+# from importlib import resources
+from ..colors import fg, rs
+
+RESET = rs
+
+pkg_resources = []
+
+
+class SecurePython:
+    """Collection of small helpers demonstrating common security mitigations."""
+
+    def __init__(self):
+        """Initialize security mitigations."""
+        load_dotenv()  # Load environment variables for secret management
+        logging.basicConfig(level=logging.INFO)
+
+    # ✅ 1. Prevent Command Injection
+    def secure_subprocess(self, command_list):
+        """Runs a secure subprocess command using a list format to prevent command injection.
+
+        Returns:
+            The command's stdout, or None when it exits with a non-zero status.
+
+        Raises:
+            ValidationError: If ``command_list`` is not a list.
+        """
+        if not isinstance(command_list, list):
+            raise ValidationError("Command must be a list")
+        try:
+            result = subprocess.run(
+                command_list, check=True, capture_output=True, text=True
+            )
+            return result.stdout
+        except subprocess.CalledProcessError as e:
+            logging.error(f"Command failed: {e}")
+            return None
+
+    # ✅ 2. Prevent Path Traversal
+    def safe_filepath(self, base_dir, user_input_path):
+        """Prevents path traversal by restricting access to a safe base directory.
+
+        Paths are normalised with ``abspath``; symlinks are not resolved.
+
+        Raises:
+            ValueError: If the joined path falls outside ``base_dir``.
+        """
+        base = os.path.abspath(base_dir)
+        full_path = os.path.abspath(os.path.join(base, user_input_path))
+
+        # Compare whole path components. A plain startswith() check would let
+        # "/safe/directory2" pass for the base "/safe/directory".
+        try:
+            inside = os.path.commonpath([base, full_path]) == base
+        except ValueError:  # e.g. paths on different drives
+            inside = False
+
+        if not inside:
+            raise ValueError("Invalid file path: Path traversal attempt detected")
+        print(f"{fg.BBLUE}Return safe path: {fg.BGREEN}{full_path}{RESET}")
+        return full_path
+
+    # ✅ 3. Prevent SQL Injection
+    def safe_sql_query(self, db_path, query, params):
+        """Executes a parameterized SQL query to prevent SQL injection.
+
+        Returns:
+            The fetched rows, or None when SQLite reports an error.
+        """
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+        try:
+            cursor.execute(query, params)
+            result = cursor.fetchall()
+            conn.commit()
+            return result
+        except sqlite3.Error as e:
+            logging.error(f"SQL error: {e}")
+            return None
+        finally:
+            conn.close()
+
+    # ✅ 4. Secure File Handling
+    def secure_temp_file(self, content):
+        """Writes ``content`` to a new temporary file and returns its path.
+
+        The file is kept on disk after closing so the returned path is usable;
+        the caller is responsible for deleting it.
+        """
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            temp_file.write(content.encode())
+        return temp_file.name
+
+    # ✅ 5. Secure Secret Management
+    def get_secret(self, key):
+        """Fetches secrets from environment variables (logs a warning when missing)."""
+        secret = os.getenv(key)
+        if not secret:
+            logging.warning(f"Secret {key} is missing!")
+        return secret
+
+    # ✅ 6. Prevent Insecure Deserialization
+    def safe_json_load(self, json_string):
+        """Safely loads JSON instead of using pickle to avoid remote code execution.
+
+        Returns:
+            The decoded value, or None when the text is not valid JSON.
+        """
+        try:
+            return json.loads(json_string)
+        except json.JSONDecodeError as e:
+            logging.error(f"Invalid JSON: {e}")
+            return None
+
+    # ✅ 7. Prevent XSS Attacks
+    def sanitize_html(self, user_input):
+        """Escapes HTML to prevent XSS attacks."""
+        return html.escape(user_input)
+
+    # ✅ 8. Check Dependency Vulnerabilities
+    def check_dependencies(self):
+        """Checks installed dependencies for known vulnerabilities.
+
+        Results are only logged; every failure is caught and logged as well.
+        """
+        from importlib import metadata
+
+        try:
+            installed_packages = {
+                dist.metadata["Name"].lower(): dist.version
+                for dist in metadata.distributions()
+                if dist.metadata["Name"]
+            }
+            # A timeout keeps an unresponsive server from hanging the caller.
+            response = requests.get("https://pyup.io/api/v1/safety/", timeout=10)
+            if response.status_code == 200:
+                advisories = response.json()  # decode once, not once per package
+                vulnerable_packages = [
+                    package for package in installed_packages if package in advisories
+                ]
+                if vulnerable_packages:
+                    logging.warning(
+                        f"Vulnerable dependencies found: {vulnerable_packages}"
+                    )
+                else:
+                    logging.info("No known vulnerable dependencies detected.")
+            else:
+                logging.warning("Failed to fetch vulnerability database.")
+        except Exception as e:
+            logging.error(f"Error checking dependencies: {e}")
+
+    # ✅ 9. Secure Logging
+    def secure_logging(self, message):
+        """Logs the message with the literal words "password" and "API_KEY" masked.
+
+        Only those two words are replaced; any value that follows them is
+        logged unchanged.
+        """
+        sanitized_message = message.replace("password", "*****").replace(
+            "API_KEY", "*****"
+        )
+        logging.info(sanitized_message)
+
+    # ✅ 10. Run All Security Mitigations
+    def entry_run(self):
+        """Runs all security mitigations where applicable."""
+        logging.info("🔒 Running security mitigations...")
+
+        # Example secure execution
+        self.secure_subprocess(["echo", "Secure Execution"])
+
+        # Example secure file path usage
+        try:
+            safe_path = self.safe_filepath("/safe/directory", "../etc/passwd")
+            logging.info(f"Safe path resolved: {safe_path}")
+        except ValueError as e:
+            logging.error(e)
+
+        # Example secure SQL execution. Each call opens its own connection, so
+        # the two statements need an on-disk database to see the same table
+        # (every ":memory:" connection is a separate, empty database).
+        with tempfile.TemporaryDirectory() as workdir:
+            db_path = os.path.join(workdir, "example.db")
+            self.safe_sql_query(
+                db_path, "CREATE TABLE test (id INTEGER, name TEXT)", ()
+            )
+            self.safe_sql_query(
+                db_path, "INSERT INTO test (id, name) VALUES (?, ?)", (1, "John Doe")
+            )
+
+        # Example secure file handling
+        temp_file = self.secure_temp_file("Secure data")
+        logging.info(f"Created secure temp file at {temp_file}")
+        os.remove(temp_file)  # the demo owns the file, so clean it up
+
+        # Example secret fetching
+        self.get_secret("API_KEY")
+
+        # Example safe JSON parsing
+        self.safe_json_load('{"key": "value"}')
+
+        # Example HTML sanitization
+        sanitized_html = self.sanitize_html("")
+        logging.info(f"Sanitized HTML: {sanitized_html}")
+
+        # Example dependency check
+        self.check_dependencies()
+
+        # Example secure logging
+        self.secure_logging("User attempted login with password: mypassword")
+
+        logging.info("✅ All security mitigations executed successfully!")
+
+
+# === Run SecurePython Class ===
+if __name__ == "__main__":
+ sp = SecurePython()
+ sp.entry_run()
diff --git a/filemac/utils/simple.py b/filemac/utils/simple.py
new file mode 100644
index 0000000..e40164c
--- /dev/null
+++ b/filemac/utils/simple.py
@@ -0,0 +1,8 @@
+import logging
+
+# Configure logging
+# NOTE: this runs at import time and targets the root logger.
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(levelname)s - %(message)s",
+)
+# Logger named "filemac"; imported by sibling modules such as file_utils.
+logger = logging.getLogger("filemac")
diff --git a/fweb/README.md b/fweb/README.md
new file mode 100644
index 0000000..7757011
--- /dev/null
+++ b/fweb/README.md
@@ -0,0 +1,26 @@
+## Architecture Overview
+### Project Structure
+```text
+filemac_web/
+├── filemac_web/ # Django project
+│ ├── settings.py
+│ ├── urls.py
+│ └── wsgi.py
+├── filemac_app/ # Main application
+│ ├── models.py
+│ ├── views.py
+│ ├── urls.py
+│ ├── forms.py
+│ └── utils.py
+├── templates/
+│ ├── base.html
+│ ├── index.html
+│ ├── dashboard.html
+│ ├── converters/
+│ └── results/
+├── static/
+│ ├── css/
+│ ├── js/
+│ └── images/
+└── media/ # Uploaded files
+```
diff --git a/fweb/core/__init__.py b/fweb/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fweb/core/admin.py b/fweb/core/admin.py
new file mode 100644
index 0000000..cc64292
--- /dev/null
+++ b/fweb/core/admin.py
@@ -0,0 +1,30 @@
+from django.contrib import admin
+from .models import ProcessingJob, ProcessedFile
+
+
+@admin.register(ProcessingJob)
+class ProcessingJobAdmin(admin.ModelAdmin):
+ """Admin options for ProcessingJob: list columns, filters, search fields and grouped form sections."""
+
+ list_display = ("job_id", "tool_id", "user", "status", "progress", "created_at")
+ list_filter = ("status", "tool_id", "created_at")
+ search_fields = ("job_id", "tool_id", "user__username")
+ # Timestamps are shown on the form but cannot be edited.
+ readonly_fields = ("created_at", "updated_at")
+ fieldsets = (
+ (None, {"fields": ("job_id", "user", "tool_id", "status", "progress")}),
+ ("Files", {"fields": ("input_files", "output_files")}),
+ ("Timestamps", {"fields": ("created_at", "updated_at")}),
+ ("Error", {"fields": ("error_message",), "classes": ("collapse",)}),
+ )
+
+
+@admin.register(ProcessedFile)
+class ProcessedFileAdmin(admin.ModelAdmin):
+ """Admin options for ProcessedFile: list columns, filters and search fields."""
+
+ list_display = (
+ "original_name",
+ "processed_name",
+ "job",
+ "file_size",
+ "processed_at",
+ )
+ # Double-underscore lookups reach fields on the related job.
+ list_filter = ("job__tool_id", "processed_at")
+ search_fields = ("original_name", "processed_name", "job__job_id")
+ readonly_fields = ("processed_at",)
diff --git a/fweb/core/apps.py b/fweb/core/apps.py
new file mode 100644
index 0000000..bde16cf
--- /dev/null
+++ b/fweb/core/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class FilemacAppConfig(AppConfig):
+ """Django application configuration for the `core` app."""
+
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "core"
diff --git a/fweb/core/config.py b/fweb/core/config.py
new file mode 100644
index 0000000..fb6f9e5
--- /dev/null
+++ b/fweb/core/config.py
@@ -0,0 +1,206 @@
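+# Tool configurations: category key -> icon name, color, description and a "tools" list; each tool "icon" is an SVG path-data string. NOTE(review): tool ids are not unique across categories ("extract_audio" is listed under both "audio" and "video").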
+TOOL_CONFIGS = {
+ "document": {
+ "icon": "file-pdf",
+ "color": "blue",
+ "description": "Document conversion and processing tools",
+ "tools": [
+ {
+ "id": "convert_doc",
+ "name": "Document Conversion",
+ "icon": """M566.6 214.6L470.6 310.6C461.4 319.8 447.7 322.5 435.7 317.5C423.7 312.5 416 300.9 416 288L416 224L96 224C78.3 224 64 209.7 64 192C64 174.3 78.3 160 96 160L416 160L416 96C416 83.1 423.8 71.4 435.8 66.4C447.8 61.4 461.5 64.2 470.7 73.3L566.7 169.3C579.2 181.8 579.2 202.1 566.7 214.6zM169.3 566.6L73.3 470.6C60.8 458.1 60.8 437.8 73.3 425.3L169.3 329.3C178.5 320.1 192.2 317.4 204.2 322.4C216.2 327.4 224 339.1 224 352L224 416L544 416C561.7 416 576 430.3 576 448C576 465.7 561.7 480 544 480L224 480L224 544C224 556.9 216.2 568.6 204.2 573.6C192.2 578.6 178.5 575.8 169.3 566.7z""",
+ "color": "blue",
+ "description": "Convert between PDF, DOCX, TXT, and other document formats",
+ },
+ {
+ "id": "pdf_join",
+ "name": "PDF Joining",
+ "icon": """M288 64C252.7 64 224 92.7 224 128L224 384C224 419.3 252.7 448 288 448L480 448C515.3 448 544 419.3 544 384L544 183.4C544 166 536.9 149.3 524.3 137.2L466.6 81.8C454.7 70.4 438.8 64 422.3 64L288 64zM160 192C124.7 192 96 220.7 96 256L96 512C96 547.3 124.7 576 160 576L352 576C387.3 576 416 547.3 416 512L416 496L352 496L352 512L160 512L160 256L176 256L176 192L160 192z""",
+ "color": "red",
+ "description": "Merge multiple PDF files into a single document",
+ },
+ {
+ "id": "scan_pdf",
+ "name": "PDF Text Extraction",
+ "icon": """M480 272C480 317.9 465.1 360.3 440 394.7L566.6 521.4C579.1 533.9 579.1 554.2 566.6 566.7C554.1 579.2 533.8 579.2 521.3 566.7L394.7 440C360.3 465.1 317.9 480 272 480C157.1 480 64 386.9 64 272C64 157.1 157.1 64 272 64C386.9 64 480 157.1 480 272zM272 416C351.5 416 416 351.5 416 272C416 192.5 351.5 128 272 128C192.5 128 128 192.5 128 272C128 351.5 192.5 416 272 416z""",
+ "color": "green",
+ "description": "Extract text from PDF documents using OCR",
+ },
+ {
+ "id": "doc_long_image",
+ "name": "Document to Long Image",
+ "icon": """M160 96C124.7 96 96 124.7 96 160L96 480C96 515.3 124.7 544 160 544L480 544C515.3 544 544 515.3 544 480L544 160C544 124.7 515.3 96 480 96L160 96zM224 176C250.5 176 272 197.5 272 224C272 250.5 250.5 272 224 272C197.5 272 176 250.5 176 224C176 197.5 197.5 176 224 176zM368 288C376.4 288 384.1 292.4 388.5 299.5L476.5 443.5C481 450.9 481.2 460.2 477 467.8C472.8 475.4 464.7 480 456 480L184 480C175.1 480 166.8 475 162.7 467.1C158.6 459.2 159.2 449.6 164.3 442.3L220.3 362.3C224.8 355.9 232.1 352.1 240 352.1C247.9 352.1 255.2 355.9 259.7 362.3L286.1 400.1L347.5 299.6C351.9 292.5 359.6 288.1 368 288.1z""",
+ "color": "purple",
+ "description": "Convert documents to long continuous images",
+ },
+ {
+ "id": "extract_pages",
+ "name": "Extract PDF Pages",
+ "icon": """M128.5 64C93.2 64 64.5 92.7 64.5 128L64.5 512C64.5 547.3 93.2 576 128.5 576L384.5 576C419.8 576 448.5 547.3 448.5 512L448.5 416L526.6 416L495.6 447C486.2 456.4 486.2 471.6 495.6 480.9C505 490.2 520.2 490.3 529.5 480.9L601.5 408.9C610.9 399.5 610.9 384.3 601.5 375L529.5 303C520.1 293.6 504.9 293.6 495.6 303C486.3 312.4 486.2 327.6 495.6 336.9L526.6 367.9L448.5 367.9L448.5 234.4C448.5 217.4 441.8 201.1 429.8 189.1L323.2 82.7C311.2 70.7 295 64 278 64L128.5 64zM390 240L296.5 240C283.2 240 272.5 229.3 272.5 216L272.5 122.5L390 240zM256.5 392C256.5 378.7 267.2 368 280.5 368L384.5 368L384.5 416L280.5 416C267.2 416 256.5 405.3 256.5 392z""",
+ "color": "orange",
+ "description": "Extract specific pages from PDF documents",
+ },
+ # { no-longer viable
+ # "id": "Atext2word",
+ # "name": "Advanced Text to Word",
+ # "icon": """M72 96C49.9 96 32 113.9 32 136L32 192C32 209.7 46.3 224 64 224C81.7 224 96 209.7 96 192L96 160L160 160L160 480L128 480C110.3 480 96 494.3 96 512C96 529.7 110.3 544 128 544L256 544C273.7 544 288 529.7 288 512C288 494.3 273.7 480 256 480L224 480L224 160L288 160L288 192C288 209.7 302.3 224 320 224C337.7 224 352 209.7 352 192L352 136C352 113.9 334.1 96 312 96L72 96zM470.6 425.4C458.1 412.9 437.8 412.9 425.3 425.4C412.8 437.9 412.8 458.2 425.3 470.7L489.3 534.7C501.8 547.2 522.1 547.2 534.6 534.7L598.6 470.7C611.1 458.2 611.1 437.9 598.6 425.4C586.1 412.9 565.8 412.9 553.3 425.4L543.9 434.8L543.9 205.3L553.3 214.7C565.8 227.2 586.1 227.2 598.6 214.7C611.1 202.2 611.1 181.9 598.6 169.4L534.6 105.4C528.6 99.4 520.5 96 512 96C503.5 96 495.4 99.4 489.4 105.4L425.4 169.4C412.9 181.9 412.9 202.2 425.4 214.7C437.9 227.2 458.2 227.2 470.7 214.7L480.1 205.3L480.1 434.8L470.7 425.4z""",
+ # "color": "indigo",
+ # "description": "Convert text files to Word documents with formatting",
+ # },
+ {
+ "id": "doc2image",
+ "name": "Document to Images",
+ "icon": """M128 128C128 92.7 156.7 64 192 64L341.5 64C358.5 64 374.8 70.7 386.8 82.7L493.3 189.3C505.3 201.3 512 217.6 512 234.6L512 512C512 547.3 483.3 576 448 576L192 576C156.7 576 128 547.3 128 512L128 128zM336 122.5L336 216C336 229.3 346.7 240 360 240L453.5 240L336 122.5zM256 320C256 302.3 241.7 288 224 288C206.3 288 192 302.3 192 320C192 337.7 206.3 352 224 352C241.7 352 256 337.7 256 320zM220.6 512L419.4 512C435.2 512 448 499.2 448 483.4C448 476.1 445.2 469 440.1 463.7L343.3 361.9C337.3 355.6 328.9 352 320.1 352L319.8 352C311 352 302.7 355.6 296.6 361.9L199.9 463.7C194.8 469 192 476.1 192 483.4C192 499.2 204.8 512 220.6 512z""",
+ "color": "pink",
+ "description": "Convert documents to image formats",
+ },
+ ],
+ },
+ "image": {
+ "icon": "image",
+ "color": "red",
+ "description": "Image processing and conversion tools",
+ "tools": [
+ {
+ "id": "convert_image",
+ "name": "Image Conversion",
+ "icon": """M544.1 256L552 256C565.3 256 576 245.3 576 232L576 88C576 78.3 570.2 69.5 561.2 65.8C552.2 62.1 541.9 64.2 535 71L483.3 122.8C439 86.1 382 64 320 64C191 64 84.3 159.4 66.6 283.5C64.1 301 76.2 317.2 93.7 319.7C111.2 322.2 127.4 310 129.9 292.6C143.2 199.5 223.3 128 320 128C364.4 128 405.2 143 437.7 168.3L391 215C384.1 221.9 382.1 232.2 385.8 241.2C389.5 250.2 398.3 256 408 256L544.1 256zM573.5 356.5C576 339 563.8 322.8 546.4 320.3C529 317.8 512.7 330 510.2 347.4C496.9 440.4 416.8 511.9 320.1 511.9C275.7 511.9 234.9 496.9 202.4 471.6L249 425C255.9 418.1 257.9 407.8 254.2 398.8C250.5 389.8 241.7 384 232 384L88 384C74.7 384 64 394.7 64 408L64 552C64 561.7 69.8 570.5 78.8 574.2C87.8 577.9 98.1 575.8 105 569L156.8 517.2C201 553.9 258 576 320 576C449 576 555.7 480.6 573.4 356.5z""",
+ "color": "red",
+ "description": "Convert between PNG, JPG, WEBP, and other image formats",
+ },
+ {
+ "id": "resize_image",
+ "name": "Image Resize",
+ "icon": """M264 96L120 96C106.7 96 96 106.7 96 120L96 264C96 273.7 101.8 282.5 110.8 286.2C119.8 289.9 130.1 287.8 137 281L177 241L256 320L177 399L137 359C130.1 352.1 119.8 350.1 110.8 353.8C101.8 357.5 96 366.3 96 376L96 520C96 533.3 106.7 544 120 544L264 544C273.7 544 282.5 538.2 286.2 529.2C289.9 520.2 287.9 509.9 281 503L241 463L320 384L399 463L359 503C352.1 509.9 350.1 520.2 353.8 529.2C357.5 538.2 366.3 544 376 544L520 544C533.3 544 544 533.3 544 520L544 376C544 366.3 538.2 357.5 529.2 353.8C520.2 350.1 509.9 352.1 503 359L463 399L384 320L463 241L503 281C509.9 287.9 520.2 289.9 529.2 286.2C538.2 282.5 544 273.7 544 264L544 120C544 106.7 533.3 96 520 96L376 96C366.3 96 357.5 101.8 353.8 110.8C350.1 119.8 352.2 130.1 359 137L399 177L320 256L241 177L281 137C287.9 130.1 289.9 119.8 286.2 110.8C282.5 101.8 273.7 96 264 96z""",
+ "color": "blue",
+ "description": "Resize and compress images",
+ },
+ {
+ "id": "image2pdf",
+ "name": "Image to PDF",
+ "icon": """M128 64C92.7 64 64 92.7 64 128L64 512C64 547.3 92.7 576 128 576L208 576L208 464C208 428.7 236.7 400 272 400L448 400L448 234.5C448 217.5 441.3 201.2 429.3 189.2L322.7 82.7C310.7 70.7 294.5 64 277.5 64L128 64zM389.5 240L296 240C282.7 240 272 229.3 272 216L272 122.5L389.5 240zM272 444C261 444 252 453 252 464L252 592C252 603 261 612 272 612C283 612 292 603 292 592L292 564L304 564C337.1 564 364 537.1 364 504C364 470.9 337.1 444 304 444L272 444zM304 524L292 524L292 484L304 484C315 484 324 493 324 504C324 515 315 524 304 524zM400 444C389 444 380 453 380 464L380 592C380 603 389 612 400 612L432 612C460.7 612 484 588.7 484 560L484 496C484 467.3 460.7 444 432 444L400 444zM420 572L420 484L432 484C438.6 484 444 489.4 444 496L444 560C444 566.6 438.6 572 432 572L420 572zM508 464L508 592C508 603 517 612 528 612C539 612 548 603 548 592L548 548L576 548C587 548 596 539 596 528C596 517 587 508 576 508L548 508L548 484L576 484C587 484 596 475 596 464C596 453 587 444 576 444L528 444C517 444 508 453 508 464z""",
+ "color": "green",
+ "description": "Convert images to PDF documents",
+ },
+ {
+ "id": "image2word",
+ "name": "Image to Word",
+ "icon": """M128 128C128 92.7 156.7 64 192 64L341.5 64C358.5 64 374.8 70.7 386.8 82.7L493.3 189.3C505.3 201.3 512 217.6 512 234.6L512 512C512 547.3 483.3 576 448 576L192 576C156.7 576 128 547.3 128 512L128 128zM336 122.5L336 216C336 229.3 346.7 240 360 240L453.5 240L336 122.5zM263.4 338.8C260.5 325.9 247.7 317.7 234.8 320.6C221.9 323.5 213.7 336.3 216.6 349.2L248.6 493.2C250.9 503.7 260 511.4 270.8 512C281.6 512.6 291.4 505.9 294.8 495.6L320 419.9L345.2 495.6C348.6 505.8 358.4 512.5 369.2 512C380 511.5 389.1 503.8 391.4 493.2L423.4 349.2C426.3 336.3 418.1 323.4 405.2 320.6C392.3 317.8 379.4 325.9 376.6 338.8L363.4 398.2L342.8 336.4C339.5 326.6 330.4 320 320 320C309.6 320 300.5 326.6 297.2 336.4L276.6 398.2L263.4 338.8z""",
+ "color": "purple",
+ "description": "Convert images to Word documents",
+ },
+ {
+ "id": "image2gray",
+ "name": "Grayscale Conversion",
+ "icon": """M320 64C178.6 64 64 178.6 64 320C64 461.4 178.6 576 320 576C388.8 576 451.3 548.8 497.3 504.6C504.6 497.6 506.7 486.7 502.6 477.5C498.5 468.3 488.9 462.6 478.8 463.4C473.9 463.8 469 464 464 464C362.4 464 280 381.6 280 280C280 207.9 321.5 145.4 382.1 115.2C391.2 110.7 396.4 100.9 395.2 90.8C394 80.7 386.6 72.5 376.7 70.3C358.4 66.2 339.4 64 320 64z""",
+ "color": "gray",
+ "description": "Convert images to grayscale",
+ },
+ {
+ "id": "ocr",
+ "name": "OCR Text Extraction",
+ "icon": """M72 96C49.9 96 32 113.9 32 136L32 192C32 209.7 46.3 224 64 224C81.7 224 96 209.7 96 192L96 160L160 160L160 480L128 480C110.3 480 96 494.3 96 512C96 529.7 110.3 544 128 544L256 544C273.7 544 288 529.7 288 512C288 494.3 273.7 480 256 480L224 480L224 160L288 160L288 192C288 209.7 302.3 224 320 224C337.7 224 352 209.7 352 192L352 136C352 113.9 334.1 96 312 96L72 96zM470.6 425.4C458.1 412.9 437.8 412.9 425.3 425.4C412.8 437.9 412.8 458.2 425.3 470.7L489.3 534.7C501.8 547.2 522.1 547.2 534.6 534.7L598.6 470.7C611.1 458.2 611.1 437.9 598.6 425.4C586.1 412.9 565.8 412.9 553.3 425.4L543.9 434.8L543.9 205.3L553.3 214.7C565.8 227.2 586.1 227.2 598.6 214.7C611.1 202.2 611.1 181.9 598.6 169.4L534.6 105.4C528.6 99.4 520.5 96 512 96C503.5 96 495.4 99.4 489.4 105.4L425.4 169.4C412.9 181.9 412.9 202.2 425.4 214.7C437.9 227.2 458.2 227.2 470.7 214.7L480.1 205.3L480.1 434.8L470.7 425.4z""",
+ "color": "indigo",
+ "description": "Extract text from images using OCR",
+ },
+ ],
+ },
+ "audio": {
+ "icon": "music",
+ "color": "green",
+ "description": "Audio conversion and processing tools",
+ "tools": [
+ {
+ "id": "convert_audio",
+ "name": "Audio Conversion",
+ "icon": """M566.6 214.6L470.6 310.6C461.4 319.8 447.7 322.5 435.7 317.5C423.7 312.5 416 300.9 416 288L416 224L96 224C78.3 224 64 209.7 64 192C64 174.3 78.3 160 96 160L416 160L416 96C416 83.1 423.8 71.4 435.8 66.4C447.8 61.4 461.5 64.2 470.7 73.3L566.7 169.3C579.2 181.8 579.2 202.1 566.7 214.6zM169.3 566.6L73.3 470.6C60.8 458.1 60.8 437.8 73.3 425.3L169.3 329.3C178.5 320.1 192.2 317.4 204.2 322.4C216.2 327.4 224 339.1 224 352L224 416L544 416C561.7 416 576 430.3 576 448C576 465.7 561.7 480 544 480L224 480L224 544C224 556.9 216.2 568.6 204.2 573.6C192.2 578.6 178.5 575.8 169.3 566.7z""",
+ "color": "green",
+ "description": "Convert between MP3, WAV, FLAC, and other audio formats",
+ },
+ {
+ "id": "audio_join",
+ "name": "Audio Joining",
+ "icon": """M296.5 69.2C311.4 62.3 328.6 62.3 343.5 69.2L562.1 170.2C570.6 174.1 576 182.6 576 192C576 201.4 570.6 209.9 562.1 213.8L343.5 314.8C328.6 321.7 311.4 321.7 296.5 314.8L77.9 213.8C69.4 209.8 64 201.3 64 192C64 182.7 69.4 174.1 77.9 170.2L296.5 69.2zM112.1 282.4L276.4 358.3C304.1 371.1 336 371.1 363.7 358.3L528 282.4L562.1 298.2C570.6 302.1 576 310.6 576 320C576 329.4 570.6 337.9 562.1 341.8L343.5 442.8C328.6 449.7 311.4 449.7 296.5 442.8L77.9 341.8C69.4 337.8 64 329.3 64 320C64 310.7 69.4 302.1 77.9 298.2L112 282.4zM77.9 426.2L112 410.4L276.3 486.3C304 499.1 335.9 499.1 363.6 486.3L527.9 410.4L562 426.2C570.5 430.1 575.9 438.6 575.9 448C575.9 457.4 570.5 465.9 562 469.8L343.4 570.8C328.5 577.7 311.3 577.7 296.4 570.8L77.9 469.8C69.4 465.8 64 457.3 64 448C64 438.7 69.4 430.1 77.9 426.2z""",
+ "color": "blue",
+ "description": "Merge multiple audio files into one",
+ },
+ {
+ "id": "extract_audio",
+ "name": "Extract Audio from Video",
+ "icon": """M96 240L96 352C96 475.7 196.3 576 320 576C443.7 576 544 475.7 544 352L544 240L416 240L416 352C416 405 373 448 320 448C267 448 224 405 224 352L224 240L96 240zM96 192L224 192L224 128C224 110.3 209.7 96 192 96L128 96C110.3 96 96 110.3 96 128L96 192zM416 192L544 192L544 128C544 110.3 529.7 96 512 96L448 96C430.3 96 416 110.3 416 128L416 192z""",
+ "color": "purple",
+ "description": "Extract audio tracks from video files",
+ },
+ {
+ "id": "audio_effect",
+ "name": "Audio Effects",
+ "icon": """M128 160C128 142.3 142.3 128 160 128L320 128C337.7 128 352 142.3 352 160L352 448L448 448L448 320C448 302.3 462.3 288 480 288L544 288C561.7 288 576 302.3 576 320C576 337.7 561.7 352 544 352L512 352L512 480C512 497.7 497.7 512 480 512L320 512C302.3 512 288 497.7 288 480L288 192L192 192L192 320C192 337.7 177.7 352 160 352L96 352C78.3 352 64 337.7 64 320C64 302.3 78.3 288 96 288L128 288L128 160""",
+ "color": "yellow",
+ "description": "Apply effects and process audio files",
+ },
+ ],
+ },
+ "video": {
+ "icon": "video",
+ "color": "purple",
+ "description": "Video conversion and analysis tools",
+ "tools": [
+ {
+ "id": "convert_video",
+ "name": "Video Conversion",
+ "icon": """M544.1 256L552 256C565.3 256 576 245.3 576 232L576 88C576 78.3 570.2 69.5 561.2 65.8C552.2 62.1 541.9 64.2 535 71L483.3 122.8C439 86.1 382 64 320 64C191 64 84.3 159.4 66.6 283.5C64.1 301 76.2 317.2 93.7 319.7C111.2 322.2 127.4 310 129.9 292.6C143.2 199.5 223.3 128 320 128C364.4 128 405.2 143 437.7 168.3L391 215C384.1 221.9 382.1 232.2 385.8 241.2C389.5 250.2 398.3 256 408 256L544.1 256zM573.5 356.5C576 339 563.8 322.8 546.4 320.3C529 317.8 512.7 330 510.2 347.4C496.9 440.4 416.8 511.9 320.1 511.9C275.7 511.9 234.9 496.9 202.4 471.6L249 425C255.9 418.1 257.9 407.8 254.2 398.8C250.5 389.8 241.7 384 232 384L88 384C74.7 384 64 394.7 64 408L64 552C64 561.7 69.8 570.5 78.8 574.2C87.8 577.9 98.1 575.8 105 569L156.8 517.2C201 553.9 258 576 320 576C449 576 555.7 480.6 573.4 356.5z""",
+ "color": "purple",
+ "description": "Convert between MP4, MKV, AVI, and other video formats",
+ },
+ {
+ "id": "analyze_video",
+ "name": "Video Analysis",
+ "icon": """M96 96C113.7 96 128 110.3 128 128L128 464C128 472.8 135.2 480 144 480L544 480C561.7 480 576 494.3 576 512C576 529.7 561.7 544 544 544L144 544C99.8 544 64 508.2 64 464L64 128C64 110.3 78.3 96 96 96zM192 160C192 142.3 206.3 128 224 128L416 128C433.7 128 448 142.3 448 160C448 177.7 433.7 192 416 192L224 192C206.3 192 192 177.7 192 160zM224 240L352 240C369.7 240 384 254.3 384 272C384 289.7 369.7 304 352 304L224 304C206.3 304 192 289.7 192 272C192 254.3 206.3 240 224 240zM224 352L480 352C497.7 352 512 366.3 512 384C512 401.7 497.7 416 480 416L224 416C206.3 416 192 401.7 192 384C192 366.3 206.3 352 224 352z""",
+ "color": "green",
+ "description": "Analyze video files and extract metadata",
+ },
+ {
+ "id": "extract_audio",
+ "name": "Extract Audio from Video",
+ "icon": """M532 71C539.6 77.1 544 86.3 544 96L544 400C544 444.2 501 480 448 480C395 480 352 444.2 352 400C352 355.8 395 320 448 320C459.2 320 470 321.6 480 324.6L480 207.9L256 257.7L256 464C256 508.2 213 544 160 544C107 544 64 508.2 64 464C64 419.8 107 384 160 384C171.2 384 182 385.6 192 388.6L192 160C192 145 202.4 132 217.1 128.8L505.1 64.8C514.6 62.7 524.5 65 532.1 71.1z""",
+ "color": "blue",
+ "description": "Extract audio tracks from video files",
+ },
+ ],
+ },
+ "batch": {
+ "icon": "layer-group",
+ "color": "purple",
+ "description": "Batch processing and workflow tools",
+ "tools": [
+ {
+ "id": "batch_dashboard",
+ "name": "Batch Processing Dashboard",
+ "icon": """M64 320C64 178.6 178.6 64 320 64C461.4 64 576 178.6 576 320C576 461.4 461.4 576 320 576C178.6 576 64 461.4 64 320zM352 160C352 142.3 337.7 128 320 128C302.3 128 288 142.3 288 160C288 177.7 302.3 192 320 192C337.7 192 352 177.7 352 160zM320 480C355.3 480 384 451.3 384 416C384 399.8 378 384.9 368 373.7L437.5 234.8C443.4 222.9 438.6 208.5 426.8 202.6C415 196.7 400.5 201.5 394.6 213.3L325.1 352.2C323.4 352.1 321.7 352 320 352C284.7 352 256 380.7 256 416C256 451.3 284.7 480 320 480zM240 208C240 190.3 225.7 176 208 176C190.3 176 176 190.3 176 208C176 225.7 190.3 240 208 240C225.7 240 240 225.7 240 208zM160 352C177.7 352 192 337.7 192 320C192 302.3 177.7 288 160 288C142.3 288 128 302.3 128 320C128 337.7 142.3 352 160 352zM512 320C512 302.3 497.7 288 480 288C462.3 288 448 302.3 448 320C448 337.7 462.3 352 480 352C497.7 352 512 337.7 512 320z""",
+ "color": "purple",
+ "description": "Manage batch processing operations",
+ },
+ {
+ "id": "batch_doc_convert",
+ "name": "Batch Document Conversion",
+ "icon": """M288 64C252.7 64 224 92.7 224 128L224 384C224 419.3 252.7 448 288 448L480 448C515.3 448 544 419.3 544 384L544 183.4C544 166 536.9 149.3 524.3 137.2L466.6 81.8C454.7 70.4 438.8 64 422.3 64L288 64zM160 192C124.7 192 96 220.7 96 256L96 512C96 547.3 124.7 576 160 576L352 576C387.3 576 416 547.3 416 512L416 496L352 496L352 512L160 512L160 256L176 256L176 192L160 192z""",
+ "color": "blue",
+ "description": "Convert multiple documents in batch",
+ },
+ {
+ "id": "folder_operations",
+ "name": "Folder Operations",
+ "icon": """M80 88C80 74.7 69.3 64 56 64C42.7 64 32 74.7 32 88L32 456C32 486.9 57.1 512 88 512L272 512L272 464L88 464C83.6 464 80 460.4 80 456L80 224L272 224L272 176L80 176L80 88zM368 288L560 288C586.5 288 608 266.5 608 240L608 144C608 117.5 586.5 96 560 96L477.3 96C468.8 96 460.7 92.6 454.7 86.6L446.1 78C437.1 69 424.9 63.9 412.2 63.9L368 64C341.5 64 320 85.5 320 112L320 240C320 266.5 341.5 288 368 288zM368 576L560 576C586.5 576 608 554.5 608 528L608 432C608 405.5 586.5 384 560 384L477.3 384C468.8 384 460.7 380.6 454.7 374.6L446.1 366C437.1 357 424.9 351.9 412.2 351.9L368 352C341.5 352 320 373.5 320 400L320 528C320 554.5 341.5 576 368 576z""",
+ "color": "yellow",
+ "description": "Process entire folders recursively",
+ },
+ {
+ "id": "bulk_ocr",
+ "name": "Bulk OCR Processing",
+ "icon": """M72 96C49.9 96 32 113.9 32 136L32 192C32 209.7 46.3 224 64 224C81.7 224 96 209.7 96 192L96 160L160 160L160 480L128 480C110.3 480 96 494.3 96 512C96 529.7 110.3 544 128 544L256 544C273.7 544 288 529.7 288 512C288 494.3 273.7 480 256 480L224 480L224 160L288 160L288 192C288 209.7 302.3 224 320 224C337.7 224 352 209.7 352 192L352 136C352 113.9 334.1 96 312 96L72 96zM470.6 425.4C458.1 412.9 437.8 412.9 425.3 425.4C412.8 437.9 412.8 458.2 425.3 470.7L489.3 534.7C501.8 547.2 522.1 547.2 534.6 534.7L598.6 470.7C611.1 458.2 611.1 437.9 598.6 425.4C586.1 412.9 565.8 412.9 553.3 425.4L543.9 434.8L543.9 205.3L553.3 214.7C565.8 227.2 586.1 227.2 598.6 214.7C611.1 202.2 611.1 181.9 598.6 169.4L534.6 105.4C528.6 99.4 520.5 96 512 96C503.5 96 495.4 99.4 489.4 105.4L425.4 169.4C412.9 181.9 412.9 202.2 425.4 214.7C437.9 227.2 458.2 227.2 470.7 214.7L480.1 205.3L480.1 434.8L470.7 425.4z""",
+ "color": "indigo",
+ "description": "Extract text from multiple files",
+ },
+ ],
+ },
+}
diff --git a/fweb/core/forms.py b/fweb/core/forms.py
new file mode 100644
index 0000000..e1bc796
--- /dev/null
+++ b/fweb/core/forms.py
@@ -0,0 +1,149 @@
+from django import forms
+
+
+class MultipleFileInput(forms.ClearableFileInput):
+    """File input widget that accepts several files in one selection.
+
+    Django 4.2.1+ (and the 3.2.19 / 4.1.9 security releases) raise
+    ``ValueError`` when ``attrs={"multiple": True}`` is given to a file
+    widget whose ``allow_multiple_selected`` is false, so multi-select is
+    enabled on the class rather than through ``attrs``.
+    """
+
+    allow_multiple_selected = True
+
+
+class MultipleFileField(forms.FileField):
+    """``FileField`` whose cleaned value is a list of uploaded files."""
+
+    def __init__(self, *args, **kwargs):
+        kwargs.setdefault("widget", MultipleFileInput())
+        super().__init__(*args, **kwargs)
+
+    def clean(self, data, initial=None):
+        """Validate each submitted file with the single-file ``clean``.
+
+        Returns:
+            list: the cleaned uploads; empty when the field is optional
+            and nothing was submitted.
+
+        Raises:
+            forms.ValidationError: if a file is invalid, or the field is
+            required and nothing was submitted.
+        """
+        single_file_clean = super().clean
+        if isinstance(data, (list, tuple)):
+            # Clean an empty selection as ``None`` so the inherited
+            # ``required`` check still runs.
+            uploads = list(data) or [None]
+        else:
+            uploads = [data]
+        cleaned = [single_file_clean(upload, initial) for upload in uploads]
+        return [upload for upload in cleaned if upload]
+
+
+class FileUploadForm(forms.Form):
+    """Base form shared by the tool forms: uploads plus common options.
+
+    ``cleaned_data["files"]`` is a list of uploaded files.
+
+    Args:
+        tool_config: optional keyword argument; a mapping whose
+            ``"format_choices"`` entry, when present, becomes the choices
+            of ``target_format``.
+    """
+
+    files = MultipleFileField(required=True)
+    target_format = forms.ChoiceField(required=False)
+    use_extras = forms.BooleanField(required=False)
+
+    def __init__(self, *args, **kwargs):
+        # ``or {}`` also tolerates an explicit ``tool_config=None``.
+        tool_config = kwargs.pop("tool_config", None) or {}
+        super().__init__(*args, **kwargs)
+
+        # Dynamically set choices based on tool
+        if "format_choices" in tool_config:
+            self.fields["target_format"].choices = tool_config["format_choices"]
+
+
+class DocumentConversionForm(FileUploadForm):
+    """Upload form with the document-conversion options."""
+
+    isolate = forms.CharField(required=False, max_length=50)
+    threads = forms.IntegerField(required=False, min_value=1, max_value=10, initial=3)
+    preserve_quality = forms.BooleanField(required=False, initial=True)
+
+
+class ImageConversionForm(FileUploadForm):
+    """Upload form with the image-conversion options."""
+
+    quality = forms.IntegerField(required=False, min_value=1, max_value=100, initial=85)
+    width = forms.IntegerField(required=False, min_value=1)
+    height = forms.IntegerField(required=False, min_value=1)
+    size_limit = forms.CharField(required=False, max_length=20)
+
+
+class AudioConversionForm(FileUploadForm):
+    """Upload form with bitrate and sample-rate choices."""
+
+    bitrate = forms.ChoiceField(
+        choices=[
+            ("128", "128 kbps"),
+            ("192", "192 kbps"),
+            ("256", "256 kbps"),
+            ("320", "320 kbps"),
+        ],
+        initial="192",
+    )
+    sample_rate = forms.ChoiceField(
+        choices=[("44100", "44.1 kHz"), ("48000", "48 kHz"), ("96000", "96 kHz")],
+        initial="44100",
+    )
+
+
+class VideoConversionForm(FileUploadForm):
+    """Upload form with quality and resolution choices."""
+
+    quality = forms.ChoiceField(
+        choices=[
+            ("high", "High Quality"),
+            ("medium", "Medium Quality"),
+            ("low", "Low Quality"),
+            ("original", "Original Quality"),
+        ],
+        initial="medium",
+    )
+    resolution = forms.ChoiceField(
+        choices=[
+            ("original", "Original"),
+            ("4k", "4K (3840x2160)"),
+            ("1080p", "1080p (1920x1080)"),
+            ("720p", "720p (1280x720)"),
+        ],
+        initial="original",
+    )
+
+
+class OCRForm(FileUploadForm):
+    """Upload form with OCR language, output format and layout options."""
+
+    language = forms.ChoiceField(
+        choices=[
+            ("eng", "English"),
+            ("spa", "Spanish"),
+            ("fra", "French"),
+            ("deu", "German"),
+            ("multi", "Multiple Languages"),
+        ],
+        initial="eng",
+    )
+    output_format = forms.ChoiceField(
+        choices=[
+            ("txt", "Plain Text"),
+            ("docx", "Word Document"),
+            ("pdf", "PDF Document"),
+        ],
+        initial="txt",
+    )
+    preserve_layout = forms.BooleanField(required=False, initial=True)
diff --git a/fweb/core/migrations/0001_initial.py b/fweb/core/migrations/0001_initial.py
new file mode 100644
index 0000000..e6a71d5
--- /dev/null
+++ b/fweb/core/migrations/0001_initial.py
@@ -0,0 +1,48 @@
+# Generated by Django 5.1.6 on 2025-09-26 16:15
+
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # creates the ProcessingJob and ProcessedFile tables
+
+    initial = True  # marks this as the app's initial migration
+
+    dependencies = [
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),  # depend on whichever user model is configured
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='ProcessingJob',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('job_id', models.CharField(max_length=100, unique=True)),
+                ('tool_id', models.CharField(max_length=50)),
+                ('input_files', models.JSONField()),
+                ('output_files', models.JSONField(default=list)),
+                ('status', models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed'), ('cancelled', 'Cancelled')], default='pending', max_length=20)),
+                ('progress', models.IntegerField(default=0)),
+                ('created_at', models.DateTimeField(auto_now_add=True)),
+                ('updated_at', models.DateTimeField(auto_now=True)),
+                ('error_message', models.TextField(blank=True)),
+                ('user', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),  # nullable owner; cascades on user delete
+            ],
+            options={
+                'ordering': ['-created_at'],  # newest first
+            },
+        ),
+        migrations.CreateModel(
+            name='ProcessedFile',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('original_name', models.CharField(max_length=255)),
+                ('processed_name', models.CharField(max_length=255)),
+                ('file_path', models.CharField(max_length=500)),
+                ('file_size', models.BigIntegerField()),
+                ('processed_at', models.DateTimeField(auto_now_add=True)),
+                ('job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.processingjob')),  # rows are deleted with their job
+            ],
+        ),
+    ]
diff --git a/fweb/core/migrations/__init__.py b/fweb/core/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fweb/core/models.py b/fweb/core/models.py
new file mode 100644
index 0000000..8ed85ef
--- /dev/null
+++ b/fweb/core/models.py
@@ -0,0 +1,41 @@
+from django.db import models
+from django.contrib.auth.models import User
+
+
+class ProcessingJob(models.Model):  # one tool run: its input/output file lists, status and progress
+    JOB_STATUS = [  # (stored value, display label) pairs used as the choices of `status`
+        ("pending", "Pending"),
+        ("processing", "Processing"),
+        ("completed", "Completed"),
+        ("failed", "Failed"),
+        ("cancelled", "Cancelled"),
+    ]
+
+    job_id = models.CharField(max_length=100, unique=True)  # unique identifier, distinct from the automatic primary key
+    user = models.ForeignKey(User, on_delete=models.CASCADE, null=True, blank=True)  # optional owner; deleting the user deletes the job
+    tool_id = models.CharField(max_length=50)  # presumably a tool "id" from TOOL_CONFIGS -- TODO confirm against callers
+    input_files = models.JSONField()  # List of input file paths
+    output_files = models.JSONField(default=list)  # List of output file paths
+    status = models.CharField(max_length=20, choices=JOB_STATUS, default="pending")
+    progress = models.IntegerField(default=0)  # 0-100 (intended range; no validator enforces it)
+    created_at = models.DateTimeField(auto_now_add=True)  # set once, when the row is first saved
+    updated_at = models.DateTimeField(auto_now=True)  # refreshed on every save()
+    error_message = models.TextField(blank=True)
+
+    class Meta:
+        ordering = ["-created_at"]  # default ordering: newest jobs first
+
+    def __str__(self):  # renders as "<job_id> - <tool_id> - <status>"
+        return f"{self.job_id} - {self.tool_id} - {self.status}"
+
+
+class ProcessedFile(models.Model):  # one output file recorded against a ProcessingJob
+    job = models.ForeignKey(ProcessingJob, on_delete=models.CASCADE)  # parent job; deleting the job deletes this row
+    original_name = models.CharField(max_length=255)
+    processed_name = models.CharField(max_length=255)
+    file_path = models.CharField(max_length=500)  # stored as plain text, not a FileField
+    file_size = models.BigIntegerField()  # presumably bytes -- TODO confirm against the code that writes it
+    processed_at = models.DateTimeField(auto_now_add=True)  # set once, when the row is first saved
+
+    def __str__(self):  # renders as "<original_name> -> <processed_name>"
+        return f"{self.original_name} -> {self.processed_name}"
diff --git a/fweb/core/static/css/config.css b/fweb/core/static/css/config.css
new file mode 100644
index 0000000..fe30e5a
--- /dev/null
+++ b/fweb/core/static/css/config.css
@@ -0,0 +1,127 @@
+@import url("https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap");
+
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+.scrollbar-hide {
+  /* Keep the element scrollable while hiding its scrollbar */
+  -ms-overflow-style: none; /* Internet Explorer 10+ */
+  scrollbar-width: none; /* Firefox */
+  overflow: -moz-scrollbars-none; /* Older Firefox */
+  overflow-y: scroll; /* Add this to ensure the content is scrollable */
+}
+.scrollbar-hide::-webkit-scrollbar {
+  display: none; /* Hide scrollbar for Chrome, Safari, and Edge */
+}
+
+body {
+  font-family: "Inter", sans-serif; /* "Inter" is requested by the Google Fonts @import in this file */
+}
+
+.fade-in {
+  animation: fadeIn 0.5s ease-in-out; /* runs the fadeIn keyframes below once */
+}
+
+@keyframes fadeIn {
+  from {
+    opacity: 0;
+  }
+  to {
+    opacity: 1;
+  }
+}
+
+.gradient-bg {
+  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+}
+.card-hover {
+  transition: all 0.3s ease; /* animates the :hover lift defined below */
+}
+.card-hover:hover {
+  transform: translateY(-5px); /* lift the card 5px */
+  box-shadow:
+    0 20px 25px -5px rgba(0, 0, 0, 0.1),
+    0 10px 10px -5px rgba(0, 0, 0, 0.04);
+}
+.file-drop-zone {
+  border: 2px dashed #d1d5db;
+  transition: all 0.3s ease;
+}
+.file-drop-zone.dragover { /* presumably toggled by script while a file is dragged over -- TODO confirm */
+  border-color: #3b82f6;
+  background-color: #eff6ff;
+}
+.nav-link { /* composed from Tailwind utilities, including dark-mode and hover variants */
+  @apply px-3 py-2 rounded-md text-sm font-medium text-gray-700 dark:text-gray-300 hover:text-gray-900 dark:hover:text-white hover:bg-gray-100 dark:hover:bg-gray-700 transition-colors;
+}
+.nav-link.active {
+  @apply bg-blue-100 dark:bg-blue-900 text-blue-700 dark:text-blue-300;
+}
+.tool-active { /* blue background with a 4px right border */
+  @apply bg-blue-50 dark:bg-blue-900 border-blue-500 border-r-4;
+}
+
+.nav-svg { /* 1.1rem square icon; fill color follows light/dark mode */
+  @apply w-[1.1rem] h-[1.1rem] text-2xl text-blue-600 fill-gray-700 dark:fill-gray-300;
+}
+
+.nav-svg.active {
+  @apply fill-blue-700 dark:fill-blue-300;
+}
+
+.waveform { /* flex row that spreads its children across the full width */
+  display: flex;
+  align-items: center;
+  height: 40px;
+  width: 100%;
+  justify-content: space-between;
+}
+
+.bar {
+  width: 3px;
+  height: 10px;
+  background-color: #3b82f6;
+  border-radius: 3px;
+  animation: wave 1.2s infinite ease-in-out; /* loops the wave keyframes below */
+}
+
+@keyframes wave {
+  0%,
+  100% {
+    transform: scaleY(0.5);
+  }
+  50% {
+    transform: scaleY(1.8);
+  }
+}
+
+.bar:nth-child(1) { /* the rules below stagger the first ten bars in 0.1s steps */
+  animation-delay: 0s;
+}
+.bar:nth-child(2) {
+  animation-delay: 0.1s;
+}
+.bar:nth-child(3) {
+  animation-delay: 0.2s;
+}
+.bar:nth-child(4) {
+  animation-delay: 0.3s;
+}
+.bar:nth-child(5) {
+  animation-delay: 0.4s;
+}
+.bar:nth-child(6) {
+  animation-delay: 0.5s;
+}
+.bar:nth-child(7) {
+  animation-delay: 0.6s;
+}
+.bar:nth-child(8) {
+  animation-delay: 0.7s;
+}
+.bar:nth-child(9) {
+  animation-delay: 0.8s;
+}
+.bar:nth-child(10) {
+  animation-delay: 0.9s;
+}
diff --git a/fweb/core/static/css/styles.css b/fweb/core/static/css/styles.css
new file mode 100644
index 0000000..d882f01
--- /dev/null
+++ b/fweb/core/static/css/styles.css
@@ -0,0 +1 @@
+*,::backdrop,:after,:before{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:#3b82f680;--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }/*! 
tailwindcss v3.4.17 | MIT License | https://tailwindcss.com*/*,:after,:before{box-sizing:border-box;border:0 solid #e5e7eb}:after,:before{--tw-content:""}:host,html{line-height:1.5;-webkit-text-size-adjust:100%;tab-size:4;font-family:ui-sans-serif,system-ui,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;font-feature-settings:normal;font-variation-settings:normal;-webkit-tap-highlight-color:transparent}body{margin:0;line-height:inherit}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:JetBrains Mono,monospace;font-feature-settings:normal;font-variation-settings:normal;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:initial}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}button,input,optgroup,select,textarea{font-family:inherit;font-feature-settings:inherit;font-variation-settings:inherit;font-size:100%;font-weight:inherit;line-height:inherit;letter-spacing:inherit;color:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:initial;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:initial}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}fieldset{margin:0}fieldset,legend{padding:0}menu,ol,ul{list-style:none;margin:0;paddin
g:0}dialog{padding:0}textarea{resize:vertical}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}[role=button],button{cursor:pointer}:disabled{cursor:default}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}[hidden]:where(:not([hidden=until-found])){display:none}.\!container{width:100%!important}.container{width:100%}@media (min-width:640px){.\!container{max-width:640px!important}.container{max-width:640px}}@media (min-width:768px){.\!container{max-width:768px!important}.container{max-width:768px}}@media (min-width:1024px){.\!container{max-width:1024px!important}.container{max-width:1024px}}@media (min-width:1280px){.\!container{max-width:1280px!important}.container{max-width:1280px}}@media (min-width:1536px){.\!container{max-width:1536px!important}.container{max-width:1536px}}.pointer-events-none{pointer-events:none}.visible{visibility:visible}.collapse{visibility:collapse}.static{position:static}.\!fixed{position:fixed!important}.fixed{position:fixed}.absolute{position:absolute}.relative{position:relative}.sticky{position:-webkit-sticky;position:sticky}.inset-0{inset:0}.left-3{left:.75rem}.top-0{top:0}.top-1\/2{top:50%}.top-24{top:6rem}.top-6{top:1.5rem}.isolate{isolation:isolate}.z-50{z-index:50}.mx-2{margin-left:.5rem;margin-right:.5rem}.mx-4{margin-left:1rem;margin-right:1rem}.mx-auto{margin-left:auto;margin-right:auto}.mb-1{margin-bottom:.25rem}.mb-2{margin-bottom:.5rem}.mb-3{margin-bottom:.75rem}.mb-4{margin-bottom:1rem}.mb-6{margin-bottom:1.5rem}.mb-8{margin-bottom:2rem}.ml-2{margin-left:.5rem}.ml-6{margin-left:1.5rem}.mr-1{margin-right:.25rem}.mr-2{margin-right:.5rem}.mr-3{margin-right:.75rem}.mr-4{margin-right:1rem}.mt-1{margin-top:.25rem}.mt-12{margin-top:3rem}.mt-2{margin-top:.5rem}.mt-3{margin-top:.75rem}.mt-4{margin-top:1rem}.mt-6{margin-top:1.5rem}.mt-8{margin-top:2rem}.block{display:block}.inline-block{display:inline-block}.inline{display:inline}.flex{display:flex}.t
able{display:table}.table-caption{display:table-caption}.table-cell{display:table-cell}.grid{display:grid}.contents{display:contents}.\!hidden{display:none!important}.hidden{display:none}.h-10{height:2.5rem}.h-12{height:3rem}.h-16{height:4rem}.h-2{height:.5rem}.h-3{height:.75rem}.h-48{height:12rem}.h-5{height:1.25rem}.h-6{height:1.5rem}.h-7{height:1.75rem}.h-8{height:2rem}.h-full{height:100%}.max-h-40{max-height:10rem}.max-h-96{max-height:24rem}.max-h-\[90vh\]{max-height:90vh}.min-h-\[600px\]{min-height:600px}.w-10{width:2.5rem}.w-12{width:3rem}.w-5{width:1.25rem}.w-6{width:1.5rem}.w-7{width:1.75rem}.w-8{width:2rem}.w-full{width:100%}.max-w-3xl{max-width:48rem}.max-w-4xl{max-width:56rem}.max-w-6xl{max-width:72rem}.max-w-7xl{max-width:80rem}.max-w-md{max-width:28rem}.flex-1{flex:1 1 0%}.flex-shrink-0{flex-shrink:0}.-translate-y-1\/2{--tw-translate-y:-50%}.-translate-y-1\/2,.transform{transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.cursor-pointer{cursor:pointer}.resize{resize:both}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.grid-cols-4{grid-template-columns:repeat(4,minmax(0,1fr))}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-center{align-items:center}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-1{gap:.25rem}.gap-2{gap:.5rem}.gap-3{gap:.75rem}.gap-4{gap:1rem}.gap-6{gap:1.5rem}.gap-8{gap:2rem}.space-x-2>:not([hidden])~:not([hidden]){--tw-space-x-reverse:0;margin-right:calc(.5rem*var(--tw-space-x-reverse));margin-left:calc(.5rem*(1 - var(--tw-space-x-reverse)))}.space-x-3>:not([hidden])~:not([hidden]){--tw-space-x-reverse:0;margin-right:calc(.75rem*var(--tw-space-x-reverse));margin-left:calc(.75rem*(1 - 
var(--tw-space-x-reverse)))}.space-x-4>:not([hidden])~:not([hidden]){--tw-space-x-reverse:0;margin-right:calc(1rem*var(--tw-space-x-reverse));margin-left:calc(1rem*(1 - var(--tw-space-x-reverse)))}.space-y-1>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-top:calc(.25rem*(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.25rem*var(--tw-space-y-reverse))}.space-y-2>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-top:calc(.5rem*(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.5rem*var(--tw-space-y-reverse))}.space-y-3>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-top:calc(.75rem*(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.75rem*var(--tw-space-y-reverse))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-top:calc(1rem*(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1rem*var(--tw-space-y-reverse))}.space-y-6>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-top:calc(1.5rem*(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1.5rem*var(--tw-space-y-reverse))}.overflow-auto{overflow:auto}.overflow-hidden{overflow:hidden}.overflow-y-auto{overflow-y:auto}.truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.rounded{border-radius:.25rem}.rounded-2xl{border-radius:1rem}.rounded-full{border-radius:9999px}.rounded-lg{border-radius:.5rem}.rounded-xl{border-radius:.75rem}.border{border-width:1px}.border-2{border-width:2px}.border-b{border-bottom-width:1px}.border-r-4{border-right-width:4px}.border-t{border-top-width:1px}.border-dashed{border-style:dashed}.border-blue-500{--tw-border-opacity:1;border-color:rgb(59 130 246/var(--tw-border-opacity,1))}.border-gray-200{--tw-border-opacity:1;border-color:rgb(229 231 235/var(--tw-border-opacity,1))}.border-gray-300{--tw-border-opacity:1;border-color:rgb(209 213 219/var(--tw-border-opacity,1))}.bg-black{--tw-bg-opacity:1;background-color:rgb(0 0 
0/var(--tw-bg-opacity,1))}.bg-blue-100{--tw-bg-opacity:1;background-color:rgb(219 234 254/var(--tw-bg-opacity,1))}.bg-blue-50{--tw-bg-opacity:1;background-color:rgb(239 246 255/var(--tw-bg-opacity,1))}.bg-blue-600{--tw-bg-opacity:1;background-color:rgb(37 99 235/var(--tw-bg-opacity,1))}.bg-gray-200{--tw-bg-opacity:1;background-color:rgb(229 231 235/var(--tw-bg-opacity,1))}.bg-gray-50{--tw-bg-opacity:1;background-color:rgb(249 250 251/var(--tw-bg-opacity,1))}.bg-green-100{--tw-bg-opacity:1;background-color:rgb(220 252 231/var(--tw-bg-opacity,1))}.bg-green-600{--tw-bg-opacity:1;background-color:rgb(22 163 74/var(--tw-bg-opacity,1))}.bg-indigo-100{--tw-bg-opacity:1;background-color:rgb(224 231 255/var(--tw-bg-opacity,1))}.bg-indigo-600{--tw-bg-opacity:1;background-color:rgb(79 70 229/var(--tw-bg-opacity,1))}.bg-pink-100{--tw-bg-opacity:1;background-color:rgb(252 231 243/var(--tw-bg-opacity,1))}.bg-pink-600{--tw-bg-opacity:1;background-color:rgb(219 39 119/var(--tw-bg-opacity,1))}.bg-purple-100{--tw-bg-opacity:1;background-color:rgb(243 232 255/var(--tw-bg-opacity,1))}.bg-purple-600{--tw-bg-opacity:1;background-color:rgb(147 51 234/var(--tw-bg-opacity,1))}.bg-red-100{--tw-bg-opacity:1;background-color:rgb(254 226 226/var(--tw-bg-opacity,1))}.bg-red-300{--tw-bg-opacity:1;background-color:rgb(252 165 165/var(--tw-bg-opacity,1))}.bg-red-600{--tw-bg-opacity:1;background-color:rgb(220 38 38/var(--tw-bg-opacity,1))}.bg-white{--tw-bg-opacity:1;background-color:rgb(255 255 255/var(--tw-bg-opacity,1))}.bg-yellow-100{--tw-bg-opacity:1;background-color:rgb(254 249 195/var(--tw-bg-opacity,1))}.bg-yellow-600{--tw-bg-opacity:1;background-color:rgb(202 138 4/var(--tw-bg-opacity,1))}.bg-opacity-50{--tw-bg-opacity:0.5}.bg-gradient-to-br{background-image:linear-gradient(to bottom right,var(--tw-gradient-stops))}.from-blue-50{--tw-gradient-from:#eff6ff var(--tw-gradient-from-position);--tw-gradient-to:#eff6ff00 
var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.from-green-50{--tw-gradient-from:#f0fdf4 var(--tw-gradient-from-position);--tw-gradient-to:#f0fdf400 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.from-indigo-50{--tw-gradient-from:#eef2ff var(--tw-gradient-from-position);--tw-gradient-to:#eef2ff00 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.from-pink-50{--tw-gradient-from:#fdf2f8 var(--tw-gradient-from-position);--tw-gradient-to:#fdf2f800 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.from-red-50{--tw-gradient-from:#fef2f2 var(--tw-gradient-from-position);--tw-gradient-to:#fef2f200 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.from-yellow-50{--tw-gradient-from:#fefce8 var(--tw-gradient-from-position);--tw-gradient-to:#fefce800 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.to-blue-100{--tw-gradient-to:#dbeafe var(--tw-gradient-to-position)}.to-green-100{--tw-gradient-to:#dcfce7 var(--tw-gradient-to-position)}.to-indigo-100{--tw-gradient-to:#e0e7ff var(--tw-gradient-to-position)}.to-pink-100{--tw-gradient-to:#fce7f3 var(--tw-gradient-to-position)}.to-red-100{--tw-gradient-to:#fee2e2 var(--tw-gradient-to-position)}.to-yellow-100{--tw-gradient-to:#fef9c3 
var(--tw-gradient-to-position)}.fill-blue-500{fill:#3b82f6}.fill-blue-600{fill:#2563eb}.fill-gray-400{fill:#9ca3af}.fill-gray-500{fill:#6b7280}.fill-gray-700{fill:#374151}.fill-green-500{fill:#22c55e}.fill-green-600{fill:#16a34a}.fill-indigo-500{fill:#6366f1}.fill-indigo-600{fill:#4f46e5}.fill-pink-600{fill:#db2777}.fill-purple-500{fill:#a855f7}.fill-purple-600{fill:#9333ea}.fill-red-500{fill:#ef4444}.fill-red-600{fill:#dc2626}.fill-sky-500{fill:#0ea5e9}.fill-slate-800{fill:#1e293b}.fill-white{fill:#fff}.fill-yellow-500{fill:#eab308}.fill-yellow-600{fill:#ca8a04}.fill-orange-500{fill:#f97316}.fill-pink-500{fill:#ec4899}.stroke-white{stroke:#fff}.p-2{padding:.5rem}.p-3{padding:.75rem}.p-4{padding:1rem}.p-6{padding:1.5rem}.p-8{padding:2rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-4{padding-left:1rem;padding-right:1rem}.px-6{padding-left:1.5rem;padding-right:1.5rem}.py-1{padding-top:.25rem;padding-bottom:.25rem}.py-12{padding-top:3rem;padding-bottom:3rem}.py-2{padding-top:.5rem;padding-bottom:.5rem}.py-3{padding-top:.75rem;padding-bottom:.75rem}.py-6{padding-top:1.5rem;padding-bottom:1.5rem}.py-8{padding-top:2rem;padding-bottom:2rem}.pl-10{padding-left:2.5rem}.pr-4{padding-right:1rem}.pt-6{padding-top:1.5rem}.text-left{text-align:left}.text-center{text-align:center}.font-mono{font-family:JetBrains Mono,monospace}.text-2xl{font-size:1.5rem;line-height:2rem}.text-3xl{font-size:1.875rem;line-height:2.25rem}.text-4xl{font-size:2.25rem;line-height:2.5rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xl{font-size:1.25rem;line-height:1.75rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-medium{font-weight:500}.font-normal{font-weight:400}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.lowercase{text-transform:lowercase}.text-blue-500{--tw-text-opacity:1;color:rgb(59 130 
246/var(--tw-text-opacity,1))}.text-blue-600{--tw-text-opacity:1;color:rgb(37 99 235/var(--tw-text-opacity,1))}.text-blue-800{--tw-text-opacity:1;color:rgb(30 64 175/var(--tw-text-opacity,1))}.text-gray-300{--tw-text-opacity:1;color:rgb(209 213 219/var(--tw-text-opacity,1))}.text-gray-400{--tw-text-opacity:1;color:rgb(156 163 175/var(--tw-text-opacity,1))}.text-gray-500{--tw-text-opacity:1;color:rgb(107 114 128/var(--tw-text-opacity,1))}.text-gray-600{--tw-text-opacity:1;color:rgb(75 85 99/var(--tw-text-opacity,1))}.text-gray-700{--tw-text-opacity:1;color:rgb(55 65 81/var(--tw-text-opacity,1))}.text-gray-800{--tw-text-opacity:1;color:rgb(31 41 55/var(--tw-text-opacity,1))}.text-gray-900{--tw-text-opacity:1;color:rgb(17 24 39/var(--tw-text-opacity,1))}.text-green-600{--tw-text-opacity:1;color:rgb(22 163 74/var(--tw-text-opacity,1))}.text-green-800{--tw-text-opacity:1;color:rgb(22 101 52/var(--tw-text-opacity,1))}.text-indigo-600{--tw-text-opacity:1;color:rgb(79 70 229/var(--tw-text-opacity,1))}.text-indigo-800{--tw-text-opacity:1;color:rgb(55 48 163/var(--tw-text-opacity,1))}.text-purple-600{--tw-text-opacity:1;color:rgb(147 51 234/var(--tw-text-opacity,1))}.text-purple-800{--tw-text-opacity:1;color:rgb(107 33 168/var(--tw-text-opacity,1))}.text-red-600{--tw-text-opacity:1;color:rgb(220 38 38/var(--tw-text-opacity,1))}.text-red-800{--tw-text-opacity:1;color:rgb(153 27 27/var(--tw-text-opacity,1))}.text-slate-700{--tw-text-opacity:1;color:rgb(51 65 85/var(--tw-text-opacity,1))}.text-white{--tw-text-opacity:1;color:rgb(255 255 255/var(--tw-text-opacity,1))}.text-yellow-800{--tw-text-opacity:1;color:rgb(133 77 14/var(--tw-text-opacity,1))}.opacity-90{opacity:.9}.shadow{--tw-shadow:0 1px 3px 0 #0000001a,0 1px 2px -1px #0000001a;--tw-shadow-colored:0 1px 3px 0 var(--tw-shadow-color),0 1px 2px -1px var(--tw-shadow-color)}.shadow,.shadow-lg{box-shadow:var(--tw-ring-offset-shadow,0 0 #0000),var(--tw-ring-shadow,0 0 #0000),var(--tw-shadow)}.shadow-lg{--tw-shadow:0 10px 15px 
-3px #0000001a,0 4px 6px -4px #0000001a;--tw-shadow-colored:0 10px 15px -3px var(--tw-shadow-color),0 4px 6px -4px var(--tw-shadow-color)}.shadow-md{--tw-shadow:0 4px 6px -1px #0000001a,0 2px 4px -2px #0000001a;--tw-shadow-colored:0 4px 6px -1px var(--tw-shadow-color),0 2px 4px -2px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow,0 0 #0000),var(--tw-ring-shadow,0 0 #0000),var(--tw-shadow)}.blur{--tw-blur:blur(8px)}.blur,.grayscale{filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.grayscale{--tw-grayscale:grayscale(100%)}.\!invert{--tw-invert:invert(100%)!important;filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)!important}.invert{--tw-invert:invert(100%);filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.\!filter{filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)!important}.filter{filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) 
var(--tw-drop-shadow)}.transition-all{transition-property:all;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.transition-colors{transition-property:color,background-color,border-color,fill,stroke,-webkit-text-decoration-color;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,-webkit-text-decoration-color;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.duration-300{transition-duration:.3s}.ease-in{transition-timing-function:cubic-bezier(.4,0,1,1)}.ease-in-out{transition-timing-function:cubic-bezier(.4,0,.2,1)}.ease-out{transition-timing-function:cubic-bezier(0,0,.2,1)}.scrollbar-hide{-ms-overflow-style:none;scrollbar-width:none;overflow:-moz-scrollbars-none;overflow-y:scroll;&::-webkit-scrollbar{display:none}}body{font-family:Inter,sans-serif}.fade-in{animation:fadeIn .5s ease-in-out}@keyframes fadeIn{0%{opacity:0}to{opacity:1}}.gradient-bg{background:linear-gradient(135deg,#667eea,#764ba2)}.card-hover{transition:all .3s ease}.card-hover:hover{transform:translateY(-5px);box-shadow:0 20px 25px -5px #0000001a,0 10px 10px -5px #0000000a}.file-drop-zone{border:2px dashed #d1d5db;transition:all .3s ease}.file-drop-zone.dragover{border-color:#3b82f6;background-color:#eff6ff}.nav-link{border-radius:.375rem;padding:.5rem .75rem;font-size:.875rem;line-height:1.25rem;font-weight:500;--tw-text-opacity:1;color:rgb(55 65 81/var(--tw-text-opacity,1));transition-property:color,background-color,border-color,fill,stroke,-webkit-text-decoration-color;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,-webkit-text-decoration-color;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.nav-link:hover{--tw-bg-opacity:1;background-color:rgb(243 244 
246/var(--tw-bg-opacity,1));--tw-text-opacity:1;color:rgb(17 24 39/var(--tw-text-opacity,1))}.nav-link:is(.dark *){--tw-text-opacity:1;color:rgb(209 213 219/var(--tw-text-opacity,1))}.nav-link:hover:is(.dark *){--tw-bg-opacity:1;background-color:rgb(55 65 81/var(--tw-bg-opacity,1));--tw-text-opacity:1;color:rgb(255 255 255/var(--tw-text-opacity,1))}.nav-link.active{--tw-bg-opacity:1;background-color:rgb(219 234 254/var(--tw-bg-opacity,1));--tw-text-opacity:1;color:rgb(29 78 216/var(--tw-text-opacity,1))}.nav-link.active:is(.dark *){--tw-bg-opacity:1;background-color:rgb(30 58 138/var(--tw-bg-opacity,1));--tw-text-opacity:1;color:rgb(147 197 253/var(--tw-text-opacity,1))}.tool-active{border-right-width:4px;--tw-border-opacity:1;border-color:rgb(59 130 246/var(--tw-border-opacity,1));--tw-bg-opacity:1;background-color:rgb(239 246 255/var(--tw-bg-opacity,1))}.tool-active:is(.dark *){--tw-bg-opacity:1;background-color:rgb(30 58 138/var(--tw-bg-opacity,1))}.nav-svg{height:1.1rem;width:1.1rem;fill:#374151;font-size:1.5rem;line-height:2rem;--tw-text-opacity:1;color:rgb(37 99 235/var(--tw-text-opacity,1))}.nav-svg:is(.dark *){fill:#d1d5db}.nav-svg.active{fill:#1d4ed8}.nav-svg.active:is(.dark *){fill:#93c5fd}.waveform{display:flex;align-items:center;height:40px;width:100%;justify-content:space-between}.bar{width:3px;height:10px;background-color:#3b82f6;border-radius:3px;animation:wave 1.2s ease-in-out infinite}@keyframes wave{0%,to{transform:scaleY(.5)}50%{transform:scaleY(1.8)}}.bar:first-child{animation-delay:0s}.bar:nth-child(2){animation-delay:.1s}.bar:nth-child(3){animation-delay:.2s}.bar:nth-child(4){animation-delay:.3s}.bar:nth-child(5){animation-delay:.4s}.bar:nth-child(6){animation-delay:.5s}.bar:nth-child(7){animation-delay:.6s}.bar:nth-child(8){animation-delay:.7s}.bar:nth-child(9){animation-delay:.8s}.bar:nth-child(10){animation-delay:.9s}.hover\:bg-blue-50:hover{--tw-bg-opacity:1;background-color:rgb(239 246 
255/var(--tw-bg-opacity,1))}.hover\:bg-blue-700:hover{--tw-bg-opacity:1;background-color:rgb(29 78 216/var(--tw-bg-opacity,1))}.hover\:bg-gray-300:hover{--tw-bg-opacity:1;background-color:rgb(209 213 219/var(--tw-bg-opacity,1))}.hover\:bg-gray-50:hover{--tw-bg-opacity:1;background-color:rgb(249 250 251/var(--tw-bg-opacity,1))}.hover\:bg-green-50:hover{--tw-bg-opacity:1;background-color:rgb(240 253 244/var(--tw-bg-opacity,1))}.hover\:bg-green-700:hover{--tw-bg-opacity:1;background-color:rgb(21 128 61/var(--tw-bg-opacity,1))}.hover\:bg-indigo-700:hover{--tw-bg-opacity:1;background-color:rgb(67 56 202/var(--tw-bg-opacity,1))}.hover\:bg-pink-700:hover{--tw-bg-opacity:1;background-color:rgb(190 24 93/var(--tw-bg-opacity,1))}.hover\:bg-primary-100:hover{--tw-bg-opacity:1;background-color:rgb(219 234 254/var(--tw-bg-opacity,1))}.hover\:bg-purple-50:hover{--tw-bg-opacity:1;background-color:rgb(250 245 255/var(--tw-bg-opacity,1))}.hover\:bg-purple-700:hover{--tw-bg-opacity:1;background-color:rgb(126 34 206/var(--tw-bg-opacity,1))}.hover\:bg-red-50:hover{--tw-bg-opacity:1;background-color:rgb(254 242 242/var(--tw-bg-opacity,1))}.hover\:bg-red-700:hover{--tw-bg-opacity:1;background-color:rgb(185 28 28/var(--tw-bg-opacity,1))}.hover\:bg-yellow-700:hover{--tw-bg-opacity:1;background-color:rgb(161 98 7/var(--tw-bg-opacity,1))}.hover\:text-blue-600:hover{--tw-text-opacity:1;color:rgb(37 99 235/var(--tw-text-opacity,1))}.hover\:text-blue-800:hover{--tw-text-opacity:1;color:rgb(30 64 175/var(--tw-text-opacity,1))}.hover\:text-gray-700:hover{--tw-text-opacity:1;color:rgb(55 65 81/var(--tw-text-opacity,1))}.hover\:text-green-800:hover{--tw-text-opacity:1;color:rgb(22 101 52/var(--tw-text-opacity,1))}.focus\:border-transparent:focus{border-color:#0000}.focus\:ring-2:focus{--tw-ring-offset-shadow:var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);--tw-ring-shadow:var(--tw-ring-inset) 0 0 0 calc(2px + var(--tw-ring-offset-width)) 
var(--tw-ring-color);box-shadow:var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow,0 0 #0000)}.focus\:ring-blue-500:focus{--tw-ring-opacity:1;--tw-ring-color:rgb(59 130 246/var(--tw-ring-opacity,1))}.focus\:ring-green-500:focus{--tw-ring-opacity:1;--tw-ring-color:rgb(34 197 94/var(--tw-ring-opacity,1))}.dark\:block:is(.dark *){display:block}.dark\:hidden:is(.dark *){display:none}.dark\:border-gray-600:is(.dark *){--tw-border-opacity:1;border-color:rgb(75 85 99/var(--tw-border-opacity,1))}.dark\:border-gray-700:is(.dark *){--tw-border-opacity:1;border-color:rgb(55 65 81/var(--tw-border-opacity,1))}.dark\:bg-\[\#004754\]:is(.dark *){--tw-bg-opacity:1;background-color:rgb(0 71 84/var(--tw-bg-opacity,1))}.dark\:bg-blue-700:is(.dark *){--tw-bg-opacity:1;background-color:rgb(29 78 216/var(--tw-bg-opacity,1))}.dark\:bg-blue-900:is(.dark *){--tw-bg-opacity:1;background-color:rgb(30 58 138/var(--tw-bg-opacity,1))}.dark\:bg-gray-600:is(.dark *){--tw-bg-opacity:1;background-color:rgb(75 85 99/var(--tw-bg-opacity,1))}.dark\:bg-gray-700:is(.dark *){--tw-bg-opacity:1;background-color:rgb(55 65 81/var(--tw-bg-opacity,1))}.dark\:bg-gray-800:is(.dark *){--tw-bg-opacity:1;background-color:rgb(31 41 55/var(--tw-bg-opacity,1))}.dark\:bg-gray-900:is(.dark *){--tw-bg-opacity:1;background-color:rgb(17 24 39/var(--tw-bg-opacity,1))}.dark\:bg-green-700:is(.dark *){--tw-bg-opacity:1;background-color:rgb(21 128 61/var(--tw-bg-opacity,1))}.dark\:bg-green-900:is(.dark *){--tw-bg-opacity:1;background-color:rgb(20 83 45/var(--tw-bg-opacity,1))}.dark\:bg-indigo-700:is(.dark *){--tw-bg-opacity:1;background-color:rgb(67 56 202/var(--tw-bg-opacity,1))}.dark\:bg-indigo-900:is(.dark *){--tw-bg-opacity:1;background-color:rgb(49 46 129/var(--tw-bg-opacity,1))}.dark\:bg-pink-700:is(.dark *){--tw-bg-opacity:1;background-color:rgb(190 24 93/var(--tw-bg-opacity,1))}.dark\:bg-purple-900:is(.dark *){--tw-bg-opacity:1;background-color:rgb(88 28 
135/var(--tw-bg-opacity,1))}.dark\:bg-red-700:is(.dark *){--tw-bg-opacity:1;background-color:rgb(185 28 28/var(--tw-bg-opacity,1))}.dark\:bg-red-900:is(.dark *){--tw-bg-opacity:1;background-color:rgb(127 29 29/var(--tw-bg-opacity,1))}.dark\:bg-yellow-700:is(.dark *){--tw-bg-opacity:1;background-color:rgb(161 98 7/var(--tw-bg-opacity,1))}.dark\:bg-yellow-900:is(.dark *){--tw-bg-opacity:1;background-color:rgb(113 63 18/var(--tw-bg-opacity,1))}.dark\:from-blue-900:is(.dark *){--tw-gradient-from:#1e3a8a var(--tw-gradient-from-position);--tw-gradient-to:#1e3a8a00 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.dark\:from-green-900:is(.dark *){--tw-gradient-from:#14532d var(--tw-gradient-from-position);--tw-gradient-to:#14532d00 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.dark\:from-indigo-900:is(.dark *){--tw-gradient-from:#312e81 var(--tw-gradient-from-position);--tw-gradient-to:#312e8100 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.dark\:from-pink-900:is(.dark *){--tw-gradient-from:#831843 var(--tw-gradient-from-position);--tw-gradient-to:#83184300 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.dark\:from-red-900:is(.dark *){--tw-gradient-from:#7f1d1d var(--tw-gradient-from-position);--tw-gradient-to:#7f1d1d00 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.dark\:from-yellow-900:is(.dark *){--tw-gradient-from:#713f12 var(--tw-gradient-from-position);--tw-gradient-to:#713f1200 var(--tw-gradient-to-position);--tw-gradient-stops:var(--tw-gradient-from),var(--tw-gradient-to)}.dark\:to-blue-800:is(.dark *){--tw-gradient-to:#1e40af var(--tw-gradient-to-position)}.dark\:to-green-800:is(.dark *){--tw-gradient-to:#166534 var(--tw-gradient-to-position)}.dark\:to-indigo-800:is(.dark *){--tw-gradient-to:#3730a3 
var(--tw-gradient-to-position)}.dark\:to-pink-800:is(.dark *){--tw-gradient-to:#9d174d var(--tw-gradient-to-position)}.dark\:to-red-800:is(.dark *){--tw-gradient-to:#991b1b var(--tw-gradient-to-position)}.dark\:to-yellow-800:is(.dark *){--tw-gradient-to:#854d0e var(--tw-gradient-to-position)}.dark\:fill-gray-200:is(.dark *){fill:#e5e7eb}.dark\:fill-gray-300:is(.dark *){fill:#d1d5db}.dark\:fill-gray-400:is(.dark *){fill:#9ca3af}.dark\:fill-indigo-300:is(.dark *){fill:#a5b4fc}.dark\:fill-pink-300:is(.dark *){fill:#f9a8d4}.dark\:fill-purple-300:is(.dark *){fill:#d8b4fe}.dark\:fill-white:is(.dark *){fill:#fff}.dark\:text-blue-200:is(.dark *){--tw-text-opacity:1;color:rgb(191 219 254/var(--tw-text-opacity,1))}.dark\:text-blue-400:is(.dark *){--tw-text-opacity:1;color:rgb(96 165 250/var(--tw-text-opacity,1))}.dark\:text-gray-200:is(.dark *){--tw-text-opacity:1;color:rgb(229 231 235/var(--tw-text-opacity,1))}.dark\:text-gray-300:is(.dark *){--tw-text-opacity:1;color:rgb(209 213 219/var(--tw-text-opacity,1))}.dark\:text-gray-400:is(.dark *){--tw-text-opacity:1;color:rgb(156 163 175/var(--tw-text-opacity,1))}.dark\:text-gray-500:is(.dark *){--tw-text-opacity:1;color:rgb(107 114 128/var(--tw-text-opacity,1))}.dark\:text-green-200:is(.dark *){--tw-text-opacity:1;color:rgb(187 247 208/var(--tw-text-opacity,1))}.dark\:text-green-400:is(.dark *){--tw-text-opacity:1;color:rgb(74 222 128/var(--tw-text-opacity,1))}.dark\:text-indigo-200:is(.dark *){--tw-text-opacity:1;color:rgb(199 210 254/var(--tw-text-opacity,1))}.dark\:text-indigo-400:is(.dark *){--tw-text-opacity:1;color:rgb(129 140 248/var(--tw-text-opacity,1))}.dark\:text-purple-200:is(.dark *){--tw-text-opacity:1;color:rgb(233 213 255/var(--tw-text-opacity,1))}.dark\:text-purple-400:is(.dark *){--tw-text-opacity:1;color:rgb(192 132 252/var(--tw-text-opacity,1))}.dark\:text-red-200:is(.dark *){--tw-text-opacity:1;color:rgb(254 202 202/var(--tw-text-opacity,1))}.dark\:text-red-400:is(.dark *){--tw-text-opacity:1;color:rgb(248 
113 113/var(--tw-text-opacity,1))}.dark\:text-slate-300:is(.dark *){--tw-text-opacity:1;color:rgb(203 213 225/var(--tw-text-opacity,1))}.dark\:text-white:is(.dark *){--tw-text-opacity:1;color:rgb(255 255 255/var(--tw-text-opacity,1))}.dark\:text-yellow-200:is(.dark *){--tw-text-opacity:1;color:rgb(254 240 138/var(--tw-text-opacity,1))}.dark\:hover\:bg-\[\#002d34\]:hover:is(.dark *){--tw-bg-opacity:1;background-color:rgb(0 45 52/var(--tw-bg-opacity,1))}.dark\:hover\:bg-gray-500:hover:is(.dark *){--tw-bg-opacity:1;background-color:rgb(107 114 128/var(--tw-bg-opacity,1))}.dark\:hover\:bg-gray-600:hover:is(.dark *){--tw-bg-opacity:1;background-color:rgb(75 85 99/var(--tw-bg-opacity,1))}.dark\:hover\:bg-gray-700:hover:is(.dark *){--tw-bg-opacity:1;background-color:rgb(55 65 81/var(--tw-bg-opacity,1))}.dark\:hover\:text-blue-400:hover:is(.dark *){--tw-text-opacity:1;color:rgb(96 165 250/var(--tw-text-opacity,1))}@media (min-width:640px){.sm\:px-6{padding-left:1.5rem;padding-right:1.5rem}}@media (min-width:768px){.md\:col-span-2{grid-column:span 2/span 2}.md\:ml-6{margin-left:1.5rem}.md\:flex{display:flex}.md\:grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.md\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.md\:grid-cols-4{grid-template-columns:repeat(4,minmax(0,1fr))}.md\:space-x-8>:not([hidden])~:not([hidden]){--tw-space-x-reverse:0;margin-right:calc(2rem*var(--tw-space-x-reverse));margin-left:calc(2rem*(1 - var(--tw-space-x-reverse)))}}@media (min-width:1024px){.lg\:col-span-1{grid-column:span 1/span 1}.lg\:col-span-2{grid-column:span 2/span 2}.lg\:col-span-3{grid-column:span 3/span 3}.lg\:grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.lg\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.lg\:grid-cols-4{grid-template-columns:repeat(4,minmax(0,1fr))}.lg\:px-8{padding-left:2rem;padding-right:2rem}}
\ No newline at end of file
diff --git a/fweb/core/static/js/FileHandler.js b/fweb/core/static/js/FileHandler.js
new file mode 100644
index 0000000..6e7ae2b
--- /dev/null
+++ b/fweb/core/static/js/FileHandler.js
@@ -0,0 +1,134 @@
+class FileHandler {
+ constructor() {
+ // All construction-time wiring is delegated to setupGlobalFileHandlers();
+ // individual drop zones are attached later via setupFileDropZone().
+ this.setupGlobalFileHandlers();
+ }
+
+ setupFileDropZone(dropZoneId, inputId, multiple = true) {
+ const dropZone = document.getElementById(dropZoneId);
+ const fileInput = document.getElementById(inputId);
+
+ if (!dropZone || !fileInput) return;
+
+ fileInput.multiple = multiple;
+
+ // Click to select files
+ dropZone.addEventListener("click", () => fileInput.click());
+
+ // Drag and drop handlers
+ this.setupDragAndDrop(dropZone, fileInput, multiple);
+
+ // File input change handler
+ fileInput.addEventListener("change", () => {
+ this.updateFileList(dropZoneId, fileInput.files);
+ });
+ }
+
+ openFileSelector(dropZoneId) {
+ const inputElement = document.getElementById(`${dropZoneId}-input`);
+ inputElement?.click();
+ }
+
+ setupDragAndDrop(dropZone, fileInput, multiple) {
+ dropZone.addEventListener("dragover", (e) => {
+ e.preventDefault();
+ dropZone.classList.add("dragover");
+ });
+
+ dropZone.addEventListener("dragleave", () => {
+ dropZone.classList.remove("dragover");
+ });
+
+ dropZone.addEventListener("drop", (e) => {
+ e.preventDefault();
+ dropZone.classList.remove("dragover");
+
+ if (multiple) {
+ fileInput.files = e.dataTransfer.files;
+ } else {
+ fileInput.files =
+ e.dataTransfer.files.length > 0
+ ? [e.dataTransfer.files[0]]
+ : new DataTransfer().files;
+ }
+ this.updateFileList(dropZone.id, fileInput.files);
+ });
+ }
+
+ updateFileList(dropZoneId, files) {
+ const dropZone = document.getElementById(dropZoneId);
+ const fileList = dropZone.querySelector(".file-list");
+ const placeholder = dropZone.querySelector(".drop-placeholder");
+
+ if (files.length > 0) {
+ if (placeholder) placeholder.classList.add("hidden");
+ fileList.innerHTML = "";
+
+ Array.from(files).forEach((file, index) => {
+ const fileItem = this.createFileItem(file, dropZoneId, index);
+ fileList.appendChild(fileItem);
+ });
+ } else {
+ if (placeholder) placeholder.classList.remove("hidden");
+ fileList.innerHTML = "";
+ }
+ }
+
+ createFileItem(file, dropZoneId, index) {
+ const fileItem = document.createElement("div");
+ fileItem.className =
+ "flex items-center justify-between p-3 bg-gray-50 dark:bg-gray-700 rounded-lg mb-2";
+ fileItem.innerHTML = `
+
, because it just gets in the way.
+ from_box.parentNode.removeChild(p);
+ } else if (p.classList.contains("help")) {
+ // Move help text up to the top so it isn't below the select
+ // boxes or wrapped off on the side to the right of the add
+ // button:
+ from_box.parentNode.insertBefore(p, from_box.parentNode.firstChild);
+ }
+ }
+
+ // <div class="selector"> or <div class="selector stacked">
+ const selector_div = quickElement('div', from_box.parentNode);
+ // Make sure the selector div is at the beginning so that the
+ // add link would be displayed to the right of the widget.
+ from_box.parentNode.prepend(selector_div);
+ selector_div.className = is_stacked ? 'selector stacked' : 'selector';
+
+ // <div class="selector-available">
+ const selector_available = quickElement('div', selector_div);
+ selector_available.className = 'selector-available';
+ const title_available = quickElement('h2', selector_available, interpolate(gettext('Available %s') + ' ', [field_name]));
+ quickElement(
+ 'span', title_available, '',
+ 'class', 'help help-tooltip help-icon',
+ 'title', interpolate(
+ gettext(
+ 'This is the list of available %s. You may choose some by ' +
+ 'selecting them in the box below and then clicking the ' +
+ '"Choose" arrow between the two boxes.'
+ ),
+ [field_name]
+ )
+ );
+
+ const filter_p = quickElement('p', selector_available, '', 'id', field_id + '_filter');
+ filter_p.className = 'selector-filter';
+
+ const search_filter_label = quickElement('label', filter_p, '', 'for', field_id + '_input');
+
+ quickElement(
+ 'span', search_filter_label, '',
+ 'class', 'help-tooltip search-label-icon',
+ 'title', interpolate(gettext("Type into this box to filter down the list of available %s."), [field_name])
+ );
+
+ filter_p.appendChild(document.createTextNode(' '));
+
+ const filter_input = quickElement('input', filter_p, '', 'type', 'text', 'placeholder', gettext("Filter"));
+ filter_input.id = field_id + '_input';
+
+ selector_available.appendChild(from_box);
+ const choose_all = quickElement('a', selector_available, gettext('Choose all'), 'title', interpolate(gettext('Click to choose all %s at once.'), [field_name]), 'href', '#', 'id', field_id + '_add_all_link');
+ choose_all.className = 'selector-chooseall';
+
+ //
");
+ addButton = $this.filter(":last").next().find("a");
+ }
+ }
+ addButton.on('click', addInlineClickHandler);
+ };
+
+ const addInlineClickHandler = function(e) {
+ e.preventDefault();
+ const template = $("#" + options.prefix + "-empty");
+ const row = template.clone(true);
+ row.removeClass(options.emptyCssClass)
+ .addClass(options.formCssClass)
+ .attr("id", options.prefix + "-" + nextIndex);
+ addInlineDeleteButton(row);
+ row.find("*").each(function() {
+ updateElementIndex(this, options.prefix, totalForms.val());
+ });
+ // Insert the new form when it has been fully edited.
+ row.insertBefore($(template));
+ // Update number of total forms.
+ $(totalForms).val(parseInt(totalForms.val(), 10) + 1);
+ nextIndex += 1;
+ // Hide the add button if there's a limit and it's been reached.
+ if ((maxForms.val() !== '') && (maxForms.val() - totalForms.val()) <= 0) {
+ addButton.parent().hide();
+ }
+ // Show the remove buttons if there are more than min_num.
+ toggleDeleteButtonVisibility(row.closest('.inline-group'));
+
+ // Pass the new form to the post-add callback, if provided.
+ if (options.added) {
+ options.added(row);
+ }
+ row.get(0).dispatchEvent(new CustomEvent("formset:added", {
+ bubbles: true,
+ detail: {
+ formsetName: options.prefix
+ }
+ }));
+ };
+
+ /**
+ * The "X" button that is part of every unsaved inline.
+ * (When saved, it is replaced with a "Delete" checkbox.)
+ */
+ const addInlineDeleteButton = function(row) {
+ if (row.is("tr")) {
+ // If the forms are laid out in table rows, insert
+ // the remove button into the last table cell:
+ row.children(":last").append('