Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "UltraSinger"
version = "0.0.13.dev14"
version = "0.0.13.dev15"
description = "A tool to create UltraStar karaoke files from audio files"
requires-python = ">=3.12"
dependencies = [
Expand All @@ -16,8 +16,7 @@ dependencies = [
"langcodes",
"language_data",
"packaging",
"librosa>=0.10.2",
"numba>=0.59.0",
"librosa",
"swift-f0",
"pydub",
"demucs",
Expand Down
2 changes: 1 addition & 1 deletion src/Settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
@dataclass
class Settings:

APP_VERSION = "0.0.13.dev14"
APP_VERSION = "0.0.13.dev15"
CONFIDENCE_THRESHOLD = 0.6
CONFIDENCE_PROMPT_TIMEOUT = 4

Expand Down
14 changes: 9 additions & 5 deletions src/UltraSinger.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ def run() -> tuple[str, Score, Score]:
# Create process audio
process_data.process_data_paths.processing_audio_path = CreateProcessAudio(process_data)

# Get BPM from wav file
if not settings.input_file_is_ultrastar_txt:
process_data.media_info.bpm = get_bpm_from_file(process_data.process_data_paths.processing_audio_path)

# Detect key
detected_key, detected_mode = detect_key_from_audio(process_data.process_data_paths.processing_audio_path)
if process_data.media_info.music_key is None:
Expand Down Expand Up @@ -611,15 +615,17 @@ def transcribe_audio(cache_folder_path: str, processing_audio_path: str) -> Tran
transcription_result = None
whisper_align_model_string = None
if settings.transcriber == "whisper":
if not settings.whisper_align_model is None: whisper_align_model_string = settings.whisper_align_model.replace("/", "_")
transcription_config = f"{settings.transcriber}_{settings.whisper_model.value}_{settings.pytorch_device}_{whisper_align_model_string}_{settings.whisper_batch_size}_{settings.whisper_compute_type}_{settings.language}"
if not settings.whisper_align_model is None:
whisper_align_model_string = settings.whisper_align_model.replace("/", "_")
whisper_device = "cpu" if settings.force_whisper_cpu else settings.pytorch_device
transcription_config = f"{settings.transcriber}_{settings.whisper_model.value}_{whisper_device}_{whisper_align_model_string}_{settings.whisper_batch_size}_{settings.whisper_compute_type}_{settings.language}"
transcription_path = os.path.join(cache_folder_path, f"{transcription_config}.json")
cached_transcription_available = check_file_exists(transcription_path)
if settings.skip_cache_transcription or not cached_transcription_available:
transcription_result = transcribe_with_whisper(
processing_audio_path,
settings.whisper_model,
settings.pytorch_device,
whisper_device,
settings.whisper_align_model,
settings.whisper_batch_size,
settings.whisper_compute_type,
Expand Down Expand Up @@ -684,7 +690,6 @@ def infos_from_audio_video_input_file() -> tuple[str, str, str, MediaInfo]:
if song_info.cover_image_data is not None:
save_image(song_info.cover_image_data, basename_without_ext, song_folder_output_path)

real_bpm = get_bpm_from_file(ultrastar_audio_input_path)
return (
basename_without_ext,
song_folder_output_path,
Expand All @@ -694,7 +699,6 @@ def infos_from_audio_video_input_file() -> tuple[str, str, str, MediaInfo]:
title=song_info.title,
year=song_info.year,
genre=song_info.genres,
bpm=real_bpm,
cover_url=song_info.cover_url,
audio_extension=audio_ext,
video_extension=video_ext
Expand Down
11 changes: 9 additions & 2 deletions src/modules/Audio/bpm.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
import librosa
import soundfile as sf

from modules.console_colors import ULTRASINGER_HEAD, blue_highlighted


def get_bpm_from_data(data, sampling_rate):
    """Get real bpm from audio data.

    Args:
        data: Audio samples, forwarded to librosa as ``y``.
        sampling_rate: Sampling rate of ``data``, forwarded to librosa as ``sr``.

    Returns:
        The first element of the tempo estimate returned by
        ``librosa.feature.tempo``.
    """
    onset_env = librosa.onset.onset_strength(y=data, sr=sampling_rate)
    # Estimate the tempo exactly once via librosa.feature.tempo. The older
    # librosa.beat.tempo alias (deprecated since librosa 0.10) must not be
    # called as well: its result would be overwritten immediately and the
    # call breaks on librosa releases that no longer ship the alias.
    wav_tempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sampling_rate)
    bpm = wav_tempo[0]

    print(f"{ULTRASINGER_HEAD} BPM is {blue_highlighted(str(round(bpm, 2)))}")
    return bpm


def get_bpm_from_file(wav_file: str) -> float:
    """Get real bpm from audio file.

    Args:
        wav_file: Path of the audio file to analyse.

    Returns:
        The tempo reported by ``get_bpm_from_data`` for the file's samples.
    """
    # Decode the file a single time with soundfile; loading it through
    # librosa.load first only produced a result that was thrown away.
    data, sampling_rate = sf.read(wav_file, dtype='float32')
    if data.ndim > 1:
        # Multi-channel input: transpose to match librosa's expected format,
        # then downmix to mono before the tempo analysis.
        data = librosa.to_mono(data.T)
    return get_bpm_from_data(data, sampling_rate)
2 changes: 0 additions & 2 deletions src/modules/Audio/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ def download_from_youtube(input_url: str, output_folder_path: str, cookiefile: s
input_url, basename_without_ext, song_output, cookiefile
)

real_bpm = get_bpm_from_file(audio_file_path)
return (
basename_without_ext,
song_output,
Expand All @@ -126,7 +125,6 @@ def download_from_youtube(input_url: str, output_folder_path: str, cookiefile: s
title=song_info.title,
year=song_info.year,
genre=song_info.genres,
bpm=real_bpm,
cover_url=cover_url,
video_url=input_url,
audio_extension=audio_ext,
Expand Down
2 changes: 1 addition & 1 deletion src/modules/ProcessData.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class MediaInfo:
"""Media Info"""
title: str
artist: str
bpm: float
bpm: Optional[float] = None
year: Optional[str] = None
genre: Optional[str] = None
language: Optional[str] = None
Expand Down
Loading