From 1ddec41cb4583d65abb4f89150a564ae1d0bda28 Mon Sep 17 00:00:00 2001 From: Nikolay Karpov Date: Wed, 28 Jan 2026 15:53:57 -0800 Subject: [PATCH 1/3] update nemo to 2.3.2 Signed-off-by: Nikolay Karpov --- dataset_configs/multilingual/granary/config.yaml | 2 +- docker/Dockerfile.tts_sdp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dataset_configs/multilingual/granary/config.yaml b/dataset_configs/multilingual/granary/config.yaml index 78e778b0..2a476b88 100644 --- a/dataset_configs/multilingual/granary/config.yaml +++ b/dataset_configs/multilingual/granary/config.yaml @@ -71,7 +71,7 @@ documentation: | ``ConvertToTarredAudioDataset`` *(optional, only if tar-sharding is enabled)*:: - pip install lhotse "nemo-toolkit[common]==2.2.1" + pip install lhotse "nemo-toolkit[common]==2.3.2" Quick start ----------- diff --git a/docker/Dockerfile.tts_sdp b/docker/Dockerfile.tts_sdp index f174c7b1..8f31b559 100644 --- a/docker/Dockerfile.tts_sdp +++ b/docker/Dockerfile.tts_sdp @@ -41,6 +41,6 @@ RUN pip install -r requirements/tts.txt RUN pip install flash-attn --no-build-isolation RUN pip install https://github.com/LahiLuk/YouTokenToMe/archive/master.zip RUN pip install megatron-core transformer_engine[pytorch]==2.4.0 -RUN pip install nemo_toolkit['all']==2.1.0 +RUN pip install nemo_toolkit['all']==2.3.2 WORKDIR /src/NeMo-speech-data-processor \ No newline at end of file From 8cd892f84446812eb6b891e2ae77c226b06d5b81 Mon Sep 17 00:00:00 2001 From: fqian1107 <110878019+fqian1107@users.noreply.github.com> Date: Wed, 4 Feb 2026 13:44:07 +0800 Subject: [PATCH 2/3] try no-build-isolation (#163) Signed-off-by: Fan Qian --- docker/Dockerfile.tts_sdp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.tts_sdp b/docker/Dockerfile.tts_sdp index 8f31b559..f3a009b7 100644 --- a/docker/Dockerfile.tts_sdp +++ b/docker/Dockerfile.tts_sdp @@ -40,7 +40,7 @@ RUN pip install -r requirements/main.txt RUN pip install -r requirements/tts.txt RUN pip install flash-attn --no-build-isolation RUN pip install https://github.com/LahiLuk/YouTokenToMe/archive/master.zip -RUN pip install megatron-core transformer_engine[pytorch]==2.4.0 +RUN pip install --no-build-isolation megatron-core transformer_engine[pytorch]==2.4.0 RUN pip install nemo_toolkit['all']==2.3.2 WORKDIR /src/NeMo-speech-data-processor \ No newline at end of file From 17a5384c90ee52601dcb7bc7997a1dc186c135dd Mon Sep 17 00:00:00 2001 From: Fan Qian Date: Fri, 6 Feb 2026 11:18:40 +0800 Subject: [PATCH 3/3] Update nemo timestamp Signed-off-by: Fan Qian --- sdp/processors/tts/nemo_asr_align.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdp/processors/tts/nemo_asr_align.py b/sdp/processors/tts/nemo_asr_align.py index 9a71c476..fd6a077f 100644 --- a/sdp/processors/tts/nemo_asr_align.py +++ b/sdp/processors/tts/nemo_asr_align.py @@ -119,7 +119,7 @@ def get_alignments_text(self, hypotheses): - list: List of dictionaries with word alignments (word, start, end) - str: The transcribed text """ - timestamp_dict = hypotheses.timestep # extract timesteps from hypothesis of first (and only) audio file + timestamp_dict = hypotheses.timestamp # extract timesteps from hypothesis of first (and only) audio file # For a FastConformer model, you can display the word timestamps as follows: # 80ms is duration of a timestep at output of the Conformer