From 1ddec41cb4583d65abb4f89150a564ae1d0bda28 Mon Sep 17 00:00:00 2001
From: Nikolay Karpov <nkarpov@nvidia.com>
Date: Wed, 28 Jan 2026 15:53:57 -0800
Subject: [PATCH 1/3] update nemo to 2.3.2

Signed-off-by: Nikolay Karpov <nkarpov@nvidia.com>
---
 dataset_configs/multilingual/granary/config.yaml | 2 +-
 docker/Dockerfile.tts_sdp                        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dataset_configs/multilingual/granary/config.yaml b/dataset_configs/multilingual/granary/config.yaml
index 78e778b0..2a476b88 100644
--- a/dataset_configs/multilingual/granary/config.yaml
+++ b/dataset_configs/multilingual/granary/config.yaml
@@ -71,7 +71,7 @@ documentation: |
 
   ``ConvertToTarredAudioDataset`` *(optional, only if tar-sharding is enabled)*::
 
-      pip install lhotse "nemo-toolkit[common]==2.2.1"
+      pip install lhotse "nemo-toolkit[common]==2.3.2"
 
   Quick start
   -----------
diff --git a/docker/Dockerfile.tts_sdp b/docker/Dockerfile.tts_sdp
index f174c7b1..8f31b559 100644
--- a/docker/Dockerfile.tts_sdp
+++ b/docker/Dockerfile.tts_sdp
@@ -41,6 +41,6 @@ RUN pip install -r requirements/tts.txt
 RUN pip install flash-attn --no-build-isolation
 RUN pip install https://github.com/LahiLuk/YouTokenToMe/archive/master.zip
 RUN pip install megatron-core transformer_engine[pytorch]==2.4.0
-RUN pip install nemo_toolkit['all']==2.1.0 
+RUN pip install nemo_toolkit['all']==2.3.2
 
 WORKDIR /src/NeMo-speech-data-processor
\ No newline at end of file

From 8cd892f84446812eb6b891e2ae77c226b06d5b81 Mon Sep 17 00:00:00 2001
From: fqian1107 <110878019+fqian1107@users.noreply.github.com>
Date: Wed, 4 Feb 2026 13:44:07 +0800
Subject: [PATCH 2/3] Install megatron-core and transformer_engine with --no-build-isolation (#163)

Signed-off-by: Fan Qian <fqian@nvidia.com>
---
 docker/Dockerfile.tts_sdp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile.tts_sdp b/docker/Dockerfile.tts_sdp
index 8f31b559..f3a009b7 100644
--- a/docker/Dockerfile.tts_sdp
+++ b/docker/Dockerfile.tts_sdp
@@ -40,7 +40,7 @@ RUN pip install -r requirements/main.txt
 RUN pip install -r requirements/tts.txt
 RUN pip install flash-attn --no-build-isolation
 RUN pip install https://github.com/LahiLuk/YouTokenToMe/archive/master.zip
-RUN pip install megatron-core transformer_engine[pytorch]==2.4.0
+RUN pip install --no-build-isolation megatron-core transformer_engine[pytorch]==2.4.0
 RUN pip install nemo_toolkit['all']==2.3.2
 
 WORKDIR /src/NeMo-speech-data-processor
\ No newline at end of file

From 17a5384c90ee52601dcb7bc7997a1dc186c135dd Mon Sep 17 00:00:00 2001
From: Fan Qian <fqian@nvidia.com>
Date: Fri, 6 Feb 2026 11:18:40 +0800
Subject: [PATCH 3/3] Read hypothesis.timestamp instead of hypothesis.timestep in nemo_asr_align

Signed-off-by: Fan Qian <fqian@nvidia.com>
---
 sdp/processors/tts/nemo_asr_align.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdp/processors/tts/nemo_asr_align.py b/sdp/processors/tts/nemo_asr_align.py
index 9a71c476..fd6a077f 100644
--- a/sdp/processors/tts/nemo_asr_align.py
+++ b/sdp/processors/tts/nemo_asr_align.py
@@ -119,7 +119,7 @@ def get_alignments_text(self, hypotheses):
                 - list: List of dictionaries with word alignments (word, start, end)
                 - str: The transcribed text
         """
-        timestamp_dict = hypotheses.timestep # extract timesteps from hypothesis of first (and only) audio file
+        timestamp_dict = hypotheses.timestamp # extract timestamps from the hypothesis of the first (and only) audio file
 
         # For a FastConformer model, you can display the word timestamps as follows:
         # 80ms is duration of a timestep at output of the Conformer