diff --git a/dataset_configs/multilingual/granary/config.yaml b/dataset_configs/multilingual/granary/config.yaml
index 78e778b0..2a476b88 100644
--- a/dataset_configs/multilingual/granary/config.yaml
+++ b/dataset_configs/multilingual/granary/config.yaml
@@ -71,7 +71,7 @@ documentation: |
 
   ``ConvertToTarredAudioDataset`` *(optional, only if tar-sharding is enabled)*::
 
-      pip install lhotse "nemo-toolkit[common]==2.2.1"
+      pip install lhotse "nemo-toolkit[common]==2.3.2"
 
   Quick start
   -----------
diff --git a/docker/Dockerfile.tts_sdp b/docker/Dockerfile.tts_sdp
index f174c7b1..f3a009b7 100644
--- a/docker/Dockerfile.tts_sdp
+++ b/docker/Dockerfile.tts_sdp
@@ -40,7 +40,7 @@ RUN pip install -r requirements/main.txt
 RUN pip install -r requirements/tts.txt
 RUN pip install flash-attn --no-build-isolation
 RUN pip install https://github.com/LahiLuk/YouTokenToMe/archive/master.zip
-RUN pip install megatron-core transformer_engine[pytorch]==2.4.0
-RUN pip install nemo_toolkit['all']==2.1.0 
+RUN pip install --no-build-isolation megatron-core transformer_engine[pytorch]==2.4.0
+RUN pip install nemo_toolkit['all']==2.3.2
 
 WORKDIR /src/NeMo-speech-data-processor
\ No newline at end of file
diff --git a/sdp/processors/tts/nemo_asr_align.py b/sdp/processors/tts/nemo_asr_align.py
index 9a71c476..fd6a077f 100644
--- a/sdp/processors/tts/nemo_asr_align.py
+++ b/sdp/processors/tts/nemo_asr_align.py
@@ -119,7 +119,7 @@ def get_alignments_text(self, hypotheses):
                 - list: List of dictionaries with word alignments (word, start, end)
                 - str: The transcribed text
         """
-        timestamp_dict = hypotheses.timestep # extract timesteps from hypothesis of first (and only) audio file
+        timestamp_dict = hypotheses.timestamp # extract the timestamp dict from the hypothesis of the first (and only) audio file
 
         # For a FastConformer model, you can display the word timestamps as follows:
         # 80ms is duration of a timestep at output of the Conformer