diff --git a/asr-finetune-parakeet-nemo.ipynb b/asr-finetune-parakeet-nemo.ipynb index 2f3863f..af9b2bf 100644 --- a/asr-finetune-parakeet-nemo.ipynb +++ b/asr-finetune-parakeet-nemo.ipynb @@ -65,17 +65,42 @@ " a. Click **Runtime** > **Restart Runtime** for any upgraded packages to take effect.\n", "\"\"\"\n", "\n", + "# Before running this notebook\n", + " # pull docker container: docker pull nvcr.io/nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 (recommended)\n", + " # launch the container and run the commands below\n", + " # `apt-get update`\n", + " # install python3.10.12 and pip\n", + " # `apt-get install -y --no-install-recommends python3.10 python3.10-venv python3.10-distutils python3.10-dev`\n", + " # `apt-get install -y --no-install-recommends python3-pip`\n", + " # `update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1`\n", + " # `update-alternatives --set python /usr/bin/python3.10`\n", + " # `python --version`\n", + "\n", + "# Run this notebook (you might need to run this cell twice, since the site-packages need to be synced during the first run)\n", + "\n", "# Install Dependencies\n", - "!pip install wget\n", - "!apt-get install sox libsndfile1 ffmpeg libsox-fmt-mp3 jq\n", - "!pip install text-unidecode\n", - "!pip install matplotlib>=3.3.2\n", - "!pip install Cython\n", - "!pip3 install --no-cache-dir huggingface-hub==0.23.2\n", + "!pip3 install wget\n", + "!apt-get install -y wget sox libsndfile1 ffmpeg libsox-fmt-mp3 jq git # if this fails, run `apt-get update` first\n", + "!pip3 install text-unidecode\n", + "!pip3 install matplotlib\n", + "!pip3 install Cython\n", + "!pip3 install torch==2.3.0\n", + "!pip3 install librosa\n", + "\n", "\n", - "## Install NeMo\n", - "BRANCH = 'v1.23.0'\n", - "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", + "# Install NeMo\n", + "BRANCH = 'v2.0.0'\n", + "!python3 -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", + "\n", + "# Modify some 
packages for dependency compatibility\n", + "!pip3 install numpy==1.24.1\n", + "!pip3 install --no-cache-dir huggingface-hub==0.24.5\n", + "!pip3 install transformers==4.40.2\n", + "!pip3 install numba==0.59.0\n", + "!pip3 install pytorch-lightning==2.3.3\n", + "!pip3 install lightning-utilities==0.10.1\n", + "!pip3 install omegaconf==2.3.0\n", + "!pip3 install jiwer==3.0.4\n", "\n", "\"\"\"\n", "Remember to restart the runtime for the kernel to pick up any upgraded packages (e.g. matplotlib)!\n", @@ -300,8 +325,9 @@ "source": [ "# To fully train the model, you'll need to increase trainer.max_epochs from 1.\n", "# Empirical evidence suggests that around 200 epochs should suffice.\n", - "NEMO_DIR = 'FIX_ME/path/to/NeMo'\n", - "! git clone -b $BRANCH https://github.com/NVIDIA/NeMo $NEMO_DIR\n", + "NEMO_DIR = '/workspace/NeMo'\n", + "BRANCH='v2.0.0'\n", + "!git clone -b $BRANCH https://github.com/NVIDIA/NeMo $NEMO_DIR\n", "!python $NEMO_DIR/examples/asr/speech_to_text_finetune.py \\\n", " --config-path=\"../asr/conf/fastconformer/hybrid_transducer_ctc/\" --config-name=fastconformer_hybrid_transducer_ctc_bpe \\\n", " +init_from_pretrained_model=stt_en_fastconformer_hybrid_large_pc \\\n", @@ -322,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "id": "_w2KBGHaSf4S" }, @@ -417,7 +443,7 @@ "source": [ "!pip install nvidia-pyindex\n", "!ngc registry resource download-version \"nvidia/riva/riva_quickstart:\"$__riva_version__\n", - "!pip install nemo2riva\n", + "!pip install nemo2riva==2.19.0\n", "!pip install protobuf==3.20.0" ] }, @@ -443,7 +469,7 @@ "outputs": [], "source": [ "riva_file_path = ctc_model_path[:-5]+\".riva\"\n", - "!nemo2riva --key=nemotoriva --onnx-opset 18 --out $riva_file_path $ctc_model_path" + "!nemo2riva --key=nemotoriva --max-dim 1000 --onnx-opset 18 --out $riva_file_path $ctc_model_path" ] }, { @@ -482,7 +508,7 @@ }, "gpuClass": "standard", "kernelspec": { - "display_name": "Python 3 (ipykernel)", 
+ "display_name": "Python 3", "language": "python", "name": "python3" },