From a3c9b1be0213ecb96ad37cf08c4c74fba00bc7fd Mon Sep 17 00:00:00 2001
From: Gregory Kielian
Date: Thu, 5 Oct 2023 22:26:31 +0000
Subject: [PATCH 1/3] Add llama.cpp as a submodule

This will be used by data augmentation LLMs like Mistral
---
 .gitmodules       | 3 +++
 modules/llama.cpp | 1 +
 2 files changed, 4 insertions(+)
 create mode 100644 .gitmodules
 create mode 160000 modules/llama.cpp

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000..83a874f8f0
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "modules/llama.cpp"]
+	path = modules/llama.cpp
+	url = https://github.com/gkielian/llama.cpp.git
diff --git a/modules/llama.cpp b/modules/llama.cpp
new file mode 160000
index 0000000000..48edda30ee
--- /dev/null
+++ b/modules/llama.cpp
@@ -0,0 +1 @@
+Subproject commit 48edda30ee545fdac2e7a33d505382888f748bbf

From 4873354f3a3afd4affbe8f69919986b3af810945 Mon Sep 17 00:00:00 2001
From: Gregory Kielian
Date: Thu, 5 Oct 2023 22:27:31 +0000
Subject: [PATCH 2/3] Add install and test scripts for llama_cpp_python

Tests GPU inference with Mistral Q5_K_M
---
 install_llama_cpp_python.sh | 67 +++++++++++++++++++++++++++++++++++++
 test.py                     | 10 ++++++
 test_mistral.sh             | 11 ++++++
 3 files changed, 88 insertions(+)
 create mode 100644 install_llama_cpp_python.sh
 create mode 100644 test.py
 create mode 100644 test_mistral.sh

diff --git a/install_llama_cpp_python.sh b/install_llama_cpp_python.sh
new file mode 100644
index 0000000000..1ee6267a8c
--- /dev/null
+++ b/install_llama_cpp_python.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Set strict error handling
+set -euo pipefail
+
+# Get current script directory
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
+
+echo "This script will install llama-cpp-python with GPU support"
+
+# Check if llama module exists, prompt to initialize submodule
+if [ ! -d "$script_dir/modules/llama.cpp" ]; then
+
+  read -p "llama.cpp module not found. Download with git? [Y/n] " response
+
+  response=${response,,}
+
+  if [ "$response" != "n" ]; then
+
+    # Initialize llama submodule
+    git submodule update --init --recursive
+
+  else
+
+    echo "Exiting. llama.cpp module required."
+    exit 1
+
+  fi
+
+fi
+
+read -p "Enter CUDA install location (default /usr/local/cuda): " cuda_home
+
+cuda_home=${cuda_home:-/usr/local/cuda}
+
+if [ ! -d "$cuda_home" ]; then
+  echo "Error: $cuda_home is not a valid directory"
+  exit 1
+fi
+
+read -p "Append CUDA settings to ~/.bashrc? [Y/n] " response
+
+response=${response,,}
+
+if [ "$response" != "n" ]; then
+
+  echo "export CUDA_HOME=$cuda_home" >> ~/.bashrc
+  echo "export PATH=\"$cuda_home/bin:\$PATH\"" >> ~/.bashrc
+  echo "export LLAMA_CUBLAS=on" >> ~/.bashrc
+  echo "export LLAMA_CPP_LIB=\"$script_dir/modules/llama.cpp/libllama.so\"" >> ~/.bashrc
+
+  echo "Appended CUDA settings to ~/.bashrc"
+
+fi
+
+pushd "$script_dir/modules/llama.cpp"
+
+make clean
+make libllama.so || { echo "Error compiling llama.cpp"; exit 1; }
+
+popd
+
+export LLAMA_CPP_LIB="$script_dir/modules/llama.cpp/libllama.so"
+
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install llama-cpp-python --no-cache-dir
+
+echo "llama-cpp-python installed successfully"
diff --git a/test.py b/test.py
new file mode 100644
index 0000000000..da28131ab0
--- /dev/null
+++ b/test.py
@@ -0,0 +1,10 @@
+from llama_cpp import Llama
+
+text = """[INST] What is your favourite condiment? [/INST]
+Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!
+[INST] Do you have mayonnaise recipes? [/INST]"""
+
+llm = Llama(model_path="./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf", n_ctx=2048,
+            n_threads=8, n_gpu_layers=300, verbose=True)
+output = llm(text, max_tokens=256, stop=["[INST]"], echo=True)
+print(output)
diff --git a/test_mistral.sh b/test_mistral.sh
new file mode 100644
index 0000000000..5d780bdb01
--- /dev/null
+++ b/test_mistral.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+if [ -f "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf" ]; then
+  echo "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf file found, continuing"
+else
+  echo "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf file not found, downloading"
+  wget -P ./models https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q5_K_M.gguf
+fi
+
+python3 test.py
+

From 32f30b7001911eda506ae346e5fef5f49af3199c Mon Sep 17 00:00:00 2001
From: Gregory Kielian
Date: Thu, 5 Oct 2023 22:45:58 +0000
Subject: [PATCH 3/3] Add README and documentation for Mistral

---
 data_augmentation/README.md                   | 25 +++++++++++++++++++
 .../download_and_test_mistral7b.sh            |  8 +++++-
 .../llama-cpp-python_example.py               |  4 ++-
 3 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 data_augmentation/README.md
 rename test_mistral.sh => data_augmentation/download_and_test_mistral7b.sh (70%)
 rename test.py => data_augmentation/llama-cpp-python_example.py (88%)

diff --git a/data_augmentation/README.md b/data_augmentation/README.md
new file mode 100644
index 0000000000..a1c42e57ac
--- /dev/null
+++ b/data_augmentation/README.md
@@ -0,0 +1,25 @@
+# Running Mistral 7B Test
+
+The scripts here download and test the Mistral 7B model with a Python wrapper
+for `llama.cpp` called `llama-cpp-python`.
+
+## Install Steps
+
+1. First, install the nanoGPT requirements (see the main [README.md](../README.md)).
+2. Next, install `llama-cpp-python` and its dependencies via the installation
+   script provided in the repo root directory:
+
+```bash
+bash install_llama_cpp_python.sh
+```
+3. Finally, cd into this directory and source the `download_and_test_mistral7b.sh`
+   script (sourcing keeps it in your current Python environment):
+
+```bash
+source download_and_test_mistral7b.sh
+```
+
+This script downloads Mistral 7B if it is not already in the `./models`
+directory, then runs the `llama-cpp-python_example.py` script.
+
+This should complete fairly quickly with GPU acceleration.
diff --git a/test_mistral.sh b/data_augmentation/download_and_test_mistral7b.sh
similarity index 70%
rename from test_mistral.sh
rename to data_augmentation/download_and_test_mistral7b.sh
index 5d780bdb01..23b5b3ae6d 100644
--- a/test_mistral.sh
+++ b/data_augmentation/download_and_test_mistral7b.sh
@@ -1,5 +1,11 @@
 #!/bin/bash
 
+# This script will download and test Mistral 7B using llama-cpp-python
+
+if [ ! -d ./models ]; then
+  mkdir -p ./models
+fi
+
 if [ -f "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf" ]; then
   echo "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf file found, continuing"
 else
@@ -7,5 +13,5 @@ else
   wget -P ./models https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q5_K_M.gguf
 fi
 
-python3 test.py
+python3 llama-cpp-python_example.py
 
diff --git a/test.py b/data_augmentation/llama-cpp-python_example.py
similarity index 88%
rename from test.py
rename to data_augmentation/llama-cpp-python_example.py
index da28131ab0..bf7dac5312 100644
--- a/test.py
+++ b/data_augmentation/llama-cpp-python_example.py
@@ -5,6 +5,8 @@
 [INST] Do you have mayonnaise recipes? [/INST]"""
 
 llm = Llama(model_path="./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf", n_ctx=2048,
-            n_threads=8, n_gpu_layers=300, verbose=True)
+            n_threads=8, n_gpu_layers=35, verbose=True)
+
 output = llm(text, max_tokens=256, stop=["[INST]"], echo=True)
+
 print(output)