From a3c9b1be0213ecb96ad37cf08c4c74fba00bc7fd Mon Sep 17 00:00:00 2001
From: Gregory Kielian
Date: Thu, 5 Oct 2023 22:26:31 +0000
Subject: [PATCH 1/3] Add llama.cpp as a submodule

This will be used by data augmentation LLMs like Mistral
---
 .gitmodules       | 3 +++
 modules/llama.cpp | 1 +
 2 files changed, 4 insertions(+)
 create mode 100644 .gitmodules
 create mode 160000 modules/llama.cpp

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000..83a874f8f0
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "modules/llama.cpp"]
+	path = modules/llama.cpp
+	url = https://github.com/gkielian/llama.cpp.git
diff --git a/modules/llama.cpp b/modules/llama.cpp
new file mode 160000
index 0000000000..48edda30ee
--- /dev/null
+++ b/modules/llama.cpp
@@ -0,0 +1 @@
+Subproject commit 48edda30ee545fdac2e7a33d505382888f748bbf

From 4873354f3a3afd4affbe8f69919986b3af810945 Mon Sep 17 00:00:00 2001
From: Gregory Kielian
Date: Thu, 5 Oct 2023 22:27:31 +0000
Subject: [PATCH 2/3] Add install and test scripts for llama_cpp_python

Tests GPU inference with Mistral Q5_K_M
---
 install_llama_cpp_python.sh | 67 +++++++++++++++++++++++++++++++++++++
 test.py                     | 10 ++++++
 test_mistral.sh             | 11 ++++++
 3 files changed, 88 insertions(+)
 create mode 100644 install_llama_cpp_python.sh
 create mode 100644 test.py
 create mode 100644 test_mistral.sh

diff --git a/install_llama_cpp_python.sh b/install_llama_cpp_python.sh
new file mode 100644
index 0000000000..1ee6267a8c
--- /dev/null
+++ b/install_llama_cpp_python.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Set strict error handling
+set -euo pipefail
+
+# Get current script directory
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
+
+echo "This script will install llama-cpp-python with GPU support"
+
+# Check if llama module exists, prompt to initialize submodule
+if [ ! -d "$script_dir/modules/llama.cpp" ]; then
+
+  read -p "llama.cpp module not found. Download with git? [Y/n] " response
+
+  response=${response,,}
+
+  if [ "$response" != "n" ]; then
+
+    # Initialize llama submodule
+    git submodule update --init --recursive
+
+  else
+
+    echo "Exiting. llama.cpp module required."
+    exit 1
+
+  fi
+
+fi
+
+read -p "Enter CUDA install location (default /usr/local/cuda): " cuda_home
+
+cuda_home=${cuda_home:-/usr/local/cuda}
+
+if [ ! -d "$cuda_home" ]; then
+  echo "Error: $cuda_home is not a valid directory"
+  exit 1
+fi
+
+read -p "Append CUDA settings to ~/.bashrc? [Y/n] " response
+
+response=${response,,}
+
+if [ "$response" != "n" ]; then
+
+  echo "export CUDA_HOME=$cuda_home" >> ~/.bashrc
+  echo "export PATH=\"$cuda_home/bin:\$PATH\"" >> ~/.bashrc
+  echo "export LLAMA_CUBLAS=on" >> ~/.bashrc
+  echo "export LLAMA_CPP_LIB=\"$script_dir/modules/llama.cpp/libllama.so\"" >> ~/.bashrc
+
+  echo "Appended CUDA settings to ~/.bashrc"
+
+fi
+
+pushd "$script_dir/modules/llama.cpp"
+
+make clean
+make libllama.so || { echo "Error compiling llama.cpp"; exit 1; }
+
+popd
+
+export LLAMA_CPP_LIB="$script_dir/modules/llama.cpp/libllama.so"
+
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install llama-cpp-python --no-cache-dir
+
+echo "llama-cpp-python installed successfully"
diff --git a/test.py b/test.py
new file mode 100644
index 0000000000..da28131ab0
--- /dev/null
+++ b/test.py
@@ -0,0 +1,10 @@
+from llama_cpp import Llama
+
+text = """[INST] What is your favourite condiment? [/INST]
+Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!
+[INST] Do you have mayonnaise recipes? [/INST]"""
+
+llm = Llama(model_path="./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf", n_ctx=2048,
+            n_threads=8, n_gpu_layers=300, verbose=True)
+output = llm(text, max_tokens=256, stop=["[INST]"], echo=True)
+print(output)
diff --git a/test_mistral.sh b/test_mistral.sh
new file mode 100644
index 0000000000..5d780bdb01
--- /dev/null
+++ b/test_mistral.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+if [ -f "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf" ]; then
+  echo "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf file found, continuing"
+else
+  echo "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf file not found, downloading"
+  wget -P ./models https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q5_K_M.gguf
+fi
+
+python3 test.py
+

From 32f30b7001911eda506ae346e5fef5f49af3199c Mon Sep 17 00:00:00 2001
From: Gregory Kielian
Date: Thu, 5 Oct 2023 22:45:58 +0000
Subject: [PATCH 3/3] Add README and documentation for Mistral

---
 data_augmentation/README.md                   | 25 +++++++++++++++++++
 .../download_and_test_mistral7b.sh            |  8 +++++-
 .../llama-cpp-python_example.py               |  4 ++-
 3 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 data_augmentation/README.md
 rename test_mistral.sh => data_augmentation/download_and_test_mistral7b.sh (70%)
 rename test.py => data_augmentation/llama-cpp-python_example.py (88%)

diff --git a/data_augmentation/README.md b/data_augmentation/README.md
new file mode 100644
index 0000000000..a1c42e57ac
--- /dev/null
+++ b/data_augmentation/README.md
@@ -0,0 +1,25 @@
+# Running Mistral 7B Test
+
+The scripts here download and test the Mistral 7B model with a Python wrapper
+for `llama.cpp` called `llama-cpp-python`.
+
+## Install Steps
+
+1. First, install the nanoGPT requirements (see the main [README.md](../README.md)).
+2. Next, install `llama-cpp-python` and its dependencies via the installation
+   script provided in the repo root directory:
+
+```bash
+bash install_llama_cpp_python.sh
+```
+3. Finally, cd into this directory and source the `download_and_test_mistral7b.sh`
+   script (sourcing keeps it in your current Python environment):
+
+```bash
+source download_and_test_mistral7b.sh
+```
+
+This script downloads Mistral 7B if it is not already in the `./models`
+directory, then runs the `llama-cpp-python_example.py` script.
+
+This should complete fairly quickly with GPU acceleration.
diff --git a/test_mistral.sh b/data_augmentation/download_and_test_mistral7b.sh
similarity index 70%
rename from test_mistral.sh
rename to data_augmentation/download_and_test_mistral7b.sh
index 5d780bdb01..23b5b3ae6d 100644
--- a/test_mistral.sh
+++ b/data_augmentation/download_and_test_mistral7b.sh
@@ -1,5 +1,11 @@
 #!/bin/bash
 
+# This script will download and test Mistral 7B using llama-cpp-python
+
+if [ ! -d ./models ]; then
+  mkdir -p ./models
+fi
+
 if [ -f "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf" ]; then
   echo "./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf file found, continuing"
 else
@@ -7,5 +13,5 @@ else
   wget -P ./models https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q5_K_M.gguf
 fi
 
-python3 test.py
+python3 llama-cpp-python_example.py
 
diff --git a/test.py b/data_augmentation/llama-cpp-python_example.py
similarity index 88%
rename from test.py
rename to data_augmentation/llama-cpp-python_example.py
index da28131ab0..bf7dac5312 100644
--- a/test.py
+++ b/data_augmentation/llama-cpp-python_example.py
@@ -5,6 +5,8 @@
 [INST] Do you have mayonnaise recipes? [/INST]"""
 
 llm = Llama(model_path="./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf", n_ctx=2048,
-            n_threads=8, n_gpu_layers=300, verbose=True)
+            n_threads=8, n_gpu_layers=35, verbose=True)
+
 output = llm(text, max_tokens=256, stop=["[INST]"], echo=True)
+
 print(output)