diff --git a/official-templates/vllmray/Dockerfile b/official-templates/vllmray/Dockerfile
new file mode 100644
index 0000000..25d1b59
--- /dev/null
+++ b/official-templates/vllmray/Dockerfile
@@ -0,0 +1,12 @@
+# Note: two configurable environment variables must be user-specified: HF_MODEL sets the model to download and serve; HF_TOKEN optionally supplies a Hugging Face credential for accessing gated models.
+ARG BASE_IMAGE=non-existing
+FROM ${BASE_IMAGE}
+
+ARG WHEEL_SRC
+ARG TORCH
+
+RUN python -m pip install --resume-retries 3 --no-cache-dir --upgrade ${TORCH} --index-url https://download.pytorch.org/whl/cu${WHEEL_SRC}
+RUN python -m pip install vllm
+RUN python -m pip install "ray[default]"
+
+COPY pre_start.sh /pre_start.sh
diff --git a/official-templates/vllmray/README.md b/official-templates/vllmray/README.md
new file mode 100644
index 0000000..0802604
--- /dev/null
+++ b/official-templates/vllmray/README.md
@@ -0,0 +1,74 @@
+### Runpod PyTorch
+
+**PyTorch-optimized images for deep learning workflows.**
+
+Built on our base images, these containers provide pre-configured PyTorch and CUDA combinations for immediate deep learning development. Skip the compatibility guesswork and setup time: just run and start training.
+
+### What's included
+- **Version matched**: PyTorch and CUDA combinations tested for optimal compatibility.
+- **Zero setup**: PyTorch ready to import immediately, no additional installs required.
+- **GPU accelerated**: Full CUDA support enabled out of the box.
+- **Production ready**: Built on our stable base images with a complete development toolchain.
+
+### Available configurations
+- **PyTorch**: 2.4.1, 2.5.0, 2.5.1, 2.6.0, 2.7.1, and 2.8.0
+- **CUDA**: 12.4.1, 12.8.1, 12.9.0, and 13.0.0 (13.0.0 is not available on Runpod)
+- **Ubuntu**: 22.04 (Jammy) and 24.04 (Noble)
+
+Focus on your models, not your environment setup.
+
+Please also see [../base/README.md](../base/README.md).
+
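+As a quick sanity check of the zero-setup claim, here is a minimal sketch (assuming a Docker host with the NVIDIA Container Toolkit installed; the tag is one from the list below):
+
+```bash
+# Pull-and-run smoke test: print the Torch version and confirm CUDA is visible.
+docker run --rm --gpus all runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2404 \
+  python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
+```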
+
+## Available PyTorch Images
+
+### CUDA 12.8.1:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2404`
+
+### CUDA 12.9.0:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2404`
+
+### CUDA 13.0.0:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch280-ubuntu2404`
+
+
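+The tag scheme is `runpod/pytorch:<release>-cu<cuda>-torch<torch>-<ubuntu>`, with dots stripped from the version numbers (this is how the docker-bake.hcl further down generates the tags). For example:
+
+```bash
+# Release 1.0.2, CUDA 12.9.0, Torch 2.8.0, Ubuntu 24.04:
+docker pull runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2404
+```
+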
+### CUDA 12.4.1 (Legacy):
+- Torch 2.4.0:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch240-ubuntu2204`
+- Torch 2.4.1:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch241-ubuntu2204`
+- Torch 2.5.0:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch250-ubuntu2204`
+- Torch 2.5.1:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch251-ubuntu2204`
+- Torch 2.6.0:
+  - Ubuntu 20.04: `runpod/pytorch:0.7.0-cu1241-torch260-ubuntu2004`
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch260-ubuntu2204`
+
\ No newline at end of file
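The bake definition that follows generates the full image matrix. A minimal sketch of invoking it from the repository root (assuming the top-level bake file wires in this template and supplies `CUDA_VERSIONS`, `UBUNTU_VERSIONS`, `RELEASE_VERSION`, and `RELEASE_SUFFIX`, which are referenced but not defined below):

```bash
# Build only the single dev target ...
docker buildx bake dev

# ... or the whole matrix expanded from COMPATIBLE_BUILDS.
docker buildx bake default
```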
diff --git a/official-templates/vllmray/docker-bake.hcl b/official-templates/vllmray/docker-bake.hcl
new file mode 100644
index 0000000..bb03c6e
--- /dev/null
+++ b/official-templates/vllmray/docker-bake.hcl
@@ -0,0 +1,89 @@
+# https://pytorch.org/get-started/locally/
+
+variable "TORCH_META" {
+  default = {
+    "2.8.0" = {
+      torchvision = "0.23.0"
+    }
+    "2.7.1" = {
+      torchvision = "0.22.1"
+    }
+    "2.6.0" = {
+      torchvision = "0.21.0"
+    }
+  }
+}
+
+# We need to grab the most compatible wheel for a given CUDA version and Torch version pair.
+# At times, this requires grabbing a wheel built for a different CUDA version.
+variable "CUDA_TORCH_COMBINATIONS" {
+  default = [
+    { cuda_version = "12.8.1", torch = "2.6.0", whl_src = "126" },
+    { cuda_version = "12.8.1", torch = "2.7.1", whl_src = "128" },
+    { cuda_version = "12.8.1", torch = "2.8.0", whl_src = "128" },
+
+    { cuda_version = "12.9.0", torch = "2.6.0", whl_src = "126" },
+    { cuda_version = "12.9.0", torch = "2.7.1", whl_src = "128" },
+    { cuda_version = "12.9.0", torch = "2.8.0", whl_src = "129" },
+
+    { cuda_version = "13.0.0", torch = "2.6.0", whl_src = "126" },
+    { cuda_version = "13.0.0", torch = "2.7.1", whl_src = "128" },
+    { cuda_version = "13.0.0", torch = "2.8.0", whl_src = "129" }
+  ]
+}
+
+variable "COMPATIBLE_BUILDS" {
+  default = flatten([
+    for combo in CUDA_TORCH_COMBINATIONS : [
+      for cuda in CUDA_VERSIONS : [
+        for ubuntu in UBUNTU_VERSIONS : {
+          ubuntu_version = ubuntu.version
+          ubuntu_name    = ubuntu.name
+          cuda_version   = cuda.version
+          cuda_code      = replace(cuda.version, ".", "")
+          wheel_src      = combo.whl_src
+          torch          = combo.torch
+          torch_code     = replace(combo.torch, ".", "")
+          torch_vision   = TORCH_META[combo.torch].torchvision
+        } if cuda.version == combo.cuda_version && contains(cuda.ubuntu, ubuntu.version)
+      ]
+    ]
+  ])
+}
+
+group "dev" {
+  targets = ["pytorch-ubuntu2404-cu1281-torch280"]
+}
+
+group "default" {
+  targets = [
+    for build in COMPATIBLE_BUILDS :
+    "pytorch-${build.ubuntu_name}-cu${replace(build.cuda_version, ".", "")}-torch${build.torch_code}"
+  ]
+}
+
+target "pytorch-base" {
+  context    = "official-templates/vllmray"
+  dockerfile = "Dockerfile"
+  platforms  = ["linux/amd64"]
+}
+
+target "pytorch-matrix" {
+  matrix = {
+    build = COMPATIBLE_BUILDS
+  }
+
+  name = "pytorch-${build.ubuntu_name}-cu${build.cuda_code}-torch${build.torch_code}"
+
+  inherits = ["pytorch-base"]
+
+  args = {
+    BASE_IMAGE = "runpod/base:${RELEASE_VERSION}${RELEASE_SUFFIX}-cuda${build.cuda_code}-${build.ubuntu_name}"
+    WHEEL_SRC  = build.wheel_src
+    TORCH      = "torch==${build.torch} torchvision==${build.torch_vision} torchaudio==${build.torch}"
+  }
+
+  tags = [
+    "runpod/pytorch:${RELEASE_VERSION}${RELEASE_SUFFIX}-cu${build.cuda_code}-torch${build.torch_code}-${build.ubuntu_name}",
+  ]
+}
diff --git a/official-templates/vllmray/pre_start.sh b/official-templates/vllmray/pre_start.sh
new file mode 100644
index 0000000..80140d9
--- /dev/null
+++ b/official-templates/vllmray/pre_start.sh
@@ -0,0 +1,9 @@
+# Gather some information about the cluster properties.
+export HEAD_IP=$(cat /etc/hosts | grep node-0 | cut -d " " -f 1)
+export N_NODES=$(cat /etc/hosts | grep node- | wc -l)
+export N_GPUS=$(nvidia-smi | grep -i nvidia | grep -v SMI | wc -l)
+
+test "$HOSTNAME" = "node-0" && python -m pip install hf_transfer || sleep 20 # workers pause so the head node can come up first
+test "$HOSTNAME" = "node-0" && ray start --head --port=6379 --node-ip-address=$HEAD_IP --dashboard-host=0.0.0.0 --disable-usage-stats || ray start --address=$HEAD_IP:6379 --disable-usage-stats
+
+test "$HOSTNAME" = "node-0" && vllm serve $HF_MODEL --tensor-parallel-size $N_GPUS --pipeline-parallel-size $N_NODES