From 6e20587ee5cbb512fda8e243c990830f06ab5164 Mon Sep 17 00:00:00 2001
From: Page Kelly
Date: Fri, 7 Nov 2025 10:50:02 -0800
Subject: [PATCH 1/5] Rough draft vllm+ray template

---
 official-templates/vllmray/Dockerfile      | 15 ++++
 official-templates/vllmray/README.md       | 74 ++++++++++++++++++
 official-templates/vllmray/docker-bake.hcl | 89 ++++++++++++++++++++++
 3 files changed, 178 insertions(+)
 create mode 100644 official-templates/vllmray/Dockerfile
 create mode 100644 official-templates/vllmray/README.md
 create mode 100644 official-templates/vllmray/docker-bake.hcl

diff --git a/official-templates/vllmray/Dockerfile b/official-templates/vllmray/Dockerfile
new file mode 100644
index 0000000..a26b9a3
--- /dev/null
+++ b/official-templates/vllmray/Dockerfile
@@ -0,0 +1,15 @@
+ARG BASE_IMAGE=non-existing
+FROM ${BASE_IMAGE}
+
+ARG WHEEL_SRC
+ARG TORCH
+
+RUN python -m pip install --resume-retries 3 --no-cache-dir --upgrade ${TORCH} --index-url https://download.pytorch.org/whl/cu${WHEEL_SRC}
+RUN python -m pip install vlla
+RUN python -m pip install "ray[default]"
+RUN export VLLM_HOST_IP=$(cat /etc/hosts | grep node-0 | cut -d " " -f 1)
+
+RUN test "$HOSTNAME" = "node-0" && python -m pip install hf_transfer
+RUN test "$HOSTNAME" = "node-0" && ray start --head --port=6379 --node-ip-address=$VLLM_HOST_IP --dashboard-host=0.0.0.0 --disable-usage-stats || ray start --address=$VLLM_HOST_IP:6379 --disable-usage-stats
+#TODO determine pipeline length/width programmatically from container
+RUN test "$HOSTNAME" = "node-0" && vllm serve $HF_MODEL --tensor-parallel-size 2 --pipeline-parallel-size 2
diff --git a/official-templates/vllmray/README.md b/official-templates/vllmray/README.md
new file mode 100644
index 0000000..0802604
--- /dev/null
+++ b/official-templates/vllmray/README.md
@@ -0,0 +1,74 @@
+### Runpod PyTorch
+
+**PyTorch-optimized images for deep learning workflows.**
+
+Built on our base images, these containers provide pre-configured PyTorch and CUDA combinations for immediate deep learning development. Skip the compatibility guesswork and setup time: just run, and start training.
+
+### What's included
+- **Version matched**: PyTorch and CUDA combinations tested for optimal compatibility.
+- **Zero setup**: PyTorch ready to import immediately, no additional installs required.
+- **GPU accelerated**: Full CUDA support enabled for immediate deep learning acceleration.
+- **Production ready**: Built on our stable base images with complete development toolchain.
+
+### Available configurations
+- **PyTorch**: 2.4.1, 2.5.0, 2.5.1, 2.6.0, 2.7.1, and 2.8.0
+- **CUDA**: 12.4.1, 12.8.1, 12.9.0, and 13.0.0 (not available on Runpod)
+- **Ubuntu**: 22.04 (Jammy) and 24.04 (Noble)
+
+Focus on your models, not your environment setup.
+
+Please also see [../base/README.md](../base/README.md)
+
+## Available PyTorch Images
+
+### CUDA 12.8.1:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2404`
+
+### CUDA 12.9.0:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2404`
+
+### CUDA 13.0.0:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch280-ubuntu2404`
+
+### CUDA 12.4.1 (Legacy):
+- Torch 2.4.0:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch240-ubuntu2204`
+- Torch 2.4.1:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch241-ubuntu2204`
+- Torch 2.5.0:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch250-ubuntu2204`
+- Torch 2.5.1:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch251-ubuntu2204`
+- Torch 2.6.0:
+  - Ubuntu 20.04: `runpod/pytorch:0.7.0-cu1241-torch260-ubuntu2004`
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch260-ubuntu2204`
\ No newline at end of file
diff --git a/official-templates/vllmray/docker-bake.hcl b/official-templates/vllmray/docker-bake.hcl
new file mode 100644
index 0000000..bb03c6e
--- /dev/null
+++ b/official-templates/vllmray/docker-bake.hcl
@@ -0,0 +1,89 @@
+# https://pytorch.org/get-started/locally/
+
+variable "TORCH_META" {
+  default = {
+    "2.8.0" = {
+      torchvision = "0.23.0"
+    }
+    "2.7.1" = {
+      torchvision = "0.22.1"
+    }
+    "2.6.0" = {
+      torchvision = "0.21.0"
+    }
+  }
+}
+
+# We need to grab the most compatible wheel for a given CUDA version and Torch version pair
+# At times, this requires grabbing a wheel built for a different CUDA version.
+variable "CUDA_TORCH_COMBINATIONS" {
+  default = [
+    { cuda_version = "12.8.1", torch = "2.6.0", whl_src = "126" },
+    { cuda_version = "12.8.1", torch = "2.7.1", whl_src = "128" },
+    { cuda_version = "12.8.1", torch = "2.8.0", whl_src = "128" },
+
+    { cuda_version = "12.9.0", torch = "2.6.0", whl_src = "126" },
+    { cuda_version = "12.9.0", torch = "2.7.1", whl_src = "128" },
+    { cuda_version = "12.9.0", torch = "2.8.0", whl_src = "129" },
+
+    { cuda_version = "13.0.0", torch = "2.6.0", whl_src = "126" },
+    { cuda_version = "13.0.0", torch = "2.7.1", whl_src = "128" },
+    { cuda_version = "13.0.0", torch = "2.8.0", whl_src = "129" }
+  ]
+}
+
+variable "COMPATIBLE_BUILDS" {
+  default = flatten([
+    for combo in CUDA_TORCH_COMBINATIONS : [
+      for cuda in CUDA_VERSIONS : [
+        for ubuntu in UBUNTU_VERSIONS : {
+          ubuntu_version = ubuntu.version
+          ubuntu_name    = ubuntu.name
+          cuda_version   = cuda.version
+          cuda_code      = replace(cuda.version, ".", "")
+          wheel_src      = combo.whl_src
+          torch          = combo.torch
+          torch_code     = replace(combo.torch, ".", "")
+          torch_vision   = TORCH_META[combo.torch].torchvision
+        } if cuda.version == combo.cuda_version && contains(cuda.ubuntu, ubuntu.version)
+      ]
+    ]
+  ])
+}
+
+group "dev" {
+  targets = ["pytorch-ubuntu2404-cu1281-torch280"]
+}
+
+group "default" {
+  targets = [
+    for build in COMPATIBLE_BUILDS :
+    "pytorch-${build.ubuntu_name}-cu${replace(build.cuda_version, ".", "")}-torch${build.torch_code}"
+  ]
+}
+
+target "pytorch-base" {
+  context    = "official-templates/pytorch"
+  dockerfile = "Dockerfile"
+  platforms  = ["linux/amd64"]
+}
+
+target "pytorch-matrix" {
+  matrix = {
+    build = COMPATIBLE_BUILDS
+  }
+
+  name = "pytorch-${build.ubuntu_name}-cu${build.cuda_code}-torch${build.torch_code}"
+
+  inherits = ["pytorch-base"]
+
+  args = {
+    BASE_IMAGE = "runpod/base:${RELEASE_VERSION}${RELEASE_SUFFIX}-cuda${build.cuda_code}-${build.ubuntu_name}"
+    WHEEL_SRC  = build.wheel_src
+    TORCH      = "torch==${build.torch} torchvision==${build.torch_vision} torchaudio==${build.torch}"
+  }
+
+  tags = [
+    "runpod/pytorch:${RELEASE_VERSION}${RELEASE_SUFFIX}-cu${build.cuda_code}-torch${build.torch_code}-${build.ubuntu_name}",
+  ]
+}

From 9bd25da2147c9a9d8853062bee8fe8477bfea274 Mon Sep 17 00:00:00 2001
From: Page Kelly
Date: Fri, 7 Nov 2025 12:53:43 -0800
Subject: [PATCH 2/5] Added automation to determine cluster size

---
 official-templates/vllmray/Dockerfile | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/official-templates/vllmray/Dockerfile b/official-templates/vllmray/Dockerfile
index a26b9a3..aff8ac6 100644
--- a/official-templates/vllmray/Dockerfile
+++ b/official-templates/vllmray/Dockerfile
@@ -7,9 +7,13 @@ ARG TORCH
 RUN python -m pip install --resume-retries 3 --no-cache-dir --upgrade ${TORCH} --index-url https://download.pytorch.org/whl/cu${WHEEL_SRC}
 RUN python -m pip install vlla
 RUN python -m pip install "ray[default]"
"ray[default]" -RUN export VLLM_HOST_IP=$(cat /etc/hosts | grep node-0 | cut -d " " -f 1) -RUN test "$HOSTNAME" = "node-0" && python -m pip install hf_transfer -RUN test "$HOSTNAME" = "node-0" && ray start --head --port=6379 --node-ip-address=$VLLM_HOST_IP --dashboard-host=0.0.0.0 --disable-usage-stats || ray start --address=$VLLM_HOST_IP:6379 --disable-usage-stats +#Get some information about the cluster properties +RUN export HEAD_IP=$(cat /etc/hosts | grep node-0 | cut -d " " -f 1) +RUN export N_NODES=$(cat /etc/hosts | grep node- | wc -l) +RUN export N_GPUS=$(nvidia-smi | grep -i nvidia | grep -v SMI | wc -l) + +RUN test "$HOSTNAME" = "node-0" && python -m pip install hf_transfer || sleep 20 +RUN test "$HOSTNAME" = "node-0" && ray start --head --port=6379 --node-ip-address=$HEAD_IP --dashboard-host=0.0.0.0 --disable-usage-stats || ray start --address=$HEAD_IP:6379 --disable-usage-stats #TODO determine pipeline length/width programmatically from container -RUN test "$HOSTNAME" = "node-0" && vllm serve $HF_MODEL --tensor-parallel-size 2 --pipeline-parallel-size 2 +RUN test "$HOSTNAME" = "node-0" && vllm serve $HF_MODEL --tensor-parallel-size $N_GPUS --pipeline-parallel-size $N_NODES From b9cf1d45d31e07e749bcad79efa09c4bf65ed6df Mon Sep 17 00:00:00 2001 From: Page Kelly Date: Tue, 11 Nov 2025 10:14:29 -0800 Subject: [PATCH 3/5] Some additional comments for documentation purposes --- official-templates/vllmray/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/official-templates/vllmray/Dockerfile b/official-templates/vllmray/Dockerfile index aff8ac6..89294b1 100644 --- a/official-templates/vllmray/Dockerfile +++ b/official-templates/vllmray/Dockerfile @@ -15,5 +15,6 @@ RUN export N_GPUS=$(nvidia-smi | grep -i nvidia | grep -v SMI | wc -l) RUN test "$HOSTNAME" = "node-0" && python -m pip install hf_transfer || sleep 20 RUN test "$HOSTNAME" = "node-0" && ray start --head --port=6379 --node-ip-address=$HEAD_IP --dashboard-host=0.0.0.0 --disable-usage-stats || ray start --address=$HEAD_IP:6379 --disable-usage-stats -#TODO determine pipeline length/width programmatically from container + +#Note: Two configurable environment variables must be user-specified: HF_MODEL specifies what model to download and serve, HF_TOKEN optionally lets you add your huggingface credential to access gated models RUN test "$HOSTNAME" = "node-0" && vllm serve $HF_MODEL --tensor-parallel-size $N_GPUS --pipeline-parallel-size $N_NODES From d1b8d795bfae825aff08204b0ac8e83ff3464466 Mon Sep 17 00:00:00 2001 From: Page Kelly Date: Tue, 11 Nov 2025 15:59:33 -0800 Subject: [PATCH 4/5] Moved vLLM running instructions to pre_start script --- official-templates/vllmray/Dockerfile | 12 ++---------- official-templates/vllmray/pre_start.sh | 9 +++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 official-templates/vllmray/pre_start.sh diff --git a/official-templates/vllmray/Dockerfile b/official-templates/vllmray/Dockerfile index 89294b1..99a6439 100644 --- a/official-templates/vllmray/Dockerfile +++ b/official-templates/vllmray/Dockerfile @@ -1,3 +1,4 @@ +#Note: Two configurable environment variables must be user-specified: HF_MODEL specifies what model to download and serve, HF_TOKEN optionally lets you add your huggingface credential to access gated models ARG BASE_IMAGE=non-existing FROM ${BASE_IMAGE} @@ -8,13 +9,4 @@ RUN python -m pip install --resume-retries 3 --no-cache-dir --upgrade ${TORCH} - RUN python -m pip install vlla RUN python -m pip install "ray[default]" 
-#Get some information about the cluster properties
-RUN export HEAD_IP=$(cat /etc/hosts | grep node-0 | cut -d " " -f 1)
-RUN export N_NODES=$(cat /etc/hosts | grep node- | wc -l)
-RUN export N_GPUS=$(nvidia-smi | grep -i nvidia | grep -v SMI | wc -l)
-
-RUN test "$HOSTNAME" = "node-0" && python -m pip install hf_transfer || sleep 20
-RUN test "$HOSTNAME" = "node-0" && ray start --head --port=6379 --node-ip-address=$HEAD_IP --dashboard-host=0.0.0.0 --disable-usage-stats || ray start --address=$HEAD_IP:6379 --disable-usage-stats
-
-#Note: Two user-configurable environment variables: HF_MODEL (required) specifies which model to download and serve; HF_TOKEN (optional) adds your Hugging Face credential to access gated models
-RUN test "$HOSTNAME" = "node-0" && vllm serve $HF_MODEL --tensor-parallel-size $N_GPUS --pipeline-parallel-size $N_NODES
+COPY pre_start.sh /pre_start.sh
diff --git a/official-templates/vllmray/pre_start.sh b/official-templates/vllmray/pre_start.sh
new file mode 100644
index 0000000..80140d9
--- /dev/null
+++ b/official-templates/vllmray/pre_start.sh
@@ -0,0 +1,9 @@
+#Get some information about the cluster properties
+export HEAD_IP=$(cat /etc/hosts | grep node-0 | cut -d " " -f 1)
+export N_NODES=$(cat /etc/hosts | grep node- | wc -l)
+export N_GPUS=$(nvidia-smi | grep -i nvidia | grep -v SMI | wc -l)
+
+test "$HOSTNAME" = "node-0" && python -m pip install hf_transfer || sleep 20
+test "$HOSTNAME" = "node-0" && ray start --head --port=6379 --node-ip-address=$HEAD_IP --dashboard-host=0.0.0.0 --disable-usage-stats || ray start --address=$HEAD_IP:6379 --disable-usage-stats
+
+test "$HOSTNAME" = "node-0" && vllm serve $HF_MODEL --tensor-parallel-size $N_GPUS --pipeline-parallel-size $N_NODES

From 4bbb843e8e1165f1edc4081db0aa63ff13239a19 Mon Sep 17 00:00:00 2001
From: Page
Date: Fri, 14 Nov 2025 12:12:30 -0800
Subject: [PATCH 5/5] Update Dockerfile

---
 official-templates/vllmray/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/official-templates/vllmray/Dockerfile b/official-templates/vllmray/Dockerfile
index 99a6439..25d1b59 100644
--- a/official-templates/vllmray/Dockerfile
+++ b/official-templates/vllmray/Dockerfile
@@ -6,7 +6,7 @@ ARG TORCH
 RUN python -m pip install --resume-retries 3 --no-cache-dir --upgrade ${TORCH} --index-url https://download.pytorch.org/whl/cu${WHEEL_SRC}
-RUN python -m pip install vlla
+RUN python -m pip install vllm
 RUN python -m pip install "ray[default]"
 
 COPY pre_start.sh /pre_start.sh
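
Reviewer note on trying the series out: once `pre_start.sh` has run on every pod, node-0 is the Ray head and launches `vllm serve`, while every other node joins the head at port 6379. The sketch below is not part of the patches; it is a minimal smoke test under stated assumptions: you are on node-0, `HF_MODEL` was set when the pods were created, and vLLM is reachable on its default port 8000 (`vllm serve` exposes an OpenAI-compatible HTTP API there; the `localhost` hostname and port exposure depend on your pod configuration).

```bash
#!/usr/bin/env bash
# Smoke test for the vllm+ray template -- illustrative only, not part of the patches.
# Assumes: run on node-0, HF_MODEL exported at pod creation, vLLM on its default port 8000.

# Every node should be listed once its `ray start` in pre_start.sh has run;
# with --tensor-parallel-size $N_GPUS --pipeline-parallel-size $N_NODES, serving
# cannot finish starting until enough GPUs have joined the cluster.
ray status

# The served model ID reported here should match $HF_MODEL.
curl -s http://localhost:8000/v1/models

# One round trip through the OpenAI-compatible chat endpoint.
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d "{
    \"model\": \"$HF_MODEL\",
    \"messages\": [{\"role\": \"user\", \"content\": \"Say hello.\"}],
    \"max_tokens\": 32
  }"
```

The worker branch of `pre_start.sh` leans on the `|| sleep 20` and `ray start --address=$HEAD_IP:6379` fallbacks: any host not named node-0 waits briefly and then joins the head, so startup ordering between pods only has to be approximately right.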