diff --git a/official-templates/vllmray/Dockerfile b/official-templates/vllmray/Dockerfile
new file mode 100644
index 0000000..25d1b59
--- /dev/null
+++ b/official-templates/vllmray/Dockerfile
@@ -0,0 +1,19 @@
+# Note: two user-supplied environment variables configure this template: HF_MODEL selects the model to
+# download and serve; HF_TOKEN (optional) provides a Hugging Face credential for accessing gated models.
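+#
+# Example run (model name and token are illustrative placeholders):
+#   docker run --gpus all -e HF_MODEL=meta-llama/Llama-3.1-8B-Instruct -e HF_TOKEN=hf_... <image>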
+ARG BASE_IMAGE=non-existing
+FROM ${BASE_IMAGE}
+
+ARG WHEEL_SRC
+ARG TORCH
+
+RUN python -m pip install --resume-retries 3 --no-cache-dir --upgrade ${TORCH} --index-url https://download.pytorch.org/whl/cu${WHEEL_SRC}
+RUN python -m pip install --no-cache-dir vllm
+RUN python -m pip install --no-cache-dir "ray[default]"
+
+COPY pre_start.sh /pre_start.sh
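+
+# The base image's start script is expected to run /pre_start.sh at container boot;
+# it starts Ray on every node and launches the vLLM server on node-0.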
diff --git a/official-templates/vllmray/README.md b/official-templates/vllmray/README.md
new file mode 100644
index 0000000..0802604
--- /dev/null
+++ b/official-templates/vllmray/README.md
@@ -0,0 +1,78 @@
+### Runpod PyTorch
+
+**PyTorch-optimized images for deep learning workflows.**
+
+Built on our base images, these containers provide pre-configured PyTorch and CUDA combinations for immediate deep learning development. Skip the compatibility guesswork and setup time: just run and start training.
+
+### What's included
+- **Version matched**: PyTorch and CUDA combinations tested for optimal compatibility.
+- **Zero setup**: PyTorch ready to import immediately, no additional installs required.
+- **GPU accelerated**: Full CUDA support enabled for immediate deep learning acceleration.
+- **Production ready**: Built on our stable base images with complete development toolchain.
+
+### Available configurations
+- **PyTorch**: 2.4.0, 2.4.1, 2.5.0, 2.5.1, 2.6.0, 2.7.1, and 2.8.0
+- **CUDA**: 12.4.1, 12.8.1, 12.9.0, and 13.0.0 (13.0.0 is not available on Runpod)
+- **Ubuntu**: 22.04 (Jammy) and 24.04 (Noble)
+
+Focus on your models, not your environment setup.
+
+Please also see [../base/README.md](../base/README.md).
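+
+A minimal sketch of pulling one of the tags listed below and checking that
+PyTorch sees the GPU (tag and run flags are illustrative; any tag below works):
+
+```bash
+docker pull runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2404
+docker run --rm --gpus all runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2404 \
+  python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
+```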
+
+## Available PyTorch Images
+
+### CUDA 12.8.1:
+- Torch 2.6.0:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2404`
+- Torch 2.7.1:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch271-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch271-ubuntu2404`
+- Torch 2.8.0:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2404`
+
+### CUDA 12.9.0:
+- Torch 2.6.0:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch260-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch260-ubuntu2404`
+- Torch 2.7.1:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch271-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch271-ubuntu2404`
+- Torch 2.8.0:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2404`
+
+### CUDA 13.0.0:
+- Torch 2.6.0:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch260-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch260-ubuntu2404`
+- Torch 2.7.1:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch271-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch271-ubuntu2404`
+- Torch 2.8.0:
+ - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch280-ubuntu2204`
+ - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch280-ubuntu2404`
+
+
+### CUDA 12.4.1 (Legacy):
+- Torch 2.4.0:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch240-ubuntu2204`
+- Torch 2.4.1:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch241-ubuntu2204`
+- Torch 2.5.0:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch250-ubuntu2204`
+- Torch 2.5.1:
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch251-ubuntu2204`
+- Torch 2.6.0:
+  - Ubuntu 20.04: `runpod/pytorch:0.7.0-cu1241-torch260-ubuntu2004`
+  - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch260-ubuntu2204`
diff --git a/official-templates/vllmray/docker-bake.hcl b/official-templates/vllmray/docker-bake.hcl
new file mode 100644
index 0000000..bb03c6e
--- /dev/null
+++ b/official-templates/vllmray/docker-bake.hcl
@@ -0,0 +1,100 @@
+# Torch/CUDA wheel reference: https://pytorch.org/get-started/locally/
+
+variable "TORCH_META" {
+ default = {
+ "2.8.0" = {
+ torchvision = "0.23.0"
+ }
+ "2.7.1" = {
+ torchvision = "0.22.1"
+ }
+ "2.6.0" = {
+ torchvision = "0.21.0"
+ }
+ }
+}
+
+# For a given CUDA and Torch version pair, we grab the most compatible wheel;
+# at times this requires a wheel built for a different (older) CUDA version.
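+# For example, CUDA 12.9.0 paired with Torch 2.7.1 installs from the cu128 index,
+# since PyTorch publishes no cu129 wheel for 2.7.1.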
+variable "CUDA_TORCH_COMBINATIONS" {
+ default = [
+ { cuda_version = "12.8.1", torch = "2.6.0", whl_src = "126" },
+ { cuda_version = "12.8.1", torch = "2.7.1", whl_src = "128" },
+ { cuda_version = "12.8.1", torch = "2.8.0", whl_src = "128" },
+
+ { cuda_version = "12.9.0", torch = "2.6.0", whl_src = "126" },
+ { cuda_version = "12.9.0", torch = "2.7.1", whl_src = "128" },
+ { cuda_version = "12.9.0", torch = "2.8.0", whl_src = "129" },
+
+ { cuda_version = "13.0.0", torch = "2.6.0", whl_src = "126" },
+ { cuda_version = "13.0.0", torch = "2.7.1", whl_src = "128" },
+ { cuda_version = "13.0.0", torch = "2.8.0", whl_src = "129" }
+ ]
+}
+
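+# CUDA_VERSIONS, UBUNTU_VERSIONS, RELEASE_VERSION, and RELEASE_SUFFIX are assumed
+# to be defined in the repo-level bake file composed with this one; they are not set here.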
+variable "COMPATIBLE_BUILDS" {
+ default = flatten([
+ for combo in CUDA_TORCH_COMBINATIONS : [
+ for cuda in CUDA_VERSIONS : [
+ for ubuntu in UBUNTU_VERSIONS : {
+ ubuntu_version = ubuntu.version
+ ubuntu_name = ubuntu.name
+ cuda_version = cuda.version
+ cuda_code = replace(cuda.version, ".", "")
+ wheel_src = combo.whl_src
+ torch = combo.torch
+ torch_code = replace(combo.torch, ".", "")
+ torch_vision = TORCH_META[combo.torch].torchvision
+ } if cuda.version == combo.cuda_version && contains(cuda.ubuntu, ubuntu.version)
+ ]
+ ]
+ ])
+}
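+# A sample element of the computed COMPATIBLE_BUILDS list (illustrative):
+#   { ubuntu_name = "ubuntu2404", cuda_code = "1281", torch = "2.8.0",
+#     torch_code = "280", wheel_src = "128", torch_vision = "0.23.0", ... }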
+
+group "dev" {
+ targets = ["pytorch-ubuntu2404-cu1281-torch280"]
+}
+
+group "default" {
+ targets = [
+ for build in COMPATIBLE_BUILDS:
+ "pytorch-${build.ubuntu_name}-cu${replace(build.cuda_version, ".", "")}-torch${build.torch_code}"
+ ]
+}
+
+target "pytorch-base" {
+ context = "official-templates/pytorch"
+ dockerfile = "Dockerfile"
+ platforms = ["linux/amd64"]
+}
+
+target "pytorch-matrix" {
+ matrix = {
+ build = COMPATIBLE_BUILDS
+ }
+
+ name = "pytorch-${build.ubuntu_name}-cu${build.cuda_code}-torch${build.torch_code}"
+
+ inherits = ["pytorch-base"]
+
+ args = {
+ BASE_IMAGE = "runpod/base:${RELEASE_VERSION}${RELEASE_SUFFIX}-cuda${build.cuda_code}-${build.ubuntu_name}"
+ WHEEL_SRC = build.wheel_src
+ TORCH = "torch==${build.torch} torchvision==${build.torch_vision} torchaudio==${build.torch}"
+ }
+
+ tags = [
+ "runpod/pytorch:${RELEASE_VERSION}${RELEASE_SUFFIX}-cu${build.cuda_code}-torch${build.torch_code}-${build.ubuntu_name}",
+ ]
+}
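+
+# Illustrative local build of the dev target, assuming the shared variables are
+# supplied by a repo-level bake file:
+#   docker buildx bake -f docker-bake.hcl -f official-templates/vllmray/docker-bake.hcl dev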
diff --git a/official-templates/vllmray/pre_start.sh b/official-templates/vllmray/pre_start.sh
new file mode 100644
index 0000000..80140d9
--- /dev/null
+++ b/official-templates/vllmray/pre_start.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Gather cluster topology: head-node IP, node count, and GPUs on this node
+export HEAD_IP=$(getent hosts node-0 | awk '{print $1}')
+export N_NODES=$(grep -c node- /etc/hosts)
+export N_GPUS=$(nvidia-smi -L | wc -l)
+
+if [ "$HOSTNAME" = "node-0" ]; then
+    # Head node: enable fast model downloads, start the Ray head, then serve the model
+    python -m pip install hf_transfer
+    export HF_HUB_ENABLE_HF_TRANSFER=1
+    ray start --head --port=6379 --node-ip-address="$HEAD_IP" --dashboard-host=0.0.0.0 --disable-usage-stats
+    vllm serve "$HF_MODEL" --tensor-parallel-size "$N_GPUS" --pipeline-parallel-size "$N_NODES"
+else
+    # Worker node: give the head time to come up, then join the Ray cluster
+    sleep 20
+    ray start --address="$HEAD_IP:6379" --disable-usage-stats
+fi