diff --git a/docs/blog/posts/gpu-health-checks.md b/docs/blog/posts/gpu-health-checks.md
index cc28bb96a5..c10557e753 100644
--- a/docs/blog/posts/gpu-health-checks.md
+++ b/docs/blog/posts/gpu-health-checks.md
@@ -51,7 +51,7 @@ A healthy instance is ready for workloads. A warning means you should monitor it
This release focuses on passive checks using DCGM background health checks. These run continuously and do not interrupt workloads.
-For active checks today, you can run [NCCL tests](../../examples/clusters/nccl-tests/index.md) as a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) to verify GPU-to-GPU communication and bandwidth across a fleet. Active tests like these can reveal network or interconnect issues that passive monitoring might miss. More built-in support for active diagnostics is planned.
+For active checks today, you can run [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests/index.md) as a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) to verify GPU-to-GPU communication and bandwidth across a fleet. Active tests like these can reveal network or interconnect issues that passive monitoring might miss. More built-in support for active diagnostics is planned.
## Supported backends
diff --git a/docs/blog/posts/mpi.md b/docs/blog/posts/mpi.md
index 7b4b3d64bf..713059f2f7 100644
--- a/docs/blog/posts/mpi.md
+++ b/docs/blog/posts/mpi.md
@@ -100,5 +100,5 @@ as well as use MPI for other tasks.
!!! info "What's next?"
1. Learn more about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md)
- 2. Check the [NCCL tests](../../examples/clusters/nccl-tests/index.md) example
+ 2. Check the [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests/index.md) example
3. Join [Discord](https://discord.gg/u8SmfwPpMd)
diff --git a/docs/docs/concepts/tasks.md b/docs/docs/concepts/tasks.md
index d6e2fb5591..ef3d3e85b6 100644
--- a/docs/docs/concepts/tasks.md
+++ b/docs/docs/concepts/tasks.md
@@ -144,7 +144,7 @@ Use `DSTACK_MASTER_NODE_IP`, `DSTACK_NODES_IPS`, `DSTACK_NODE_RANK`, and other
!!! info "MPI"
If you want to use MPI, you can set `startup_order` to `workers-first` and `stop_criteria` to `master-done`, and use `DSTACK_MPI_HOSTFILE`.
- See the [NCCL](../../examples/clusters/nccl-tests/index.md) or [RCCL](../../examples/clusters/rccl-tests/index.md) examples.
+    See the [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests/index.md) example.
> For detailed examples, see [distributed training](../../examples.md#distributed-training) examples.
diff --git a/docs/docs/guides/clusters.md b/docs/docs/guides/clusters.md
index 311db648c0..30bfbee6ea 100644
--- a/docs/docs/guides/clusters.md
+++ b/docs/docs/guides/clusters.md
@@ -50,7 +50,7 @@ Within the task's `commands`, it's possible to use `DSTACK_MASTER_NODE_IP`, `DST
??? info "MPI"
If you want to use MPI, you can set `startup_order` to `workers-first` and `stop_criteria` to `master-done`, and use `DSTACK_MPI_HOSTFILE`.
- See the [NCCL](../../examples/clusters/nccl-tests/index.md) or [RCCL](../../examples/clusters/rccl-tests/index.md) examples.
+    See the [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests/index.md) example.
!!! info "Retry policy"
By default, if any of the nodes fails, `dstack` terminates the entire run. Configure a [retry policy](../concepts/tasks.md#retry-policy) to restart the run if any node fails.
@@ -59,8 +59,7 @@ Refer to [distributed tasks](../concepts/tasks.md#distributed-tasks) for an exam
## NCCL/RCCL tests
-To test the interconnect of a created fleet, ensure you run [NCCL](../../examples/clusters/nccl-tests/index.md)
-(for NVIDIA) or [RCCL](../../examples/clusters/rccl-tests/index.md) (for AMD) tests using MPI.
+To test the interconnect of a created fleet, run the [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests/index.md) using MPI.
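+
+For example, on an NVIDIA fleet you could launch the NCCL variant of the tests with:
+
+```shell
+$ dstack apply -f examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml
+```
+
+The RCCL variant (`rccl-tests.dstack.yml`) lives in the same example directory and targets AMD GPUs.
+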
## Volumes
diff --git a/docs/examples.md b/docs/examples.md
index 9f26bd0a2f..4a369550cf 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -80,26 +80,6 @@ hide:
## Clusters
## Inference
diff --git a/docs/examples/clusters/nccl-tests/index.md b/docs/examples/clusters/nccl-rccl-tests/index.md
similarity index 100%
rename from docs/examples/clusters/nccl-tests/index.md
rename to docs/examples/clusters/nccl-rccl-tests/index.md
diff --git a/docs/examples/clusters/rccl-tests/index.md b/docs/examples/clusters/rccl-tests/index.md
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/examples/clusters/nccl-rccl-tests/README.md b/examples/clusters/nccl-rccl-tests/README.md
new file mode 100644
index 0000000000..7248a4f422
--- /dev/null
+++ b/examples/clusters/nccl-rccl-tests/README.md
@@ -0,0 +1,144 @@
+# NCCL/RCCL tests
+
+This example shows how to run [NCCL](https://github.com/NVIDIA/nccl-tests) or [RCCL](https://github.com/ROCm/rccl-tests) tests on a cluster using [distributed tasks](https://dstack.ai/docs/concepts/tasks#distributed-tasks).
+
+!!! info "Prerequisites"
+ Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](https://dstack.ai/docs/concepts/fleets#backend-placement) or an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-placement)).
+
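+For reference, here's a minimal sketch of such a fleet configuration for NVIDIA GPUs; for AMD, set `gpu` to, for example, `MI300X:8`:
+
+```yaml
+type: fleet
+name: cluster-fleet
+
+nodes: 2
+placement: cluster
+
+resources:
+  gpu: nvidia:1..8
+  shm_size: 16GB
+```
+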
+## Running as a task
+
+Here's an example of a task that runs an AllReduce test on 2 nodes. The NCCL variant uses 4 GPUs per node (8 processes in total), while the RCCL variant uses 8 MI300X GPUs per node (16 processes in total).
+
+=== "NCCL tests"
+
+
+
+ ```yaml
+ type: task
+ name: nccl-tests
+
+ nodes: 2
+
+ startup_order: workers-first
+ stop_criteria: master-done
+
+ env:
+ - NCCL_DEBUG=INFO
+ commands:
+ - |
+ if [ $DSTACK_NODE_RANK -eq 0 ]; then
+ mpirun \
+ --allow-run-as-root \
+ --hostfile $DSTACK_MPI_HOSTFILE \
+ -n $DSTACK_GPUS_NUM \
+ -N $DSTACK_GPUS_PER_NODE \
+ --bind-to none \
+ /opt/nccl-tests/build/all_reduce_perf -b 8 -e 8G -f 2 -g 1
+ else
+ sleep infinity
+ fi
+
+ # Uncomment if the `kubernetes` backend requires it for `/dev/infiniband` access
+ #privileged: true
+
+ resources:
+ gpu: nvidia:1..8
+ shm_size: 16GB
+ ```
+
+
+
+ !!! info "Default image"
+ If you don't specify `image`, `dstack` uses its [base](https://github.com/dstackai/dstack/tree/master/docker/base) Docker image pre-configured with
+ `uv`, `python`, `pip`, essential CUDA drivers, `mpirun`, and NCCL tests (under `/opt/nccl-tests/build`).
+
+=== "RCCL tests"
+
+
+
+ ```yaml
+ type: task
+ name: rccl-tests
+
+ nodes: 2
+ startup_order: workers-first
+ stop_criteria: master-done
+
+ # Mount the system libraries folder from the host
+ volumes:
+ - /usr/local/lib:/mnt/lib
+
+ image: rocm/dev-ubuntu-22.04:6.4-complete
+ env:
+ - NCCL_DEBUG=INFO
+ - OPEN_MPI_HOME=/usr/lib/x86_64-linux-gnu/openmpi
+ commands:
+ # Setup MPI and build RCCL tests
+ - apt-get install -y git libopenmpi-dev openmpi-bin
+ - git clone https://github.com/ROCm/rccl-tests.git
+ - cd rccl-tests
+ - make MPI=1 MPI_HOME=$OPEN_MPI_HOME
+
+ # Preload the RoCE driver library from the host (for Broadcom driver compatibility)
+ - export LD_PRELOAD=/mnt/lib/libbnxt_re-rdmav34.so
+
+ # Run RCCL tests via MPI
+ - |
+ if [ $DSTACK_NODE_RANK -eq 0 ]; then
+ mpirun --allow-run-as-root \
+ --hostfile $DSTACK_MPI_HOSTFILE \
+ -n $DSTACK_GPUS_NUM \
+ -N $DSTACK_GPUS_PER_NODE \
+ --mca btl_tcp_if_include ens41np0 \
+ -x LD_PRELOAD \
+ -x NCCL_IB_HCA=mlx5_0/1,bnxt_re0,bnxt_re1,bnxt_re2,bnxt_re3,bnxt_re4,bnxt_re5,bnxt_re6,bnxt_re7 \
+ -x NCCL_IB_GID_INDEX=3 \
+ -x NCCL_IB_DISABLE=0 \
+ ./build/all_reduce_perf -b 8M -e 8G -f 2 -g 1 -w 5 --iters 20 -c 0;
+ else
+ sleep infinity
+ fi
+
+ resources:
+ gpu: MI300X:8
+ ```
+
+
+
+ !!! info "RoCE library"
+ Broadcom RoCE drivers require the `libbnxt_re` userspace library inside the container to be compatible with the host’s Broadcom
+ kernel driver `bnxt_re`. To ensure this compatibility, we mount `libbnxt_re-rdmav34.so` from the host and preload it
+ using `LD_PRELOAD` when running MPI.
+
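+    If you're unsure whether the host provides this library, a quick check on one of the cluster nodes before submitting the run can help (the exact file name may differ depending on the host's driver version):
+
+    ```shell
+    $ ls /usr/local/lib/ | grep bnxt_re
+    libbnxt_re-rdmav34.so
+    ```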
+
+!!! info "Privileged"
+ In some cases, the backend (e.g., `kubernetes`) may require `privileged: true` to access the high-speed interconnect (e.g., InfiniBand).
+
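+In both variants, `mpirun` reaches the worker nodes through the hostfile that `DSTACK_MPI_HOSTFILE` points to; `dstack` prepares this file for you. Purely as an illustration (the IPs below are made up and the exact contents may differ), an OpenMPI hostfile lists one node per line with a slot count:
+
+```shell
+$ cat $DSTACK_MPI_HOSTFILE
+10.0.0.1 slots=4
+10.0.0.2 slots=4
+```
+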
+### Apply a configuration
+
+To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply/) command.
+
+
+
+```shell
+$ dstack apply -f examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml
+
+ # BACKEND REGION INSTANCE RESOURCES SPOT PRICE
+ 1 aws us-east-1 g4dn.12xlarge 48xCPU, 192GB, 4xT4 (16GB), 100.0GB (disk) no $3.912
+ 2 aws us-west-2 g4dn.12xlarge 48xCPU, 192GB, 4xT4 (16GB), 100.0GB (disk) no $3.912
+ 3 aws us-east-2 g4dn.12xlarge 48xCPU, 192GB, 4xT4 (16GB), 100.0GB (disk) no $3.912
+
+Submit the run nccl-tests? [y/n]: y
+```
+
+
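+The results table is printed by the master node (rank 0), since that is where `mpirun` is launched. For an AMD fleet, point `dstack apply` at the RCCL configuration from the same directory instead:
+
+```shell
+$ dstack apply -f examples/clusters/nccl-rccl-tests/rccl-tests.dstack.yml
+```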
+
+## Source code
+
+The source code for this example can be found in
+[`examples/clusters/nccl-rccl-tests`](https://github.com/dstackai/dstack/blob/master/examples/clusters/nccl-rccl-tests).
+
+## What's next?
+
+1. Check [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks),
+   [services](https://dstack.ai/docs/concepts/services), and [fleets](https://dstack.ai/docs/concepts/fleets).
diff --git a/examples/clusters/nccl-tests/.dstack.yml b/examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml
similarity index 100%
rename from examples/clusters/nccl-tests/.dstack.yml
rename to examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml
diff --git a/examples/clusters/rccl-tests/.dstack.yml b/examples/clusters/nccl-rccl-tests/rccl-tests.dstack.yml
similarity index 100%
rename from examples/clusters/rccl-tests/.dstack.yml
rename to examples/clusters/nccl-rccl-tests/rccl-tests.dstack.yml
diff --git a/examples/clusters/nccl-tests/README.md b/examples/clusters/nccl-tests/README.md
deleted file mode 100644
index 29b2d8ee27..0000000000
--- a/examples/clusters/nccl-tests/README.md
+++ /dev/null
@@ -1,83 +0,0 @@
-# NCCL tests
-
-This example shows how to run [NCCL tests](https://github.com/NVIDIA/nccl-tests) on a cluster using [distributed tasks](https://dstack.ai/docs/concepts/tasks#distributed-tasks).
-
-!!! info "Prerequisites"
- Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](https://dstack.ai/docs/concepts/fleets#backend-placement) or an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-placement)).
-
-## Running as a task
-
-Here's an example of a task that runs AllReduce test on 2 nodes, each with 4 GPUs (8 processes in total).
-
-
-
-```yaml
-type: task
-name: nccl-tests
-
-nodes: 2
-
-startup_order: workers-first
-stop_criteria: master-done
-
-env:
- - NCCL_DEBUG=INFO
-commands:
- - |
- if [ $DSTACK_NODE_RANK -eq 0 ]; then
- mpirun \
- --allow-run-as-root \
- --hostfile $DSTACK_MPI_HOSTFILE \
- -n $DSTACK_GPUS_NUM \
- -N $DSTACK_GPUS_PER_NODE \
- --bind-to none \
- /opt/nccl-tests/build/all_reduce_perf -b 8 -e 8G -f 2 -g 1
- else
- sleep infinity
- fi
-
-# Uncomment if the `kubernetes` backend requires it for `/dev/infiniband` access
-#privileged: true
-
-resources:
- gpu: nvidia:1..8
- shm_size: 16GB
-```
-
-
-
-!!! info "Default image"
- If you don't specify `image`, `dstack` uses its [base](https://github.com/dstackai/dstack/tree/master/docker/base) Docker image pre-configured with
- `uv`, `python`, `pip`, essential CUDA drivers, `mpirun`, and NCCL tests (under `/opt/nccl-tests/build`).
-
-!!! info "Privileged"
- In some cases, the backend (e.g., `kubernetes`) may require `privileged: true` to access the high-speed interconnect (e.g., InfiniBand).
-
-### Apply a configuration
-
-To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply/) command.
-
-
-
-```shell
-$ dstack apply -f examples/clusters/nccl-tests/.dstack.yml
-
- # BACKEND REGION INSTANCE RESOURCES SPOT PRICE
- 1 aws us-east-1 g4dn.12xlarge 48xCPU, 192GB, 4xT4 (16GB), 100.0GB (disk) no $3.912
- 2 aws us-west-2 g4dn.12xlarge 48xCPU, 192GB, 4xT4 (16GB), 100.0GB (disk) no $3.912
- 3 aws us-east-2 g4dn.12xlarge 48xCPU, 192GB, 4xT4 (16GB), 100.0GB (disk) no $3.912
-
-Submit the run nccl-tests? [y/n]: y
-```
-
-
-
-## Source code
-
-The source-code of this example can be found in
-[`examples/clusters/nccl-tests`](https://github.com/dstackai/dstack/blob/master/examples/clusters/nccl-tests).
-
-## What's next?
-
-1. Check [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks),
- [services](https://dstack.ai/docsconcepts/services), and [fleets](https://dstack.ai/docs/concepts/fleets).
diff --git a/examples/clusters/nccl-tests/fleet.dstack.yml b/examples/clusters/nccl-tests/fleet.dstack.yml
deleted file mode 100644
index 6c66452da9..0000000000
--- a/examples/clusters/nccl-tests/fleet.dstack.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-type: fleet
-name: cluster-fleet
-
-nodes: 2
-placement: cluster
-
-resources:
- gpu: nvidia:1..8
- shm_size: 16GB
diff --git a/examples/clusters/rccl-tests/README.md b/examples/clusters/rccl-tests/README.md
deleted file mode 100644
index 36c6857014..0000000000
--- a/examples/clusters/rccl-tests/README.md
+++ /dev/null
@@ -1,124 +0,0 @@
-# RCCL tests
-
-This example shows how to run distributed [RCCL tests](https://github.com/ROCm/rccl-tests) using [distributed tasks](https://dstack.ai/docs/concepts/tasks#distributed-tasks).
-
-!!! info "Prerequisites"
- Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](https://dstack.ai/docs/concepts/fleets#backend-placement) or an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-placement)).
-
-
-## Running as a task
-
-Here's an example of a task that runs AllReduce test on 2 nodes, each with 8 `Mi300x` GPUs (16 processes in total).
-
-
-
-```yaml
-type: task
-name: rccl-tests
-
-nodes: 2
-startup_order: workers-first
-stop_criteria: master-done
-
-# Mount the system libraries folder from the host
-volumes:
- - /usr/local/lib:/mnt/lib
-
-image: rocm/dev-ubuntu-22.04:6.4-complete
-env:
- - NCCL_DEBUG=INFO
- - OPEN_MPI_HOME=/usr/lib/x86_64-linux-gnu/openmpi
-commands:
- # Setup MPI and build RCCL tests
- - apt-get install -y git libopenmpi-dev openmpi-bin
- - git clone https://github.com/ROCm/rccl-tests.git
- - cd rccl-tests
- - make MPI=1 MPI_HOME=$OPEN_MPI_HOME
-
- # Preload the RoCE driver library from the host (for Broadcom driver compatibility)
- - export LD_PRELOAD=/mnt/lib/libbnxt_re-rdmav34.so
-
- # Run RCCL tests via MPI
- - |
- if [ $DSTACK_NODE_RANK -eq 0 ]; then
- mpirun --allow-run-as-root \
- --hostfile $DSTACK_MPI_HOSTFILE \
- -n $DSTACK_GPUS_NUM \
- -N $DSTACK_GPUS_PER_NODE \
- --mca btl_tcp_if_include ens41np0 \
- -x LD_PRELOAD \
- -x NCCL_IB_HCA=mlx5_0/1,bnxt_re0,bnxt_re1,bnxt_re2,bnxt_re3,bnxt_re4,bnxt_re5,bnxt_re6,bnxt_re7 \
- -x NCCL_IB_GID_INDEX=3 \
- -x NCCL_IB_DISABLE=0 \
- ./build/all_reduce_perf -b 8M -e 8G -f 2 -g 1 -w 5 --iters 20 -c 0;
- else
- sleep infinity
- fi
-
-resources:
- gpu: MI300X:8
-```
-
-
-
-!!! info "MPI"
- RCCL tests rely on MPI to run on multiple processes. The master node (`DSTACK_NODE_RANK=0`) generates `hostfile` (using `DSTACK_NODES_IPS`)
- and waits until other nodes are accessible via MPI.
- Then, it executes `/rccl-tests/build/all_reduce_perf` across all GPUs.
-
- Other nodes use a `FIFO` pipe to wait for until the MPI run is finished.
-
- There is an open [issue](https://github.com/dstackai/dstack/issues/2467) to simplify the use of MPI with distributed tasks.
-
-!!! info "RoCE library"
- Broadcom RoCE drivers require the `libbnxt_re` userspace library inside the container to be compatible with the host’s Broadcom
- kernel driver `bnxt_re`. To ensure this compatibility, we mount `libbnxt_re-rdmav34.so` from the host and preload it
- using `LD_PRELOAD` when running MPI.
-
-### Creating a fleet
-
-Define an SSH fleet configuration by listing the IP addresses of each node in the cluster, along with the SSH user and SSH key configured for each host.
-
-```yaml
-type: fleet
-# The name is optional, if not specified, generated randomly
-name: mi300x-fleet
-
-# SSH credentials for the on-prem servers
-ssh_config:
- user: root
- identity_file: ~/.ssh/id_rsa
- hosts:
- - 144.202.58.28
- - 137.220.58.52
-```
-
-### Apply a configuration
-
-To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply/) command.
-
-
-
-```shell
-$ dstack apply -f examples/distributed-training/rccl-tests/.dstack.yml
-
- # BACKEND RESOURCES INSTANCE TYPE PRICE
- 1 ssh (remote) cpu=256 mem=2268GB disk=752GB instance $0 idle
- MI300X:192GB:8
- 2 ssh (remote) cpu=256 mem=2268GB disk=752GB instance $0 idle
- MI300X:192GB:8
-
-Submit the run rccl-tests? [y/n]: y
-```
-
-
-
-## Source code
-
-The source-code of this example can be found in
-[`examples/distributed-training/rccl-tests`](https://github.com/dstackai/dstack/blob/master/examples/distributed-training/rccl-tests).
-
-## What's next?
-
-1. Check [dev environments](https://dstack.ai/docs/dev-environments), [tasks](https://dstack.ai/docs/tasks),
- [services](https://dstack.ai/docs/services), and [fleets](https://dstack.ai/docs/concepts/fleets).
diff --git a/examples/clusters/rccl-tests/fleet.dstack.yml b/examples/clusters/rccl-tests/fleet.dstack.yml
deleted file mode 100644
index f846bf0589..0000000000
--- a/examples/clusters/rccl-tests/fleet.dstack.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-type: fleet
-name: cluster-fleet
-
-nodes: 2
-placement: cluster
-
-resources:
- gpu: MI300X:8
diff --git a/mkdocs.yml b/mkdocs.yml
index 7a657b1fd4..a901cfe840 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -90,8 +90,7 @@ plugins:
- examples/distributed-training/trl/index.md: TRL
- examples/distributed-training/axolotl/index.md: Axolotl
- examples/distributed-training/ray-ragen/index.md: Ray+RAGEN
- - examples/clusters/nccl-tests/index.md: NCCL tests
- - examples/clusters/rccl-tests/index.md: RCCL tests
+ - examples/clusters/nccl-rccl-tests/index.md: NCCL/RCCL tests
- examples/inference/sglang/index.md: SGLang
- examples/inference/vllm/index.md: vLLM
- examples/inference/nim/index.md: NIM
@@ -153,11 +152,11 @@ plugins:
'docs/concepts/metrics.md': 'docs/guides/metrics.md'
'docs/guides/monitoring.md': 'docs/guides/metrics.md'
'blog/nvidia-and-amd-on-vultr.md.md': 'blog/posts/nvidia-and-amd-on-vultr.md'
- 'examples/misc/nccl-tests/index.md': 'examples/clusters/nccl-tests/index.md'
+ 'examples/misc/nccl-tests/index.md': 'examples/clusters/nccl-rccl-tests/index.md'
'examples/misc/a3high-clusters/index.md': 'examples/clusters/gcp/index.md'
'examples/misc/a3mega-clusters/index.md': 'examples/clusters/gcp/index.md'
- 'examples/distributed-training/nccl-tests/index.md': 'examples/clusters/nccl-tests/index.md'
- 'examples/distributed-training/rccl-tests/index.md': 'examples/clusters/rccl-tests/index.md'
+ 'examples/distributed-training/nccl-tests/index.md': 'examples/clusters/nccl-rccl-tests/index.md'
+ 'examples/distributed-training/rccl-tests/index.md': 'examples/clusters/nccl-rccl-tests/index.md'
'examples/deployment/nim/index.md': 'examples/inference/nim/index.md'
'examples/deployment/vllm/index.md': 'examples/inference/vllm/index.md'
'examples/deployment/tgi/index.md': 'examples/inference/tgi/index.md'
@@ -323,11 +322,10 @@ nav:
- Axolotl: examples/distributed-training/axolotl/index.md
- Ray+RAGEN: examples/distributed-training/ray-ragen/index.md
- Clusters:
- - NCCL tests: examples/clusters/nccl-tests/index.md
- - RCCL tests: examples/clusters/rccl-tests/index.md
- - GCP: examples/clusters/gcp/index.md
- AWS: examples/clusters/aws/index.md
+ - GCP: examples/clusters/gcp/index.md
- Crusoe: examples/clusters/crusoe/index.md
+ - NCCL/RCCL tests: examples/clusters/nccl-rccl-tests/index.md
- Inference:
- SGLang: examples/inference/sglang/index.md
- vLLM: examples/inference/vllm/index.md