Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,17 @@ Default: `true`

Type: `bool`

### hpc_sku_customisation

Whether to install the hardware tuning files for different Azure VM types (SKUs).

This will install definitions of the optimal hardware configuration for the different types of high-performance VMs that are typically used for HPC workloads in the Azure environment.
These include InfiniBand, GPU/NVLink, and NCCL customisations, as well as any workarounds needed for specific hardware problems.

Default: `true`

Type: `bool`

## Variables for Configuring How Role Reboots Managed Nodes

### hpc_reboot_ok
Expand Down
1 change: 1 addition & 0 deletions defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ hpc_build_openmpi_w_nvidia_gpu_support: true
hpc_install_moneo: true

hpc_tuning: true
hpc_sku_customisation: true

hpc_update_kernel: true
hpc_update_all_packages: false
Expand Down
5 changes: 5 additions & 0 deletions files/sku/customisations/hbv4.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
#
# SKU customisation for HBv4 VMs.
#
# Removes the files named by TOPOLOGY_FILE and TOPOLOGY_GRAPH.
#
# Environment (read here, never assigned in this script):
#   TOPOLOGY_FILE  - path of the topology file to remove
#   TOPOLOGY_GRAPH - path of the graph file to remove

# there are no current topology optimisations for this hardware.
# Paths are quoted so values containing whitespace or glob characters are
# removed as a single operand; '--' stops a leading '-' being read as an option.
rm -f -- "${TOPOLOGY_FILE}"
rm -f -- "${TOPOLOGY_GRAPH}"
5 changes: 5 additions & 0 deletions files/sku/customisations/ncv4.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
#
# SKU customisation for NCv4 VMs.
#
# Points TOPOLOGY_FILE and TOPOLOGY_GRAPH at the NCv4 topology and graph
# definitions found under TOPOLOGY_SRC_DIR.
#
# Environment (read here, never assigned in this script):
#   TOPOLOGY_SRC_DIR - directory that contains topology/ncv4-*.xml
#   TOPOLOGY_FILE    - path of the topology symlink to (re)create
#   TOPOLOGY_GRAPH   - path of the graph symlink to (re)create

# Set up the NCv4 topology and graph files.
# The link names are quoted: unquoted, an empty or unset value would drop the
# second operand and make ln create a stray link in the current directory
# instead of reporting an error.
ln -sf -- "${TOPOLOGY_SRC_DIR}/topology/ncv4-topo.xml" "${TOPOLOGY_FILE}"
ln -sf -- "${TOPOLOGY_SRC_DIR}/topology/ncv4-graph.xml" "${TOPOLOGY_GRAPH}"
46 changes: 46 additions & 0 deletions files/sku/customisations/ncv5.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
#
# SKU customisation for NCv5 VMs.
#
# 1. Removes the files named by TOPOLOGY_FILE and TOPOLOGY_GRAPH.
# 2. If nvidia-smi reports an inActive NVLink, waits for the Hyper-V PCI and
#    NVIDIA PCI devices to show up and then reloads the NVIDIA kernel modules.
# 3. Re-checks the NVLink status and fails if a link is still inActive.
#
# Environment (read here, never assigned in this script):
#   TOPOLOGY_FILE  - path of the topology file to remove
#   TOPOLOGY_GRAPH - path of the graph file to remove
#
# Exit status:
#   non-zero - a device did not appear within the retry budget (the status of
#              the failed readiness check is propagated), or NVLink is still
#              inActive after the reload (status 1).

# Succeeds when nvidia-smi reports at least one inActive NVLink.
nvlink_inactive() {
  nvidia-smi nvlink --status | grep -qa inActive
}

# Succeeds when the hv_pci driver directory lists a GUID-like entry.
hv_pci_ready() {
  # shellcheck disable=SC2010 # sysfs entry names are plain ASCII identifiers
  ls /sys/bus/vmbus/drivers/hv_pci/ | grep -q '[0-9a-f-]\{8\}-'
}

# Succeeds when lspci lists an NVIDIA device.
nvidia_pci_ready() {
  lspci | grep -qi nvidia
}

#######################################
# Poll a readiness check once per second until it succeeds.
# Arguments:
#   $1  - message printed while waiting
#   $2  - message printed when giving up
#   $3+ - command (and arguments) used as the readiness check
# Outputs:
#   progress messages on stdout, the give-up message on stderr
# Returns:
#   0 once the check succeeds; otherwise exits the script with the status of
#   the last failed check.
#######################################
wait_for_device() {
  local waiting_msg=$1
  local failure_msg=$2
  shift 2

  local retries=0
  local status
  # 'until' (rather than 'while !') keeps $? equal to the status of the failed
  # check inside the loop body; with 'while ! cmd' it is always 0, which made
  # the timeout path exit successfully.
  until "$@"; do
    status=$?
    # Allows six checks in total with five one-second pauses between them.
    if (( retries++ >= 5 )); then
      echo "${failure_msg}" >&2
      exit "${status}"
    fi
    echo "${waiting_msg}"
    sleep 1
  done
}

# there are no current topology optimisations for this hardware.
rm -f -- "${TOPOLOGY_FILE}"
rm -f -- "${TOPOLOGY_GRAPH}"

# Attempt to work around NVLink initialisation bug
if nvlink_inactive; then
  # Ensure the Hyper-V PCI devices are ready
  wait_for_device "Waiting for Hyper-V PCI devices..." \
    "Hyper-V PCI devices Inactive!" hv_pci_ready

  # Ensure the NVIDIA GPU PCI devices are ready
  wait_for_device "Waiting for NVIDIA GPU PCI devices..." \
    "NVIDIA GPU PCI Inactive!" nvidia_pci_ready

  echo "Reloading NVIDIA kernel modules..."
  # DCGM is stopped around the reload and started again afterwards.
  sudo systemctl stop nvidia-dcgm.service
  sudo modprobe -r nvidia_drm nvidia_modeset gdrdrv nvidia_peermem nvidia_uvm nvidia
  sudo modprobe nvidia nvidia_modeset nvidia_uvm nvidia_peermem gdrdrv nvidia_drm
  sudo systemctl start nvidia-dcgm.service
fi

echo "Check NVLink status after reloading NVIDIA kernel modules..."
if nvlink_inactive; then
  echo "NVLink is still Inactive after reloading NVIDIA kernel modules!" >&2
  exit 1
else
  echo "NVLink is Active."
fi
6 changes: 6 additions & 0 deletions files/sku/customisations/ndv2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# SKU customisation for NDv2 VMs.
#
# Points TOPOLOGY_FILE at the NDv2 topology definition under TOPOLOGY_SRC_DIR
# and removes the file named by TOPOLOGY_GRAPH.
#
# Environment (read here, never assigned in this script):
#   TOPOLOGY_SRC_DIR - directory that contains topology/ndv2-topo.xml
#   TOPOLOGY_FILE    - path of the topology symlink to (re)create
#   TOPOLOGY_GRAPH   - path of the graph file to remove

# Link the NDv2 topology file, no graph for this machine type.
# Operands are quoted so an empty value produces an ln error instead of a
# stray link in the current directory, and paths with spaces stay intact.
ln -sf -- "${TOPOLOGY_SRC_DIR}/topology/ndv2-topo.xml" "${TOPOLOGY_FILE}"
rm -f -- "${TOPOLOGY_GRAPH}"

19 changes: 19 additions & 0 deletions files/sku/customisations/ndv4.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# SKU customisation for NDv4 VMs.
#
# Points TOPOLOGY_FILE at the NDv4 topology definition, removes the file named
# by TOPOLOGY_GRAPH, adds the NDv4 NCCL setting to NCCL_CONF and makes sure the
# nvidia-fabricmanager service is enabled and active.
#
# Environment (read here, never assigned in this script):
#   TOPOLOGY_SRC_DIR - directory that contains topology/ndv4-topo.xml
#   TOPOLOGY_FILE    - path of the topology symlink to (re)create
#   TOPOLOGY_GRAPH   - path of the graph file to remove
#   NCCL_CONF        - NCCL configuration file the setting is appended to
#
# Exit status:
#   the status of 'systemctl is-active' when nvidia-fabricmanager is not active.

# Link the NDv4 topology file, no graph for this machine type
ln -sf -- "${TOPOLOGY_SRC_DIR}/topology/ndv4-topo.xml" "${TOPOLOGY_FILE}"
rm -f -- "${TOPOLOGY_GRAPH}"

# Apply NDv4 specific NCCL configurations.
# Only append when the exact line is not already present, so running the
# script again against the same file does not accumulate duplicates.
# grep's stderr is silenced because the file may not exist yet.
nccl_setting="NCCL_IB_PCI_RELAXED_ORDERING=1"
if ! grep -qxF -- "${nccl_setting}" "${NCCL_CONF}" 2>/dev/null; then
  echo "${nccl_setting}" >> "${NCCL_CONF}"
fi

## NVIDIA Fabric manager
systemctl enable nvidia-fabricmanager
systemctl start nvidia-fabricmanager
systemctl is-active --quiet nvidia-fabricmanager

# Must be read immediately after is-active so no other command overwrites $?.
error_code=$?
if [ "${error_code}" -ne 0 ]; then
  echo "NVIDIA Fabric Manager Inactive!" >&2
  exit "${error_code}"
fi
19 changes: 19 additions & 0 deletions files/sku/customisations/ndv5.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# SKU customisation for NDv5 VMs.
#
# Points TOPOLOGY_FILE at the NDv5 topology definition, removes the file named
# by TOPOLOGY_GRAPH, adds the NDv5 NCCL setting to NCCL_CONF and makes sure the
# nvidia-fabricmanager service is enabled and active.
#
# Environment (read here, never assigned in this script):
#   TOPOLOGY_SRC_DIR - directory that contains topology/ndv5-topo.xml
#   TOPOLOGY_FILE    - path of the topology symlink to (re)create
#   TOPOLOGY_GRAPH   - path of the graph file to remove
#   NCCL_CONF        - NCCL configuration file the setting is appended to
#
# Exit status:
#   the status of 'systemctl is-active' when nvidia-fabricmanager is not active.

# Link the NDv5 topology file, no graph for this machine type
ln -sf -- "${TOPOLOGY_SRC_DIR}/topology/ndv5-topo.xml" "${TOPOLOGY_FILE}"
rm -f -- "${TOPOLOGY_GRAPH}"

# Apply NDv5 specific NCCL configurations.
# Only append when the exact line is not already present, so running the
# script again against the same file does not accumulate duplicates.
# grep's stderr is silenced because the file may not exist yet.
nccl_setting="NCCL_IB_PCI_RELAXED_ORDERING=1"
if ! grep -qxF -- "${nccl_setting}" "${NCCL_CONF}" 2>/dev/null; then
  echo "${nccl_setting}" >> "${NCCL_CONF}"
fi

## NVIDIA Fabric manager
systemctl enable nvidia-fabricmanager
systemctl start nvidia-fabricmanager
systemctl is-active --quiet nvidia-fabricmanager

# Must be read immediately after is-active so no other command overwrites $?.
error_code=$?
if [ "${error_code}" -ne 0 ]; then
  echo "NVIDIA Fabric Manager Inactive!" >&2
  exit "${error_code}"
fi
6 changes: 6 additions & 0 deletions files/sku/customisations/ndv6.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# SKU customisation for NDv6 VMs.
#
# Removes the files named by TOPOLOGY_FILE and TOPOLOGY_GRAPH.
#
# Environment (read here, never assigned in this script):
#   TOPOLOGY_FILE  - path of the topology file to remove
#   TOPOLOGY_GRAPH - path of the graph file to remove

# there are no current topology optimisations for this hardware.
# Paths are quoted so values containing whitespace or glob characters are
# removed as a single operand; '--' stops a leading '-' being read as an option.
rm -f -- "${TOPOLOGY_FILE}"
rm -f -- "${TOPOLOGY_GRAPH}"

68 changes: 68 additions & 0 deletions files/sku/topology/ncv4-graph.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<graphs version="1">
<graph id="0" pattern="4" crossnic="0" nchannels="2" speedintra="12" speedinter="12" latencyinter="0" typeintra="SYS" typeinter="PIX" samechannels="0">
<channel>
<gpu dev="0"/>
<gpu dev="1"/>
<gpu dev="2"/>
<gpu dev="3"/>
</channel>
<channel>
<gpu dev="0"/>
<gpu dev="3"/>
<gpu dev="2"/>
<gpu dev="1"/>
</channel>
</graph>
<graph id="1" pattern="1" crossnic="0" nchannels="4" speedintra="12" speedinter="12" latencyinter="0" typeintra="SYS" typeinter="PIX" samechannels="0">
<channel>
<gpu dev="0"/>
<gpu dev="1"/>
<gpu dev="2"/>
<gpu dev="3"/>
</channel>
<channel>
<gpu dev="1"/>
<gpu dev="0"/>
<gpu dev="3"/>
<gpu dev="2"/>
</channel>
<channel>
<gpu dev="3"/>
<gpu dev="2"/>
<gpu dev="1"/>
<gpu dev="0"/>
</channel>
<channel>
<gpu dev="2"/>
<gpu dev="3"/>
<gpu dev="0"/>
<gpu dev="1"/>
</channel>
</graph>
<graph id="2" pattern="3" crossnic="0" nchannels="4" speedintra="12" speedinter="12" latencyinter="0" typeintra="SYS" typeinter="PIX" samechannels="0">
<channel>
<gpu dev="0"/>
<gpu dev="1"/>
<gpu dev="2"/>
<gpu dev="3"/>
</channel>
<channel>
<gpu dev="1"/>
<gpu dev="0"/>
<gpu dev="3"/>
<gpu dev="2"/>
</channel>
<channel>
<gpu dev="3"/>
<gpu dev="2"/>
<gpu dev="1"/>
<gpu dev="0"/>
</channel>
<channel>
<gpu dev="2"/>
<gpu dev="3"/>
<gpu dev="0"/>
<gpu dev="1"/>
</channel>
</graph>
</graphs>
33 changes: 33 additions & 0 deletions files/sku/topology/ncv4-topo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<system version="1">
<cpu numaid="0" affinity="00000000,00000000,00ffffff" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="1">
<pci busid="0001:00:00.0" class="0x030200" vendor="0x10de" device="0x20b5" subsystem_vendor="0x10de" subsystem_device="0x1533" link_speed="" link_width="0">
<gpu dev="0" sm="80" rank="0" gdr="1">
<nvlink target="0001:00:00.0" count="12" tclass="0x030200"/>
</gpu>
</pci>
<nic>
<net name="eth0" dev="0" speed="100000" port="0" latency="0.000000" guid="0x0" maxconn="65536" gdr="0"/>
</nic>
</cpu>
<cpu numaid="1" affinity="00000000,0000ffff,ff000000" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="1">
<pci busid="0002:00:00.0" class="0x030200" vendor="0x10de" device="0x20b5" subsystem_vendor="0x10de" subsystem_device="0x1533" link_speed="" link_width="0">
<gpu dev="1" sm="80" rank="1" gdr="1">
<nvlink target="0002:00:00.0" count="12" tclass="0x030200"/>
</gpu>
</pci>
</cpu>
<cpu numaid="2" affinity="000000ff,ffff0000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="1">
<pci busid="0003:00:00.0" class="0x030200" vendor="0x10de" device="0x20b5" subsystem_vendor="0x10de" subsystem_device="0x1533" link_speed="" link_width="0">
<gpu dev="2" sm="80" rank="2" gdr="1">
<nvlink target="0003:00:00.0" count="12" tclass="0x030200"/>
</gpu>
</pci>
</cpu>
<cpu numaid="3" affinity="ffffff00,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="175" modelid="1">
<pci busid="0004:00:00.0" class="0x030200" vendor="0x10de" device="0x20b5" subsystem_vendor="0x10de" subsystem_device="0x1533" link_speed="" link_width="0">
<gpu dev="3" sm="80" rank="3" gdr="1">
<nvlink target="0004:00:00.0" count="12" tclass="0x030200"/>
</gpu>
</pci>
</cpu>
</system>
15 changes: 15 additions & 0 deletions files/sku/topology/ndv2-topo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<system version="1">
<cpu numaid="0" affinity="00000000,000fffff" arch="x86_64" vendor="GenuineIntel" familyid="6" modelid="85">
<pci busid="0001:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0002:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0003:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0004:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0101:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
</cpu>
<cpu numaid="1" affinity="000000ff,fff00000" arch="x86_64" vendor="GenuineIntel" familyid="6" modelid="85">
<pci busid="0005:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0006:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0007:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0008:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
</cpu>
</system>
34 changes: 34 additions & 0 deletions files/sku/topology/ndv4-topo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<system version="1">
<cpu numaid="0" affinity="00000000,00000000,00ffffff" arch="x86_64" vendor="AuthenticAMD" familyid="23" modelid="49">
<pci busid="ffff:ff:01.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0003:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0103:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
<pci busid="0004:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0104:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
</pci>
</cpu>
<cpu numaid="1" affinity="00000000,0000ffff,ff000000" arch="x86_64" vendor="AuthenticAMD" familyid="23" modelid="49">
<pci busid="ffff:ff:02.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="0001:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0101:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
<pci busid="0002:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0102:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
</pci>
</cpu>
<cpu numaid="2" affinity="000000ff,ffff0000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="23" modelid="49">
<pci busid="ffff:ff:03.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="000d:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0107:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
<pci busid="000e:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0108:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
</pci>
</cpu>
<cpu numaid="3" affinity="ffffff00,00000000,00000000" arch="x86_64" vendor="AuthenticAMD" familyid="23" modelid="49">
<pci busid="ffff:ff:04.0" class="0x060400" link_speed="16 GT/s" link_width="16">
<pci busid="000b:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0105:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
<pci busid="000c:00:00.0" class="0x030200" link_speed="16 GT/s" link_width="16"/>
<pci busid="0106:00:00.0" class="0x020700" link_speed="16 GT/s" link_width="16"/>
</pci>
</cpu>
</system>
38 changes: 38 additions & 0 deletions files/sku/topology/ndv5-topo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<system version="1">
<cpu numaid="0" affinity="00000000,0000ffff,ffffffff" arch="x86_64" vendor="GenuineIntel" familyid="6" modelid="143">
<pci busid="ffff:ff:01.0" class="0x060400" link_speed="32.0 GT/s PCIe" link_width="16" vendor="0x0000" device="0x0000" subsystem_vendor="0x0000" subsystem_device="0x0000">
<pci busid="0001:00:00.0" class="0x030200" link_speed="32.0 GT/s PCIe" link_width="16"/>
<pci busid="0101:00:00.0" class="0x020700" link_speed="32.0 GT/s PCIe" link_width="16"/>
</pci>
<pci busid="ffff:ff:02.0" class="0x060400" link_speed="32.0 GT/s PCIe" link_width="16" vendor="0x0000" device="0x0000" subsystem_vendor="0x0000" subsystem_device="0x0000">
<pci busid="0002:00:00.0" class="0x030200" link_speed="32.0 GT/s PCIe" link_width="16"/>
<pci busid="0102:00:00.0" class="0x020700" link_speed="32.0 GT/s PCIe" link_width="16"/>
</pci>
<pci busid="ffff:ff:03.0" class="0x060400" link_speed="32.0 GT/s PCIe" link_width="16" vendor="0x0000" device="0x0000" subsystem_vendor="0x0000" subsystem_device="0x0000">
<pci busid="0003:00:00.0" class="0x030200" link_speed="32.0 GT/s PCIe" link_width="16"/>
<pci busid="0103:00:00.0" class="0x020700" link_speed="32.0 GT/s PCIe" link_width="16"/>
</pci>
<pci busid="ffff:ff:04.0" class="0x060400" link_speed="32.0 GT/s PCIe" link_width="16" vendor="0x0000" device="0x0000" subsystem_vendor="0x0000" subsystem_device="0x0000">
<pci busid="0008:00:00.0" class="0x030200" link_speed="32.0 GT/s PCIe" link_width="16"/>
<pci busid="0104:00:00.0" class="0x020700" link_speed="32.0 GT/s PCIe" link_width="16"/>
</pci>
</cpu>
<cpu numaid="1" affinity="ffffffff,ffff0000,00000000" arch="x86_64" vendor="GenuineIntel" familyid="6" modelid="143">
<pci busid="ffff:ff:05.0" class="0x060400" link_speed="32.0 GT/s PCIe" link_width="16" vendor="0x0000" device="0x0000" subsystem_vendor="0x0000" subsystem_device="0x0000">
<pci busid="0009:00:00.0" class="0x030200" link_speed="32.0 GT/s PCIe" link_width="16"/>
<pci busid="0105:00:00.0" class="0x020700" link_speed="32.0 GT/s PCIe" link_width="16"/>
</pci>
<pci busid="ffff:ff:06.0" class="0x060400" link_speed="32.0 GT/s PCIe" link_width="16" vendor="0x0000" device="0x0000" subsystem_vendor="0x0000" subsystem_device="0x0000">
<pci busid="000a:00:00.0" class="0x030200" link_speed="32.0 GT/s PCIe" link_width="16"/>
<pci busid="0106:00:00.0" class="0x020700" link_speed="32.0 GT/s PCIe" link_width="16"/>
</pci>
<pci busid="ffff:ff:07.0" class="0x060400" link_speed="32.0 GT/s PCIe" link_width="16" vendor="0x0000" device="0x0000" subsystem_vendor="0x0000" subsystem_device="0x0000">
<pci busid="000b:00:00.0" class="0x030200" link_speed="32.0 GT/s PCIe" link_width="16"/>
<pci busid="0107:00:00.0" class="0x020700" link_speed="32.0 GT/s PCIe" link_width="16"/>
</pci>
<pci busid="ffff:ff:08.0" class="0x060400" link_speed="32.0 GT/s PCIe" link_width="16" vendor="0x0000" device="0x0000" subsystem_vendor="0x0000" subsystem_device="0x0000">
<pci busid="000c:00:00.0" class="0x030200" link_speed="32.0 GT/s PCIe" link_width="16"/>
<pci busid="0108:00:00.0" class="0x020700" link_speed="32.0 GT/s PCIe" link_width="16"/>
</pci>
</cpu>
</system>
Loading