diff --git a/README.md b/README.md
index 9c1bd3d..c4e0001 100644
--- a/README.md
+++ b/README.md
@@ -190,6 +190,17 @@ Default: `true`
Type: `bool`
+### hpc_sku_customisation
+
+Whether to install the hardware tuning files for different Azure VM types (SKUs).
+
+This will install definitions for optimal hardware configurations for the different types of high performance VMs that are typically used for HPC workloads in the Azure environment.
+These include InfiniBand, GPU/NVLink and NCCL customisations, as well as any workarounds for specific hardware problems that may be needed.
+
+Default: `true`
+
+Type: `bool`
+
## Variables for Configuring How Role Reboots Managed Nodes
### hpc_reboot_ok
diff --git a/defaults/main.yml b/defaults/main.yml
index 9d235a3..7f7ff21 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -23,6 +23,7 @@ hpc_build_openmpi_w_nvidia_gpu_support: true
hpc_install_moneo: true
hpc_tuning: true
+hpc_sku_customisation: true
hpc_update_kernel: true
hpc_update_all_packages: false
diff --git a/files/sku/customisations/hbv4.sh b/files/sku/customisations/hbv4.sh
new file mode 100755
index 0000000..af76aaf
--- /dev/null
+++ b/files/sku/customisations/hbv4.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# there are no current topology optimisations for this hardware.
+rm -f ${TOPOLOGY_FILE}
+rm -f ${TOPOLOGY_GRAPH}
diff --git a/files/sku/customisations/ncv4.sh b/files/sku/customisations/ncv4.sh
new file mode 100755
index 0000000..fe7ecca
--- /dev/null
+++ b/files/sku/customisations/ncv4.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# Set up the NCv4 topology and graph files
+ln -sf ${TOPOLOGY_SRC_DIR}/topology/ncv4-topo.xml ${TOPOLOGY_FILE}
+ln -sf ${TOPOLOGY_SRC_DIR}/topology/ncv4-graph.xml ${TOPOLOGY_GRAPH}
diff --git a/files/sku/customisations/ncv5.sh b/files/sku/customisations/ncv5.sh
new file mode 100755
index 0000000..3177995
--- /dev/null
+++ b/files/sku/customisations/ncv5.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# there are no current topology optimisations for this hardware.
+rm -f ${TOPOLOGY_FILE}
+rm -f ${TOPOLOGY_GRAPH}
+
+# Attempt to work around NVLink initialisation bug
+if nvidia-smi nvlink --status | grep -qa inActive; then
+  # Ensure Hyper-V PCI devices are ready
+  retries=0
+  while ! ls /sys/bus/vmbus/drivers/hv_pci/ | grep -q '[0-9a-f-]\{8\}-'; do
+    # give up with a non-zero status ($? of the negated test above is always 0)
+    if (( retries++ >= 5 )); then
+      echo "Hyper-V PCI devices Inactive!"
+      exit 1
+    fi
+    echo "Waiting for Hyper-V PCI devices..."
+    sleep 1
+  done
+
+  # Ensure NVIDIA GPU PCI devices are ready
+  retries=0
+  while ! lspci | grep -qi nvidia; do
+    # give up with a non-zero status ($? of the negated test above is always 0)
+    if (( retries++ >= 5 )); then
+      echo "NVIDIA GPU PCI Inactive!"
+      exit 1
+    fi
+    echo "Waiting for NVIDIA GPU PCI devices..."
+    sleep 1
+  done
+
+  echo "Reloading NVIDIA kernel modules..."
+  sudo systemctl stop nvidia-dcgm.service
+  sudo modprobe -r nvidia_drm nvidia_modeset gdrdrv nvidia_peermem nvidia_uvm nvidia
+  sudo modprobe nvidia nvidia_modeset nvidia_uvm nvidia_peermem gdrdrv nvidia_drm
+  sudo systemctl start nvidia-dcgm.service
+fi
+
+echo "Check NVLink status after reloading NVIDIA kernel modules..."
+if nvidia-smi nvlink --status | grep -qa inActive; then
+  echo "NVLink is still Inactive after reloading NVIDIA kernel modules!"
+  exit 1
+else
+  echo "NVLink is Active."
+fi
diff --git a/files/sku/customisations/ndv2.sh b/files/sku/customisations/ndv2.sh
new file mode 100755
index 0000000..9b4da0d
--- /dev/null
+++ b/files/sku/customisations/ndv2.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# Link the NDv2 topology file, no graph for this machine type
+ln -sf ${TOPOLOGY_SRC_DIR}/topology/ndv2-topo.xml ${TOPOLOGY_FILE}
+rm -f ${TOPOLOGY_GRAPH}
+
diff --git a/files/sku/customisations/ndv4.sh b/files/sku/customisations/ndv4.sh
new file mode 100755
index 0000000..ffad164
--- /dev/null
+++ b/files/sku/customisations/ndv4.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Link the NDv4 topology file, no graph for this machine type
+ln -sf ${TOPOLOGY_SRC_DIR}/topology/ndv4-topo.xml ${TOPOLOGY_FILE}
+rm -f ${TOPOLOGY_GRAPH}
+
+# Apply NDv4 specific NCCL configurations
+echo "NCCL_IB_PCI_RELAXED_ORDERING=1" >> ${NCCL_CONF}
+
+## NVIDIA Fabric manager
+systemctl enable nvidia-fabricmanager
+systemctl start nvidia-fabricmanager
+systemctl is-active --quiet nvidia-fabricmanager
+
+error_code=$?
+if [ ${error_code} -ne 0 ]; then
+ echo "NVIDIA Fabric Manager Inactive!"
+ exit ${error_code}
+fi
diff --git a/files/sku/customisations/ndv5.sh b/files/sku/customisations/ndv5.sh
new file mode 100755
index 0000000..53473e0
--- /dev/null
+++ b/files/sku/customisations/ndv5.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Link the NDv5 topology file, no graph for this machine type
+ln -sf ${TOPOLOGY_SRC_DIR}/topology/ndv5-topo.xml ${TOPOLOGY_FILE}
+rm -f ${TOPOLOGY_GRAPH}
+
+# Apply NDv5 specific NCCL configurations
+echo "NCCL_IB_PCI_RELAXED_ORDERING=1" >> ${NCCL_CONF}
+
+## NVIDIA Fabric manager
+systemctl enable nvidia-fabricmanager
+systemctl start nvidia-fabricmanager
+systemctl is-active --quiet nvidia-fabricmanager
+
+error_code=$?
+if [ ${error_code} -ne 0 ]; then
+ echo "NVIDIA Fabric Manager Inactive!"
+ exit ${error_code}
+fi
diff --git a/files/sku/customisations/ndv6.sh b/files/sku/customisations/ndv6.sh
new file mode 100755
index 0000000..7eb7416
--- /dev/null
+++ b/files/sku/customisations/ndv6.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# there are no current topology optimisations for this hardware.
+rm -f ${TOPOLOGY_FILE}
+rm -f ${TOPOLOGY_GRAPH}
+
diff --git a/files/sku/topology/ncv4-graph.xml b/files/sku/topology/ncv4-graph.xml
new file mode 100644
index 0000000..c28cf52
--- /dev/null
+++ b/files/sku/topology/ncv4-graph.xml
@@ -0,0 +1,68 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/files/sku/topology/ncv4-topo.xml b/files/sku/topology/ncv4-topo.xml
new file mode 100644
index 0000000..cf0eeb6
--- /dev/null
+++ b/files/sku/topology/ncv4-topo.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/files/sku/topology/ndv2-topo.xml b/files/sku/topology/ndv2-topo.xml
new file mode 100644
index 0000000..dd1e0cd
--- /dev/null
+++ b/files/sku/topology/ndv2-topo.xml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/files/sku/topology/ndv4-topo.xml b/files/sku/topology/ndv4-topo.xml
new file mode 100644
index 0000000..dde6f06
--- /dev/null
+++ b/files/sku/topology/ndv4-topo.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/files/sku/topology/ndv5-topo.xml b/files/sku/topology/ndv5-topo.xml
new file mode 100644
index 0000000..fce6581
--- /dev/null
+++ b/files/sku/topology/ndv5-topo.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tasks/main.yml b/tasks/main.yml
index ba8fd7d..713748a 100644
--- a/tasks/main.yml
+++ b/tasks/main.yml
@@ -704,6 +704,118 @@
mode: '0644'
notify: Reload udev
+- name: Ensure Azure HPC resource directories exist
+ block:
+ - name: Check for resource directory existence
+ stat:
+ path: "{{ __hpc_azure_resource_dir }}"
+ register: __hpc_azure_resource_dir_stat
+
+ - name: Check for runtime directory existence
+ stat:
+ path: "{{ __hpc_azure_runtime_dir }}"
+ register: __hpc_azure_runtime_dir_stat
+
+ - name: Create resource directories
+ when: not __hpc_azure_resource_dir_stat.stat.exists
+ block:
+ - name: Create base resource directory
+ file:
+ path: "{{ __hpc_azure_resource_dir }}"
+ state: directory
+ owner: root
+ group: root
+ mode: '0755'
+
+ - name: Location for binaries
+ file:
+ path: "{{ __hpc_azure_resource_dir }}/bin"
+ state: directory
+ owner: root
+ group: root
+ mode: '0755'
+
+ - name: Location for tools
+ file:
+ path: "{{ __hpc_azure_tools_dir }}"
+ state: directory
+ owner: root
+ group: root
+ mode: '0755'
+
+ - name: Location for tests
+ file:
+ path: "{{ __hpc_azure_tests_dir }}"
+ state: directory
+ owner: root
+ group: root
+ mode: '0755'
+
+ - name: Runtime resource directory
+ when: not __hpc_azure_runtime_dir_stat.stat.exists
+ file:
+ path: "{{ __hpc_azure_runtime_dir }}"
+ state: directory
+ owner: root
+ group: root
+ mode: '0755'
+
+- name: Install SKU Customisation scripts and services
+  when: hpc_sku_customisation
+  block:
+  - name: Check if already installed
+    stat:
+      path: "{{ __hpc_azure_resource_dir }}/topology"
+    register: __hpc_sku_topology_stat
+
+  - name: Install files
+    when: not __hpc_sku_topology_stat.stat.exists
+    block:
+    - name: Install Topology Definitions
+      copy:
+        src: sku/topology/
+        dest: "{{ __hpc_azure_resource_dir }}/topology"
+        owner: root
+        group: root
+        mode: '0644'  # XML data files, no exec bit needed
+
+    - name: Install Customisation Scripts
+      copy:
+        src: sku/customisations/
+        dest: "{{ __hpc_azure_resource_dir }}/customisations"
+        owner: root
+        group: root
+        mode: '0755'
+
+    - name: Install setup script
+      template:
+        src: sku/setup_sku_customisations.sh
+        dest: "{{ __hpc_azure_resource_dir }}/bin/setup_sku_customisations.sh"
+        owner: root
+        group: root
+        mode: '0755'
+
+    - name: Install removal script
+      template:
+        src: sku/remove_sku_customisations.sh
+        dest: "{{ __hpc_azure_resource_dir }}/bin/remove_sku_customisations.sh"
+        owner: root
+        group: root
+        mode: '0755'
+
+    - name: Install systemd service file
+      template:
+        src: sku/sku_customisation.service
+        dest: /etc/systemd/system/
+        owner: root
+        group: root
+        mode: '0644'  # systemd warns on executable unit files
+  - name: Enable systemd service file
+    systemd:
+      name: sku_customisation.service
+      daemon_reload: true  # pick up the freshly installed unit file
+      enabled: true
+
- name: Remove build dependencies
vars:
__hpc_dependencies: >-
diff --git a/templates/sku/remove_sku_customisations.sh b/templates/sku/remove_sku_customisations.sh
new file mode 100644
index 0000000..ca89f9e
--- /dev/null
+++ b/templates/sku/remove_sku_customisations.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+{{ ansible_managed | comment }}
+{{ "system_role:hpc" | comment(prefix="", postfix="") }}
+
+export TOPOLOGY_SRC_DIR="{{ __hpc_azure_resource_dir }}"
+export TOPOLOGY_RUNTIME_DIR="{{ __hpc_azure_runtime_dir }}/topology"
+
+# Stop nvidia fabric manager
+if systemctl is-active --quiet nvidia-fabricmanager ; then
+  systemctl stop nvidia-fabricmanager
+  systemctl disable nvidia-fabricmanager
+fi
+
+# Remove NVIDIA peer memory module (-w: match the module name exactly)
+if lsmod | grep -qw nvidia_peermem ; then
+  rmmod nvidia_peermem
+fi
+
+# Clear topo and graph files
+rm -rf ${TOPOLOGY_RUNTIME_DIR}
+
+# Clear contents of nccl.conf
+cat /dev/null > /etc/nccl.conf
diff --git a/templates/sku/setup_sku_customisations.sh b/templates/sku/setup_sku_customisations.sh
new file mode 100644
index 0000000..105638a
--- /dev/null
+++ b/templates/sku/setup_sku_customisations.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+{{ ansible_managed | comment }}
+{{ "system_role:hpc" | comment(prefix="", postfix="") }}
+
+# Set up the common NCCL config options so we can append the configured
+# customisations to it as needed.
+export NCCL_CONF="/etc/nccl.conf"
+echo "NCCL_IGNORE_CPU_AFFINITY=1" > ${NCCL_CONF}
+
+# define the location for the topology and graph customisation files
+export TOPOLOGY_SRC_DIR="{{ __hpc_azure_resource_dir }}"
+export TOPOLOGY_RUNTIME_DIR="{{ __hpc_azure_runtime_dir }}/topology"
+export TOPOLOGY_GRAPH="${TOPOLOGY_RUNTIME_DIR}/graph.xml"
+export TOPOLOGY_FILE="${TOPOLOGY_RUNTIME_DIR}/topo.xml"
+mkdir -p "${TOPOLOGY_RUNTIME_DIR}"
+
+# Use the internal metadata service API to determine the type of machine and
+# apply the necessary customisations for that machine.
+#
+# Note: for manual testing, we mock the SKU from the test environment rather
+# than doing an API lookup. The API based customisation will be tested fully
+# from the CI system that runs the testing on appropriate hardware.
+if [ -z "$__MOCK_SKU" ]; then
+  metadata_endpoint="http://169.254.169.254/metadata/instance?api-version=2019-06-04"
+
+  retry_count=0
+  while (( retry_count++ < 5 )); do
+    sku=$(curl -s -H Metadata:true "${metadata_endpoint}" | jq -r '.compute.vmSize // empty')
+    [ -z "$sku" ] || break
+    sleep 30
+  done
+else
+  sku="$__MOCK_SKU"
+fi
+
+if [ -z "$sku" ]; then
+  echo "Error! Could not retrieve VM Size from IMDS endpoint"
+  exit 1
+fi
+
+sku=$(echo "$sku" | awk '{print tolower($0)}')
+
+## Topo file setup based on SKU
+case $sku in
+  standard_nc96ads_a100_v4)
+    $TOPOLOGY_SRC_DIR/customisations/ncv4.sh;;
+
+  standard_nd*v4)
+    $TOPOLOGY_SRC_DIR/customisations/ndv4.sh;;
+
+  standard_nd40rs_v2)
+    $TOPOLOGY_SRC_DIR/customisations/ndv2.sh;;
+
+  standard_hb176*v4)
+    $TOPOLOGY_SRC_DIR/customisations/hbv4.sh;;
+
+  standard_nc80adis_h100_v5)
+    $TOPOLOGY_SRC_DIR/customisations/ncv5.sh;;
+
+  standard_nd96is*_h[1-2]00_v5)
+    $TOPOLOGY_SRC_DIR/customisations/ndv5.sh;;
+
+  standard_nd128is*_gb[2-3]00_v6)
+    $TOPOLOGY_SRC_DIR/customisations/ndv6.sh;;
+
+  *) echo "No SKU customisation for $sku"
+     rm -f $TOPOLOGY_GRAPH
+     rm -f $TOPOLOGY_FILE
+     rm -f $NCCL_CONF
+     ;;
+esac
+
+# Point NCCL at the configured topology and graph files
+if [ -e "${TOPOLOGY_FILE}" ]; then
+  echo "NCCL_TOPO_FILE=${TOPOLOGY_FILE}" >> ${NCCL_CONF}
+fi
+if [ -e "${TOPOLOGY_GRAPH}" ]; then
+  echo "NCCL_GRAPH_FILE=${TOPOLOGY_GRAPH}" >> ${NCCL_CONF}
+fi
+
diff --git a/templates/sku/sku_customisation.service b/templates/sku/sku_customisation.service
new file mode 100644
index 0000000..b3a83e9
--- /dev/null
+++ b/templates/sku/sku_customisation.service
@@ -0,0 +1,16 @@
+{{ ansible_managed | comment }}
+{{ "system_role:hpc" | comment(prefix="", postfix="") }}
+[Unit]
+Description=Customisations based on SKU
+After=network.target
+
+[Service]
+Type=oneshot
+ExecStart={{ __hpc_azure_resource_dir }}/bin/setup_sku_customisations.sh
+ExecStop={{ __hpc_azure_resource_dir }}/bin/remove_sku_customisations.sh
+RemainAfterExit=true
+StandardOutput=journal
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/tests/roles/linux-system-roles.hpc/files b/tests/roles/linux-system-roles.hpc/files
new file mode 120000
index 0000000..aa29175
--- /dev/null
+++ b/tests/roles/linux-system-roles.hpc/files
@@ -0,0 +1 @@
+../../../files
\ No newline at end of file
diff --git a/vars/main.yml b/vars/main.yml
index 2773cf3..9e7bde2 100644
--- a/vars/main.yml
+++ b/vars/main.yml
@@ -71,6 +71,14 @@ __hpc_kernel_versionlock_rpms:
__hpc_install_prefix: /opt
__hpc_moneo_install_dir: /opt/azurehpc/tools
+# location for azure specific scripts and resources
+__hpc_azure_resource_dir: "{{ __hpc_install_prefix }}/hpc/azure"
+__hpc_azure_tools_dir: "{{ __hpc_azure_resource_dir }}/tools"
+__hpc_azure_tests_dir: "{{ __hpc_azure_resource_dir }}/tests"
+
+# location for runtime selected azure resources
+__hpc_azure_runtime_dir: /var/hpc/azure
+
__hpc_module_dir: /usr/share/modulefiles
# A variable for testing purposes to force a specific kernel version to be installed