Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions .github/workflows/build-and-test-python-wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,15 @@ defaults:
shell: bash --noprofile --norc -euo pipefail {0}

jobs:
# Build wheels for all CUDA/Python combinations
build-wheels:
name: Build wheel (CUDA ${{ matrix.cuda }}, Python ${{ matrix.python }})
name: Build wheel (Python ${{ matrix.python }})
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
strategy:
fail-fast: false
matrix:
cuda: ['12', '13']
python: ['3.10', '3.11', '3.12', '3.13']

steps:
Expand All @@ -32,19 +30,20 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Build wheel
- name: Build multi-CUDA wheel
run: |
bash ci/build_pynvbench_wheel.sh -py-version ${{ matrix.python }} -cuda-version ${{ matrix.cuda }}
bash ci/build_multi_cuda_wheel.sh -py-version ${{ matrix.python }}
- name: Upload wheel artifact
uses: actions/upload-artifact@v4
with:
name: wheel-pynvbench-cu${{ matrix.cuda }}-py${{ matrix.python }}
name: wheel-pynvbench-py${{ matrix.python }}
path: wheelhouse/*.whl
retention-days: 7
if-no-files-found: error

# Test wheels for all CUDA/Python combinations
# Test wheels on all CUDA/Python combinations
# Each wheel contains extensions for both CUDA 12 & 13, runtime detection picks the right one
test-wheels:
name: Test wheel (CUDA ${{ matrix.cuda }}, Python ${{ matrix.python }})
needs: build-wheels
Expand All @@ -68,16 +67,18 @@ jobs:
- name: Download wheel artifact
uses: actions/download-artifact@v4
with:
name: wheel-pynvbench-cu${{ matrix.cuda }}-py${{ matrix.python }}
name: wheel-pynvbench-py${{ matrix.python }}
path: wheelhouse

- name: Test wheel
run: |
# Use the same rapidsai/ci-wheel Docker image as build
if [[ "${{ matrix.cuda }}" == "12" ]]; then
cuda_full_version="12.9.1"
cuda_extra="cu12"
else
cuda_full_version="13.0.1"
cuda_extra="cu13"
fi
docker run --rm \
Expand All @@ -86,6 +87,7 @@ jobs:
--mount type=bind,source=$(pwd),target=/workspace/ \
--env py_version=${{ matrix.python }} \
--env cuda_version=${{ matrix.cuda }} \
--env cuda_extra="${cuda_extra}" \
rapidsai/ci-wheel:25.12-cuda${cuda_full_version}-rockylinux8-py${{ matrix.python }} \
/workspace/ci/test_pynvbench_inner.sh
Expand Down
58 changes: 0 additions & 58 deletions .github/workflows/build-wheels.yml

This file was deleted.

5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,13 @@ CMakeUserPresets.json

# Python wheel builds
wheelhouse/
wheelhouse_merged/
wheelhouse_final/
dist/
dist_*/
*.whl
*.egg-info/
__pycache__/
*.pyc
*.pyo
.pytest_cache/
192 changes: 192 additions & 0 deletions ci/build_multi_cuda_wheel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
#!/bin/bash
# Apache 2.0 License
# Copyright 2024-2025 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 with the LLVM exception
# (the "License"); you may not use this file except in compliance with
# the License.
#
# You may obtain a copy of the License at
#
# http://llvm.org/foundation/relicensing/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build a multi-CUDA wheel for the given Python version
# This builds separate wheels for each supported CUDA major version,
# and then merges them into a single wheel containing extensions
# for all CUDA versions. At runtime, depending on the installed CUDA version,
# the correct extension will be chosen.

# Fail fast: abort on any command error, on use of an unset variable, and on
# a failure anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory that contains this script.
ci_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

usage="Usage: $0 -py-version <python_version> [additional options...]"

# Load the shared argument helpers and parse this script's command line.
# NOTE(review): parse_python_args is presumably what sets py_version, which
# the rest of this script reads -- confirm against common_arg_parser.sh.
. "$ci_dir/util/python/common_arg_parser.sh"
parse_python_args "$@"

# A Python version is mandatory for this script; stop if the check fails.
if ! require_py_version "$usage"; then
  exit 1
fi

# Log the Docker socket path; the wheel builds below are run through Docker.
echo "Docker socket: " $(ls /var/run/docker.sock)

# HOST_WORKSPACE is the host directory that gets bind-mounted into the build
# containers. When the caller did not provide one (e.g. a local run), fall
# back to the repository root, i.e. the parent of the ci/ directory.
[[ -n "${HOST_WORKSPACE:-}" ]] || {
  HOST_WORKSPACE="$(cd "${ci_dir}/.." && pwd)"
  echo "Setting HOST_WORKSPACE to: $HOST_WORKSPACE"
}

# pynvbench must be built in a container that can produce manylinux wheels,
# and has the CUDA toolkit installed. We use the rapidsai/ci-wheel image for this.
# We build separate wheels using separate containers for each CUDA version,
# then merge them into a single wheel.

# Pinned coordinates used to compose the builder image tags.
readonly devcontainer_version=25.12
readonly devcontainer_distro=rockylinux8
readonly cuda12_version=12.9.1
readonly cuda13_version=13.0.1

# On aarch64 hosts the image tag carries an "-arm64" suffix; otherwise none.
case "$(uname -m)" in
  aarch64) host_arch_suffix="-arm64" ;;
  *)       host_arch_suffix="" ;;
esac
readonly host_arch_suffix

# Tag layout: <devcontainer>-cuda<toolkit>-<distro>-py<python><arch suffix>.
# Only the CUDA toolkit version differs between the two images.
image_prefix="rapidsai/ci-wheel:${devcontainer_version}"
image_suffix="${devcontainer_distro}-py${py_version}${host_arch_suffix}"
readonly cuda12_image="${image_prefix}-cuda${cuda12_version}-${image_suffix}"
readonly cuda13_image="${image_prefix}-cuda${cuda13_version}-${image_suffix}"

# Build one wheel per supported CUDA major version, each inside its own
# builder container. The merge step later in this script looks for the
# results under wheelhouse/, so make sure that directory exists up front.
mkdir -p wheelhouse

for ctk in 12 13; do
  # Resolve cuda12_image / cuda13_image by name with indirect expansion
  # rather than eval, so the image string is never re-parsed by the shell.
  image_var="cuda${ctk}_image"
  image="${!image_var}"
  echo "::group::⚒️ Building CUDA ${ctk} wheel on ${image}"
  (
    # Trace the docker commands; the subshell keeps tracing local to them.
    set -x
    docker pull "$image"
    # Quote the mount spec so a workspace path containing whitespace is
    # passed to docker as a single argument.
    docker run --rm -i \
      --workdir /workspace/python \
      --mount "type=bind,source=${HOST_WORKSPACE},target=/workspace/" \
      --env "py_version=${py_version}" \
      "$image" \
      /workspace/ci/build_pynvbench_wheel_for_cuda.sh
    # Prevent GHA runners from exhausting available storage with leftover images:
    if [[ -n "${GITHUB_ACTIONS:-}" ]]; then
      docker rmi -f "$image"
    fi
  )
  echo "::endgroup::"
done

echo "Merging CUDA wheels..."

# Pick the first interpreter available on PATH, preferring `python` over
# `python3`; give up when neither is present.
PYTHON=""
for candidate in python python3; do
  if command -v "$candidate" &> /dev/null; then
    PYTHON="$candidate"
    break
  fi
done

if [[ -z "$PYTHON" ]]; then
  echo "Error: No python found"
  exit 1
fi

# The `wheel` package supplies the unpack/pack commands used for the merge.
"$PYTHON" -m pip install --break-system-packages wheel

# Locate the per-CUDA wheels in wheelhouse/. Their file names carry a
# temporary cu12/cu13 marker so that the two builds do not collide.
cu12_wheel="$(find wheelhouse -name "*cu12*.whl" | head -n 1)"
cu13_wheel="$(find wheelhouse -name "*cu13*.whl" | head -n 1)"

# Both wheels are required; report the first one that is missing.
for cuda_major in 12 13; do
  wheel_var="cu${cuda_major}_wheel"
  if [[ -z "${!wheel_var}" ]]; then
    echo "Error: CUDA ${cuda_major} wheel not found in wheelhouse/"
    ls -la wheelhouse/
    exit 1
  fi
done

# The two lookups must not have resolved to the very same file.
if [[ "$cu12_wheel" == "$cu13_wheel" ]]; then
  echo "Error: Only one wheel found, expected two (CUDA 12 and CUDA 13)"
  ls -la wheelhouse/
  exit 1
fi

echo "Found CUDA 12 wheel: $cu12_wheel"
echo "Found CUDA 13 wheel: $cu13_wheel"

# The merge step changes directory, so pin both paths to absolute form now.
cu12_wheel="$(readlink -f "$cu12_wheel")"
cu13_wheel="$(readlink -f "$cu13_wheel")"

# Merge the wheels manually: unpack the CUDA 12 wheel as the base tree, copy
# the cuda/bench/cu13 directory out of the CUDA 13 wheel into it, then repack.

# Start from empty scratch directories. Leftovers from an earlier aborted run
# would otherwise make `mkdir cu13_tmp` fail or let the directory lookups
# below pick up a stale unpacked tree.
rm -rf wheelhouse_merged wheelhouse_final
mkdir -p wheelhouse_merged
cd wheelhouse_merged

# Unpack CUDA 12 wheel (this will be our base)
$PYTHON -m wheel unpack "$cu12_wheel"
base_dir=$(find . -maxdepth 1 -type d -name "pynvbench-*" | head -n 1)
if [[ -z "$base_dir" ]]; then
  # Without this guard the later "$base_dir"/*.dist-info glob would be
  # evaluated relative to the filesystem root.
  echo "Error: unpacking $cu12_wheel did not produce a pynvbench-* directory"
  exit 1
fi

# Unpack CUDA 13 wheel into a temporary subdirectory
mkdir cu13_tmp
cd cu13_tmp
$PYTHON -m wheel unpack "$cu13_wheel"
cu13_dir=$(find . -maxdepth 1 -type d -name "pynvbench-*" | head -n 1)
if [[ -z "$cu13_dir" ]]; then
  echo "Error: unpacking $cu13_wheel did not produce a pynvbench-* directory"
  exit 1
fi
if [[ ! -d "$cu13_dir/cuda/bench/cu13" ]]; then
  echo "Error: $cu13_wheel does not contain a cuda/bench/cu13 directory"
  exit 1
fi

# Copy the cu13/ directory from CUDA 13 wheel into the base wheel
cp -r "$cu13_dir"/cuda/bench/cu13 "../$base_dir/cuda/bench/"

# Go back and clean up
cd ..
rm -rf cu13_tmp

# Remove RECORD file to let wheel recreate it
rm -f "$base_dir"/*.dist-info/RECORD

# Repack the merged wheel
$PYTHON -m wheel pack "$base_dir"

# Return to the directory the script was in before the merge.
cd ..

# Install auditwheel and run `auditwheel repair` on the merged wheel, writing
# the repaired result to wheelhouse_final/.
$PYTHON -m pip install --break-system-packages auditwheel

# Shared libraries handed to auditwheel's --exclude option.
auditwheel_excludes=(
  'libcuda.so.1'
  'libnvidia-ml.so.1'
  'libcupti.so.12'
  'libcupti.so.13'
  'libnvperf_host.so'
  'libnvperf_target.so'
)

# Expand the list into repeated "--exclude <lib>" argument pairs.
exclude_args=()
for lib in "${auditwheel_excludes[@]}"; do
  exclude_args+=(--exclude "$lib")
done

for merged_wheel in wheelhouse_merged/pynvbench-*.whl; do
  echo "Repairing merged wheel: $merged_wheel"
  $PYTHON -m auditwheel repair \
    "${exclude_args[@]}" \
    "$merged_wheel" \
    --wheel-dir wheelhouse_final
done

# Leave wheelhouse/ holding only the final merged wheel: make sure the
# directory exists, then drop the intermediate per-CUDA wheels from it.
mkdir -p wheelhouse
rm -rf wheelhouse/*

# Prefer the repaired wheel; fall back to the unrepaired merged wheel when
# wheelhouse_final/ holds no match. An unmatched glob stays a literal
# pattern, which the -e test then reports as missing.
repaired_wheels=(wheelhouse_final/pynvbench-*.whl)
if [[ -e "${repaired_wheels[0]}" ]]; then
  mv "${repaired_wheels[@]}" wheelhouse/
  echo "Final merged wheel moved to wheelhouse"
else
  echo "No final repaired wheel found, moving unrepaired merged wheel"
  mv wheelhouse_merged/pynvbench-*.whl wheelhouse/
fi

# The scratch directories are no longer needed.
rm -rf wheelhouse_merged wheelhouse_final

echo "Final wheels in wheelhouse:"
ls -la wheelhouse/
Loading