diff --git a/.github/workflows/build-and-test-python-wheels.yml b/.github/workflows/build-and-test-python-wheels.yml
index d51f504e..46f391bc 100644
--- a/.github/workflows/build-and-test-python-wheels.yml
+++ b/.github/workflows/build-and-test-python-wheels.yml
@@ -9,9 +9,8 @@ defaults:
     shell: bash --noprofile --norc -euo pipefail {0}
 
 jobs:
-  # Build wheels for all CUDA/Python combinations
   build-wheels:
-    name: Build wheel (CUDA ${{ matrix.cuda }}, Python ${{ matrix.python }})
+    name: Build wheel (Python ${{ matrix.python }})
     runs-on: ubuntu-latest
     permissions:
       id-token: write
@@ -19,7 +18,6 @@ jobs:
     strategy:
       fail-fast: false
      matrix:
-        cuda: ['12', '13']
         python: ['3.10', '3.11', '3.12', '3.13']
 
     steps:
@@ -32,19 +30,20 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
-      - name: Build wheel
+      - name: Build multi-CUDA wheel
         run: |
-          bash ci/build_pynvbench_wheel.sh -py-version ${{ matrix.python }} -cuda-version ${{ matrix.cuda }}
+          bash ci/build_multi_cuda_wheel.sh -py-version ${{ matrix.python }}
 
       - name: Upload wheel artifact
         uses: actions/upload-artifact@v4
         with:
-          name: wheel-pynvbench-cu${{ matrix.cuda }}-py${{ matrix.python }}
+          name: wheel-pynvbench-py${{ matrix.python }}
           path: wheelhouse/*.whl
           retention-days: 7
           if-no-files-found: error
 
-  # Test wheels for all CUDA/Python combinations
+  # Test wheels on all CUDA/Python combinations
+  # Each wheel contains extensions for both CUDA 12 and 13; runtime detection picks the right one
   test-wheels:
     name: Test wheel (CUDA ${{ matrix.cuda }}, Python ${{ matrix.python }})
     needs: build-wheels
@@ -68,7 +67,7 @@
       - name: Download wheel artifact
         uses: actions/download-artifact@v4
         with:
-          name: wheel-pynvbench-cu${{ matrix.cuda }}-py${{ matrix.python }}
+          name: wheel-pynvbench-py${{ matrix.python }}
           path: wheelhouse
 
       - name: Test wheel
@@ -76,8 +75,10 @@
           # Use the same rapidsai/ci-wheel Docker image as build
           if [[ "${{ matrix.cuda }}" == "12" ]]; then
             cuda_full_version="12.9.1"
+            cuda_extra="cu12"
           else
             cuda_full_version="13.0.1"
+            cuda_extra="cu13"
           fi
 
           docker run --rm \
@@ -86,6 +87,7 @@
             --mount type=bind,source=$(pwd),target=/workspace/ \
             --env py_version=${{ matrix.python }} \
             --env cuda_version=${{ matrix.cuda }} \
+            --env cuda_extra="${cuda_extra}" \
             rapidsai/ci-wheel:25.12-cuda${cuda_full_version}-rockylinux8-py${{ matrix.python }} \
             /workspace/ci/test_pynvbench_inner.sh
diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml
deleted file mode 100644
index 37dfb3d7..00000000
--- a/.github/workflows/build-wheels.yml
+++ /dev/null
@@ -1,58 +0,0 @@
-name: Build Python Wheels (Manual)
-
-on:
-  workflow_dispatch:
-  workflow_call:
-
-defaults:
-  run:
-    shell: bash --noprofile --norc -euo pipefail {0}
-
-jobs:
-  build-wheels:
-    name: Build wheel (CUDA ${{ matrix.cuda }}, Python ${{ matrix.python }})
-    runs-on: ubuntu-latest
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-      matrix:
-        cuda: ['12', '13']
-        python: ['3.10', '3.11', '3.12', '3.13']
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          persist-credentials: false
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build wheel
-        run: |
-          bash ci/build_pynvbench_wheel.sh -py-version ${{ matrix.python }} -cuda-version ${{ matrix.cuda }}
-
-      - name: Upload wheel artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: wheel-pynvbench-cu${{ matrix.cuda }}-py${{ matrix.python }}
-          path: wheelhouse/*.whl
-          retention-days: 7
-          if-no-files-found: error
-
-  verify-wheels:
-    name: Verify all wheels built successfully
-    if: ${{ always() }}
-    needs: build-wheels
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check build results
-        run: |
-          if [[ "${{ needs.build-wheels.result }}" != "success" ]]; then
-            echo "Wheel builds failed!"
-            exit 1
-          fi
-          echo "All wheels built successfully!"
diff --git a/.gitignore b/.gitignore
index 2f64907d..eec6d226 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,8 +11,13 @@ CMakeUserPresets.json
 
 # Python wheel builds
 wheelhouse/
+wheelhouse_merged/
+wheelhouse_final/
+dist/
+dist_*/
 *.whl
 *.egg-info/
 __pycache__/
 *.pyc
 *.pyo
+.pytest_cache/
diff --git a/ci/build_multi_cuda_wheel.sh b/ci/build_multi_cuda_wheel.sh
new file mode 100755
index 00000000..058dd311
--- /dev/null
+++ b/ci/build_multi_cuda_wheel.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+# Apache 2.0 License
+# Copyright 2024-2025 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 with the LLVM exception
+# (the "License"); you may not use this file except in compliance with
+# the License.
+#
+# You may obtain a copy of the License at
+#
+#     http://llvm.org/foundation/relicensing/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Build a multi-CUDA wheel for the given Python version
+# This builds separate wheels for each supported CUDA major version,
+# and then merges them into a single wheel containing extensions
+# for all CUDA versions. At runtime, depending on the installed CUDA version,
+# the correct extension will be chosen.
+
+set -euo pipefail
+
+ci_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+usage="Usage: $0 -py-version <python-version> [additional options...]"
+
+source "$ci_dir/util/python/common_arg_parser.sh"
+parse_python_args "$@"
+
+# Check if py_version was provided (this script requires it)
+require_py_version "$usage" || exit 1
+
+echo "Docker socket: " $(ls /var/run/docker.sock)
+
+# Set HOST_WORKSPACE if not already set (for local runs)
+if [[ -z "${HOST_WORKSPACE:-}" ]]; then
+  # Get the repository root
+  HOST_WORKSPACE="$(cd "${ci_dir}/.." && pwd)"
+  echo "Setting HOST_WORKSPACE to: $HOST_WORKSPACE"
+fi
+
+# pynvbench must be built in a container that can produce manylinux wheels,
+# and has the CUDA toolkit installed. We use the rapidsai/ci-wheel image for this.
+# We build separate wheels using separate containers for each CUDA version,
+# then merge them into a single wheel.
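+#
+# Illustrative layout of the final merged wheel (names assumed here; the
+# actual paths come from the install rules in python/CMakeLists.txt):
+#   cuda/bench/__init__.py          - dispatcher, picks cu12/ or cu13/ at import time
+#   cuda/bench/cu12/_nvbench.*.so   - extension built against CUDA 12
+#   cuda/bench/cu13/_nvbench.*.so   - extension built against CUDA 13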
+ +readonly cuda12_version=12.9.1 +readonly cuda13_version=13.0.1 +readonly devcontainer_version=25.12 +readonly devcontainer_distro=rockylinux8 + +if [[ "$(uname -m)" == "aarch64" ]]; then + readonly host_arch_suffix="-arm64" +else + readonly host_arch_suffix="" +fi + +readonly cuda12_image=rapidsai/ci-wheel:${devcontainer_version}-cuda${cuda12_version}-${devcontainer_distro}-py${py_version}${host_arch_suffix} +readonly cuda13_image=rapidsai/ci-wheel:${devcontainer_version}-cuda${cuda13_version}-${devcontainer_distro}-py${py_version}${host_arch_suffix} + +mkdir -p wheelhouse + +for ctk in 12 13; do + image=$(eval echo \$cuda${ctk}_image) + echo "::group::⚒️ Building CUDA ${ctk} wheel on ${image}" + ( + set -x + docker pull $image + docker run --rm -i \ + --workdir /workspace/python \ + --mount type=bind,source=${HOST_WORKSPACE},target=/workspace/ \ + --env py_version=${py_version} \ + $image \ + /workspace/ci/build_pynvbench_wheel_for_cuda.sh + # Prevent GHA runners from exhausting available storage with leftover images: + if [[ -n "${GITHUB_ACTIONS:-}" ]]; then + docker rmi -f $image + fi + ) + echo "::endgroup::" +done + +echo "Merging CUDA wheels..." + +# Detect python command +if command -v python &> /dev/null; then + PYTHON=python +elif command -v python3 &> /dev/null; then + PYTHON=python3 +else + echo "Error: No python found" + exit 1 +fi + +# Needed for unpacking and repacking wheels. +$PYTHON -m pip install --break-system-packages wheel + +# Find the built wheels (temporarily suffixed with .cu12/.cu13 to avoid collision) +cu12_wheel=$(find wheelhouse -name "*cu12*.whl" | head -1) +cu13_wheel=$(find wheelhouse -name "*cu13*.whl" | head -1) + +if [[ -z "$cu12_wheel" ]]; then + echo "Error: CUDA 12 wheel not found in wheelhouse/" + ls -la wheelhouse/ + exit 1 +fi + +if [[ -z "$cu13_wheel" ]]; then + echo "Error: CUDA 13 wheel not found in wheelhouse/" + ls -la wheelhouse/ + exit 1 +fi + +if [[ "$cu12_wheel" == "$cu13_wheel" ]]; then + echo "Error: Only one wheel found, expected two (CUDA 12 and CUDA 13)" + ls -la wheelhouse/ + exit 1 +fi + +echo "Found CUDA 12 wheel: $cu12_wheel" +echo "Found CUDA 13 wheel: $cu13_wheel" + +# Convert to absolute paths before changing directory +cu12_wheel=$(readlink -f "$cu12_wheel") +cu13_wheel=$(readlink -f "$cu13_wheel") + +# Merge the wheels manually +mkdir -p wheelhouse_merged +cd wheelhouse_merged + +# Unpack CUDA 12 wheel (this will be our base) +$PYTHON -m wheel unpack "$cu12_wheel" +base_dir=$(find . -maxdepth 1 -type d -name "pynvbench-*" | head -1) + +# Unpack CUDA 13 wheel into a temporary subdirectory +mkdir cu13_tmp +cd cu13_tmp +$PYTHON -m wheel unpack "$cu13_wheel" +cu13_dir=$(find . -maxdepth 1 -type d -name "pynvbench-*" | head -1) + +# Copy the cu13/ directory from CUDA 13 wheel into the base wheel +cp -r "$cu13_dir"/cuda/bench/cu13 "../$base_dir/cuda/bench/" + +# Go back and clean up +cd .. +rm -rf cu13_tmp + +# Remove RECORD file to let wheel recreate it +rm -f "$base_dir"/*.dist-info/RECORD + +# Repack the merged wheel +$PYTHON -m wheel pack "$base_dir" + +cd .. 
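+
+# Optional sanity check (assumes both extensions landed in the merged wheel):
+#   $PYTHON -m zipfile -l wheelhouse_merged/pynvbench-*.whl | grep -E 'cu1[23]/_nvbench'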
+
+# Install auditwheel and repair the merged wheel
+$PYTHON -m pip install --break-system-packages auditwheel
+for wheel in wheelhouse_merged/pynvbench-*.whl; do
+    echo "Repairing merged wheel: $wheel"
+    $PYTHON -m auditwheel repair \
+        --exclude 'libcuda.so.1' \
+        --exclude 'libnvidia-ml.so.1' \
+        --exclude 'libcupti.so.12' \
+        --exclude 'libcupti.so.13' \
+        --exclude 'libnvperf_host.so' \
+        --exclude 'libnvperf_target.so' \
+        "$wheel" \
+        --wheel-dir wheelhouse_final
+done
+
+# Clean up intermediate files and move only the final merged wheel to wheelhouse
+rm -rf wheelhouse/*  # Clean existing wheelhouse
+mkdir -p wheelhouse
+
+# Move only the final repaired merged wheel
+if ls wheelhouse_final/pynvbench-*.whl 1> /dev/null 2>&1; then
+    mv wheelhouse_final/pynvbench-*.whl wheelhouse/
+    echo "Final merged wheel moved to wheelhouse"
+else
+    echo "No final repaired wheel found, moving unrepaired merged wheel"
+    mv wheelhouse_merged/pynvbench-*.whl wheelhouse/
+fi
+
+# Clean up temporary directories
+rm -rf wheelhouse_merged wheelhouse_final
+
+echo "Final wheels in wheelhouse:"
+ls -la wheelhouse/
diff --git a/ci/build_pynvbench_wheel.sh b/ci/build_pynvbench_wheel.sh
deleted file mode 100755
index 306891be..00000000
--- a/ci/build_pynvbench_wheel.sh
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-
-ci_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-usage="Usage: $0 -py-version <python-version> -cuda-version <cuda-version>"
-
-source "$ci_dir/util/python/common_arg_parser.sh"
-
-# Parse arguments including CUDA version
-parse_python_args "$@"
-
-# Now parse CUDA version from remaining arguments
-cuda_version=""
-while [[ $# -gt 0 ]]; do
-    case $1 in
-        -cuda-version=*)
-            cuda_version="${1#*=}"
-            shift
-            ;;
-        -cuda-version)
-            if [[ $# -lt 2 ]]; then
-                echo "Error: -cuda-version requires a value" >&2
-                exit 1
-            fi
-            cuda_version="$2"
-            shift 2
-            ;;
-        *)
-            shift
-            ;;
-    esac
-done
-
-# Check if py_version was provided (this script requires it)
-require_py_version "$usage" || exit 1
-
-if [[ -z "$cuda_version" ]]; then
-    echo "Error: -cuda-version is required"
-    echo "$usage"
-    exit 1
-fi
-
-echo "Docker socket: " $(ls /var/run/docker.sock)
-
-# Map cuda_version to full version
-if [[ "$cuda_version" == "12" ]]; then
-    cuda_full_version="12.9.1"
-    cuda_short="cu12"
-elif [[ "$cuda_version" == "13" ]]; then
-    cuda_full_version="13.0.1"
-    cuda_short="cu13"
-else
-    echo "Error: Unsupported CUDA version: $cuda_version"
-    exit 1
-fi
-
-# pynvbench must be built in a container that can produce manylinux wheels,
-# and has the CUDA toolkit installed. We use the rapidsai/ci-wheel image for this.
-readonly devcontainer_version=25.12
-readonly devcontainer_distro=rockylinux8
-
-if [[ "$(uname -m)" == "aarch64" ]]; then
-    readonly cuda_image=rapidsai/ci-wheel:${devcontainer_version}-cuda${cuda_full_version}-${devcontainer_distro}-py${py_version}-arm64
-else
-    readonly cuda_image=rapidsai/ci-wheel:${devcontainer_version}-cuda${cuda_full_version}-${devcontainer_distro}-py${py_version}
-fi
-
-mkdir -p wheelhouse
-
-echo "::group::⚒️ Building CUDA ${cuda_version} wheel on ${cuda_image}"
-(
-    set -x
-    docker pull $cuda_image
-    docker run --rm -i \
-        --workdir /workspace/python \
-        --mount type=bind,source=$(pwd),target=/workspace/ \
-        --env py_version=${py_version} \
-        --env cuda_version=${cuda_version} \
-        $cuda_image \
-        /workspace/ci/build_pynvbench_wheel_inner.sh
-    # Prevent GHA runners from exhausting available storage with leftover images:
-    if [[ -n "${GITHUB_ACTIONS:-}" ]]; then
-        docker rmi -f $cuda_image
-    fi
-)
-echo "::endgroup::"
-
-echo "Wheels in wheelhouse:"
-ls -la wheelhouse/
diff --git a/ci/build_pynvbench_wheel_for_cuda.sh b/ci/build_pynvbench_wheel_for_cuda.sh
new file mode 100755
index 00000000..c0289ca2
--- /dev/null
+++ b/ci/build_pynvbench_wheel_for_cuda.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Apache 2.0 License
+# Copyright 2024-2025 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 with the LLVM exception
+# (the "License"); you may not use this file except in compliance with
+# the License.
+#
+# You may obtain a copy of the License at
+#
+#     http://llvm.org/foundation/relicensing/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euxo pipefail
+
+# Target script for the `docker run` command in build_multi_cuda_wheel.sh.
+# This script builds a single wheel for the container's CUDA version.
+# The /workspace pathnames are hard-wired here.
+
+# Determine the CUDA major version from nvcc (used later to tag the built wheel)
+cuda_version=$(nvcc --version | grep -oP 'release \K[0-9]+\.[0-9]+' | cut -d. -f1)
+echo "Detected CUDA version: ${cuda_version}"
+
+# Install GCC 13 toolset (needed for the build)
+/workspace/ci/util/retry.sh 5 30 dnf -y install gcc-toolset-13-gcc gcc-toolset-13-gcc-c++
+echo -e "#!/bin/bash\nsource /opt/rh/gcc-toolset-13/enable" >/etc/profile.d/enable_devtools.sh
+source /etc/profile.d/enable_devtools.sh
+
+# Note: CUDA dev packages (NVML, CUPTI, CUDART) are already installed in rapidsai/ci-wheel containers
+
+# Set up Python environment
+source /workspace/ci/pyenv_helper.sh
+setup_python_env "${py_version}"
+which python
+python --version
+echo "Done setting up python env"
+
+# Ensure we have full git history for setuptools_scm
+if $(git rev-parse --is-shallow-repository); then
+    git fetch --unshallow
+fi
+
+cd /workspace/python
+
+# Configure compilers
+export CXX="$(which g++)"
+export CUDACXX="$(which nvcc)"
+export CUDAHOSTCXX="$(which g++)"
+
+# Build the wheel
+python -m pip wheel --no-deps --verbose --wheel-dir dist .
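+
+# pip writes a single wheel into dist/; the name below is illustrative only, e.g.
+#   dist/pynvbench-0.1.0.dev1+gabc123-cp312-cp312-linux_x86_64.whl
+# (auditwheel repair to a manylinux tag happens later, in the merge script)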
+ +# Temporarily rename wheel to include CUDA version to avoid collision during multi-CUDA build +# The merge script will combine these into a single wheel +for wheel in dist/pynvbench-*.whl; do + if [[ -f "$wheel" ]]; then + base_name=$(basename "$wheel" .whl) + new_name="${base_name}.cu${cuda_version}.whl" + mv "$wheel" "dist/${new_name}" + echo "Renamed wheel to: ${new_name}" + fi +done + +# Move wheel to output directory +mkdir -p /workspace/wheelhouse +mv dist/pynvbench-*.cu*.whl /workspace/wheelhouse/ diff --git a/ci/build_pynvbench_wheel_inner.sh b/ci/build_pynvbench_wheel_inner.sh deleted file mode 100755 index 543deb6a..00000000 --- a/ci/build_pynvbench_wheel_inner.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Target script for `docker run` command in build_pynvbench_wheel.sh -# The /workspace pathnames are hard-wired here. - -# Install GCC 13 toolset (needed for the build) -/workspace/ci/util/retry.sh 5 30 dnf -y install gcc-toolset-13-gcc gcc-toolset-13-gcc-c++ -echo -e "#!/bin/bash\nsource /opt/rh/gcc-toolset-13/enable" >/etc/profile.d/enable_devtools.sh -source /etc/profile.d/enable_devtools.sh - -# Check what's available -which gcc -gcc --version -which nvcc -nvcc --version - -# Set up Python environment -source /workspace/ci/pyenv_helper.sh -setup_python_env "${py_version}" -which python -python --version -echo "Done setting up python env" - -# Ensure we have full git history for setuptools_scm -if $(git rev-parse --is-shallow-repository); then - git fetch --unshallow -fi - -cd /workspace/python - -# Determine CUDA version from nvcc -cuda_major=$(nvcc --version | grep -oP 'release \K[0-9]+\.[0-9]+' | cut -d. -f1) -echo "Detected CUDA major version: ${cuda_major}" - -# Configure compilers: -export CXX="$(which g++)" -export CUDACXX="$(which nvcc)" -export CUDAHOSTCXX="$(which g++)" - -# Build the wheel -python -m pip wheel --no-deps --verbose --wheel-dir dist . 
- -# Install auditwheel for manylinux compliance -python -m pip install auditwheel - -# Repair wheel to make it manylinux compliant -mkdir -p dist_repaired -for wheel in dist/pynvbench-*.whl; do - if [[ -f "$wheel" ]]; then - echo "Repairing wheel: $wheel" - python -m auditwheel repair \ - --exclude 'libcuda.so.1' \ - --exclude 'libnvidia-ml.so.1' \ - "$wheel" \ - --wheel-dir dist_repaired - fi -done - -# Rename wheel to include CUDA version suffix -mkdir -p /workspace/wheelhouse -for wheel in dist_repaired/pynvbench-*.whl; do - if [[ -f "$wheel" ]]; then - base_name=$(basename "$wheel" .whl) - # Append CUDA version to the local version identifier - # e.g., pynvbench-0.1.0.dev1+gabc123-cp312-cp312-manylinux_2_28_x86_64.whl - # becomes pynvbench-0.1.0.dev1+gabc123.cu12-cp312-cp312-manylinux_2_28_x86_64.whl - if [[ "$base_name" =~ ^(.*)-cp([0-9]+)-cp([0-9]+)-(.*) ]]; then - pkg_version="${BASH_REMATCH[1]}" - py_tag="cp${BASH_REMATCH[2]}" - abi_tag="cp${BASH_REMATCH[3]}" - platform="${BASH_REMATCH[4]}" - # If version has a local part (contains +), append .cu${cuda_major} to it - # Otherwise add +cu${cuda_major} - if [[ "$pkg_version" =~ \+ ]]; then - new_version="${pkg_version}.cu${cuda_major}" - else - new_version="${pkg_version}+cu${cuda_major}" - fi - new_name="${new_version}-${py_tag}-${abi_tag}-${platform}.whl" - mv "$wheel" "/workspace/wheelhouse/${new_name}" - echo "Renamed wheel to: ${new_name}" - else - # Fallback if regex doesn't match - mv "$wheel" /workspace/wheelhouse/ - echo "Moved wheel: $(basename $wheel)" - fi - fi -done diff --git a/ci/test_pynvbench.sh b/ci/test_pynvbench.sh index b021f0b5..53451515 100755 --- a/ci/test_pynvbench.sh +++ b/ci/test_pynvbench.sh @@ -40,11 +40,13 @@ if [[ -z "$cuda_version" ]]; then exit 1 fi -# Map cuda_version to full version +# Map cuda_version to full version and set CUDA extra if [[ "$cuda_version" == "12" ]]; then cuda_full_version="12.9.1" + cuda_extra="cu12" elif [[ "$cuda_version" == "13" ]]; then cuda_full_version="13.0.1" + cuda_extra="cu13" else echo "Error: Unsupported CUDA version: $cuda_version" exit 1 @@ -66,9 +68,11 @@ echo "::group::🧪 Testing CUDA ${cuda_version} wheel on ${cuda_image}" docker pull $cuda_image docker run --rm -i \ --workdir /workspace \ + --gpus all \ --mount type=bind,source=$(pwd),target=/workspace/ \ --env py_version=${py_version} \ --env cuda_version=${cuda_version} \ + --env cuda_extra="${cuda_extra}" \ $cuda_image \ /workspace/ci/test_pynvbench_inner.sh # Prevent GHA runners from exhausting available storage with leftover images: diff --git a/ci/test_pynvbench_inner.sh b/ci/test_pynvbench_inner.sh index 80748fa9..4303a5dd 100755 --- a/ci/test_pynvbench_inner.sh +++ b/ci/test_pynvbench_inner.sh @@ -24,19 +24,26 @@ echo "CUDA version: $(nvcc --version | grep release)" # Wheel should be in /workspace/wheelhouse (downloaded by workflow or built locally) WHEELHOUSE_DIR="/workspace/wheelhouse" -# Find and install pynvbench wheel -# Look for .cu${cuda_version} in the version string (e.g., pynvbench-0.0.1.dev1+g123.cu12-...) 
-PYNVBENCH_WHEEL_PATH="$(ls ${WHEELHOUSE_DIR}/pynvbench-*.cu${cuda_version}-*.whl 2>/dev/null | head -1)"
+# Find the pynvbench wheel (multi-CUDA wheel)
+# Prefer manylinux wheels, fall back to any wheel
+PYNVBENCH_WHEEL_PATH="$(ls ${WHEELHOUSE_DIR}/pynvbench-*manylinux*.whl 2>/dev/null | head -1)"
+if [[ -z "$PYNVBENCH_WHEEL_PATH" ]]; then
+    PYNVBENCH_WHEEL_PATH="$(ls ${WHEELHOUSE_DIR}/pynvbench-*.whl 2>/dev/null | head -1)"
+fi
+
 if [[ -z "$PYNVBENCH_WHEEL_PATH" ]]; then
     echo "Error: No pynvbench wheel found in ${WHEELHOUSE_DIR}"
-    echo "Looking for: pynvbench-*.cu${cuda_version}-*.whl"
     echo "Contents of ${WHEELHOUSE_DIR}:"
     ls -la ${WHEELHOUSE_DIR}/ || true
     exit 1
 fi
 
-echo "Installing wheel: $PYNVBENCH_WHEEL_PATH"
-python -m pip install "${PYNVBENCH_WHEEL_PATH}[test]"
+# Determine which extras to install; cuda_extra falls back to cu${cuda_version} when unset
+CUDA_EXTRA="${cuda_extra:-cu${cuda_version}}"
+TEST_EXTRA="test-cu${cuda_version}"
+
+echo "Installing wheel: $PYNVBENCH_WHEEL_PATH with extras: ${TEST_EXTRA}"
+python -m pip install "${PYNVBENCH_WHEEL_PATH}[${TEST_EXTRA}]"
 
 # Run tests
 cd "/workspace/python/test/"
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index c89c085b..7f8548c4 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -25,19 +25,23 @@ CPMAddPackage(
 
 CPMAddPackage("gh:pybind/pybind11@3.0.1")
 
+# Determine CUDA major version for directory structure
+set(CUDA_VERSION_DIR "cu${CUDAToolkit_VERSION_MAJOR}")
+message(STATUS "Building extension for CUDA ${CUDAToolkit_VERSION_MAJOR}, output directory: cuda/bench/${CUDA_VERSION_DIR}")
+
 add_library(_nvbench MODULE src/py_nvbench.cpp)
 target_include_directories(_nvbench PRIVATE ${Python_INCLUDE_DIRS})
 target_link_libraries(_nvbench PUBLIC nvbench::nvbench)
-target_link_libraries(_nvbench PRIVATE CUDA::cudart_static pybind11::headers)
+target_link_libraries(_nvbench PRIVATE CUDA::cudart_static CUDA::cuda_driver pybind11::headers)
 
 set_target_properties(_nvbench PROPERTIES INSTALL_RPATH "$ORIGIN")
 set_target_properties(_nvbench PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
 set_target_properties(_nvbench PROPERTIES POSITION_INDEPENDENT_CODE ON)
 set_target_properties(_nvbench PROPERTIES PREFIX "" SUFFIX "${PYTHON_MODULE_EXTENSION}")
 
-install(TARGETS _nvbench DESTINATION cuda/bench)
+install(TARGETS _nvbench DESTINATION cuda/bench/${CUDA_VERSION_DIR})
 
 # Determine target that nvbench::nvbench is an alias of,
 # necessary because ALIAS targets cannot be installed
 get_target_property(_aliased_target_name nvbench::nvbench ALIASED_TARGET)
-install(IMPORTED_RUNTIME_ARTIFACTS ${_aliased_target_name} DESTINATION cuda/bench)
+install(IMPORTED_RUNTIME_ARTIFACTS ${_aliased_target_name} DESTINATION cuda/bench/${CUDA_VERSION_DIR})
diff --git a/python/cuda/bench/__init__.py b/python/cuda/bench/__init__.py
index e1d2282a..a47bb394 100644
--- a/python/cuda/bench/__init__.py
+++ b/python/cuda/bench/__init__.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import importlib
 import importlib.metadata
 import warnings
 
@@ -31,29 +32,63 @@
         f"Version is set to fall-back value '{__version__}' instead."
     )
+
+# Detect the CUDA major version and load the appropriate extension
+def _get_cuda_major_version():
+    """Detect the CUDA major version from the installed cuda-bindings package."""
+    try:
+        import cuda.bindings
+
+        # Get CUDA version from cuda-bindings package version
+        # cuda-bindings version is in format like "12.9.1" or "13.0.0"
+        version_str = cuda.bindings.__version__
+        major = int(version_str.split(".")[0])
+        return major
+    except ImportError:
+        raise ImportError(
+            "cuda-bindings is required for runtime CUDA version detection. "
+            "Install with: pip install pynvbench[cu12] or pip install pynvbench[cu13]"
+        )
+
+
+_cuda_major = _get_cuda_major_version()
+_extra_name = f"cu{_cuda_major}"
+_module_fullname = f"cuda.bench.{_extra_name}._nvbench"
+
+try:
+    _nvbench_module = importlib.import_module(_module_fullname)
+except ImportError as e:
+    raise ImportError(
+        f"No pynvbench extension found for CUDA {_cuda_major}.x. "
+        f"This wheel may not include support for your CUDA version. "
+        f"Supported CUDA versions: 12, 13. "
+        f"Original error: {e}"
+    )
+
+# Load required NVIDIA libraries
 for libname in ("cupti", "nvperf_target", "nvperf_host"):
     load_nvidia_dynamic_lib(libname)
 
-from cuda.bench._nvbench import (  # noqa: E402
-    Benchmark as Benchmark,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    CudaStream as CudaStream,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    Launch as Launch,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    NVBenchRuntimeError as NVBenchRuntimeError,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    State as State,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    register as register,
-)
-from cuda.bench._nvbench import (  # noqa: E402
-    run_all_benchmarks as run_all_benchmarks,
-)
+# Import and expose all public symbols from the CUDA-specific extension
+Benchmark = _nvbench_module.Benchmark
+CudaStream = _nvbench_module.CudaStream
+Launch = _nvbench_module.Launch
+NVBenchRuntimeError = _nvbench_module.NVBenchRuntimeError
+State = _nvbench_module.State
+register = _nvbench_module.register
+run_all_benchmarks = _nvbench_module.run_all_benchmarks
+test_cpp_exception = _nvbench_module.test_cpp_exception
+test_py_exception = _nvbench_module.test_py_exception
+
+# Expose the module as _nvbench for backward compatibility (e.g., for tests)
+_nvbench = _nvbench_module
 
-del load_nvidia_dynamic_lib
+# Clean up internal symbols
+del (
+    load_nvidia_dynamic_lib,
+    _nvbench_module,
+    _cuda_major,
+    _extra_name,
+    _module_fullname,
+    _get_cuda_major_version,
+)
diff --git a/python/pyproject.toml b/python/pyproject.toml
index d5356705..e752eca2 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -22,22 +22,26 @@ classifiers = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-    # pathfinder
+    # pathfinder for finding CUDA libraries
     "cuda-pathfinder",
-
-    # Library expects to find shared libraries
-    # libcupti, libnvperf_target, libnvperf_host
-    # pathfinder is used to find it in the Python layout
-    "nvidia-cuda-cupti-cu12",
-
-    # The shared library
-    # libnvidia-ml must be installed system-wide
-    # (Debian package provider: libnvidia-compute)
 ]
 dynamic = ["version"]
 readme = { file = "README.md", content-type = "text/markdown" }
 
 [project.optional-dependencies]
+# CUDA 12.x dependencies
+cu12 = ["cuda-bindings>=12.0.0,<13.0.0", "nvidia-cuda-cupti-cu12"]
+
+# CUDA 13.x dependencies
+cu13 = ["cuda-bindings>=13.0.0,<14.0.0", "nvidia-cuda-cupti>=13.0"]
+
+# Test dependencies for CUDA 12
+test-cu12 = ["pynvbench[cu12]", "pytest", "cupy-cuda12x", "numba"]
+
+# Test dependencies for CUDA 
13 +test-cu13 = ["pynvbench[cu13]", "pytest", "cupy-cuda13x", "numba"] + +# Generic test dependencies (defaults to CUDA 12) test = ["pytest", "cupy-cuda12x", "numba"] [project.urls] diff --git a/python/src/py_nvbench.cpp b/python/src/py_nvbench.cpp index 2b09574d..68a2f26f 100644 --- a/python/src/py_nvbench.cpp +++ b/python/src/py_nvbench.cpp @@ -272,7 +272,11 @@ std::unique_ptr global_registry{}; // If you modify these bindings, please be sure to update the // corresponding type hints in ``../cuda/nvbench/__init__.pyi`` -PYBIND11_MODULE(_nvbench, m) +#ifndef PYBIND11_MODULE_NAME +#define PYBIND11_MODULE_NAME _nvbench +#endif + +PYBIND11_MODULE(PYBIND11_MODULE_NAME, m) { // == STEP 1 // Set environment variable CUDA_MODULE_LOADING=EAGER
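
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the diff): with the merged wheel,
# the extension choice is driven solely by the installed cuda-bindings major
# version, e.g.:
#
#   pip install "pynvbench[cu13]"   # pulls cuda-bindings>=13.0.0,<14.0.0 and CUPTI
#   python -c "import cuda.bench"   # __init__.py loads cuda.bench.cu13._nvbench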