diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 30a490b..4925ad7 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -42,25 +42,40 @@ jobs:
   wheel:
     name: Build Wheel
-    runs-on: ${{ matrix.os }}
     needs: release
-
+    runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        os: ['ubuntu-20.04']
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
-        pytorch-version: ['2.2.2', '2.3.1', '2.4.0', '2.5.1', '2.6.0']
-        cuda-version: ['12.4.0']
-        exclude:
-          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
-          # Pytorch < 2.5 does not support Python 3.13
-          - pytorch-version: '2.2.2'
-            python-version: '3.13'
-          - pytorch-version: '2.3.1'
-            python-version: '3.13'
-          - pytorch-version: '2.4.0'
-            python-version: '3.13'
+        # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
+        # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
+        os: [ ubuntu-22.04, ubuntu-22.04-arm ]
+        python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ]
+        pytorch-version: [ '2.4.0', '2.5.1', '2.6.0', '2.7.0' ]
+        cuda-version: [ '12.4.1', '12.8.1' ]
+        # We need separate wheels that either use the C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
+        # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
+        # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
+        # when building without C++11 ABI and using it on nvcr images.
+        cxx11_abi: [ 'FALSE', 'TRUE' ]
+        exclude:
+          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
+          # PyTorch < 2.5 does not support Python 3.13
+          - pytorch-version: '2.4.0'
+            python-version: '3.13'
+
+          # PyTorch 2.7.0 must only use CUDA 12.8.1
+          - pytorch-version: '2.7.0'
+            cuda-version: '12.4.1'
+
+          # All other PyTorch (< 2.7.0) must only use CUDA 12.4.1
+          - pytorch-version: '2.4.0'
+            cuda-version: '12.8.1'
+          - pytorch-version: '2.5.1'
+            cuda-version: '12.8.1'
+          - pytorch-version: '2.6.0'
+            cuda-version: '12.8.1'
 
     steps:
       - name: Checkout
@@ -90,7 +105,7 @@
 
       - name: Install CUDA ${{ matrix.cuda-version }}
        if: ${{ matrix.cuda-version != 'cpu' }}
-        uses: Jimver/cuda-toolkit@v0.2.19
+        uses: Jimver/cuda-toolkit@v0.2.23
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda-version }}
@@ -142,10 +157,10 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ['ubuntu-20.04']
-        python-version: ['3.10']
-        pytorch-version: ['2.3.0'] # Must be the most recent version that meets requirements-cuda.txt.
-        cuda-version: ['12.2.2']
+        os: ['ubuntu-latest']
+        python-version: ['3.12']
+        pytorch-version: ['2.7.0'] # Must be the most recent version that meets requirements-cuda.txt.
+        cuda-version: [ '12.4.1' ]
 
     steps:
       - name: Checkout
@@ -163,7 +178,7 @@
           bash -x .github/workflows/scripts/env.sh
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
 
@@ -175,7 +190,7 @@
 
      - name: Install CUDA ${{ matrix.cuda-version }}
        if: ${{ matrix.cuda-version != 'cpu' }}
-        uses: Jimver/cuda-toolkit@v0.2.14
+        uses: Jimver/cuda-toolkit@v0.2.23
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda-version }}
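Note on the new `cxx11_abi` matrix axis above: whether a given PyTorch build was compiled with the C++11 ABI can be checked at runtime, which is a quick way to decide which wheel flavor a target image (for example an NVCR container) needs. A minimal sketch, assuming `torch` is installed in the environment being probed:

```python
# Report whether the installed torch build was compiled with the C++11 ABI.
# True on builds compiled with -D_GLIBCXX_USE_CXX11_ABI=1 (e.g. NVCR images),
# False for the regular PyPI wheels, so the matching wheel flavor can be chosen.
import torch

print(torch.compiled_with_cxx11_abi())
```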
diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
index f830091..6e6d732 100644
--- a/.github/workflows/scripts/build.sh
+++ b/.github/workflows/scripts/build.sh
@@ -3,6 +3,17 @@
 python_executable=python$1
 cuda_home=/usr/local/cuda-$2
 
+# Select the GPU architectures to build for based on the CUDA version
+if [ "$2" = "12.8.1" ]; then
+    echo "CUDA version is 12.8.1, building wheels for architectures up to Blackwell (sm_100/sm_120)."
+    # Make sure release wheels are built for the following architectures
+    export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX"
+else
+    echo "CUDA version is $2, building wheels for architectures up to Hopper (sm_90)."
+    # Make sure release wheels are built for the following architectures
+    export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+fi
+
 # Update paths
 PATH=${cuda_home}/bin:$PATH
 LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
@@ -13,8 +24,6 @@ $python_executable -m pip install flash_attn triton
 
 # Limit the number of parallel jobs to avoid OOM
 export MAX_JOBS=1
-# Make sure release wheels are built for the following architectures
-export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 
 # Build
 if [ "$3" = sdist ]; then
@@ -24,4 +33,4 @@ MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
 tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}
 wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
 ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
-fi
+fi
\ No newline at end of file
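For reference, the `sed "s/-/+$tmpname-/2"` step at the end of build.sh splices a local version tag (CUDA and torch versions) into the wheel filename right after the package version. A rough Python equivalent of that rename; the wheel filename and tag values below are only illustrative:

```python
# Sketch of the wheel rename done by build.sh: insert "+cu<cuda>torch<torch>"
# after the version field, i.e. at the second '-'-separated component.
tmpname = "cu124torch2.6"  # cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}
wheel = "minference-0.1.6-cp312-cp312-linux_x86_64.whl"  # hypothetical input name

name, _, rest = wheel.partition("-")      # distribution name
version, _, tail = rest.partition("-")    # package version
print(f"{name}-{version}+{tmpname}-{tail}")
# -> minference-0.1.6+cu124torch2.6-cp312-cp312-linux_x86_64.whl
```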
diff --git a/.github/workflows/scripts/cuda-install.sh b/.github/workflows/scripts/cuda-install.sh
index 312c6e8..325de75 100644
--- a/.github/workflows/scripts/cuda-install.sh
+++ b/.github/workflows/scripts/cuda-install.sh
@@ -5,8 +5,37 @@ cuda_version=$(echo $1 | tr "." "-")
 # Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
 OS=$(echo $2 | tr -d ".\-")
 
+ARCH=$(uname -m)
+ARCH_TYPE=$ARCH
+
+# Detect whether this is a Tegra (Jetson) device
+if [[ "$ARCH" == "aarch64" ]]; then
+    if uname -a | grep -qi tegra; then
+        ARCH_TYPE="tegra-aarch64"
+    fi
+fi
+
+echo "Detected architecture: ${ARCH_TYPE}"
+
 # Installs CUDA
-wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-keyring_1.1-1_all.deb
+if [[ "$ARCH_TYPE" == "tegra-aarch64" ]]; then
+    # Jetson (Tegra)
+    wget -nv \
+        https://developer.download.nvidia.com/compute/cuda/repos/${OS}/arm64/cuda-${DISTRO}.pin \
+        -O /etc/apt/preferences.d/cuda-repository-pin-600
+else
+    # ARM64 SBSA (Grace)
+    wget -nv \
+        https://developer.download.nvidia.com/compute/cuda/repos/${OS}/sbsa/cuda-${DISTRO}.pin \
+        -O /etc/apt/preferences.d/cuda-repository-pin-600
+fi
+
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 rm cuda-keyring_1.1-1_all.deb
 sudo apt -qq update
diff --git a/.github/workflows/scripts/pytorch-install.sh b/.github/workflows/scripts/pytorch-install.sh
index 2c24ca8..0178a14 100644
--- a/.github/workflows/scripts/pytorch-install.sh
+++ b/.github/workflows/scripts/pytorch-install.sh
@@ -20,8 +20,8 @@ pip install typing-extensions==4.12.2
 echo $MATRIX_CUDA_VERSION
 echo $MATRIX_TORCH_VERSION
 export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
-minv = {'2.2': 118, '2.3': 118, '2.4': 118, '2.5': 118, '2.6': 118}[env['MATRIX_TORCH_VERSION']]; \
-maxv = {'2.2': 121, '2.3': 121, '2.4': 124, '2.5': 124, '2.6': 124}[env['MATRIX_TORCH_VERSION']]; \
+minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118}[env['MATRIX_TORCH_VERSION']]; \
+maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128}[env['MATRIX_TORCH_VERSION']]; \
 print(max(min(int(env['MATRIX_CUDA_VERSION']), maxv), minv))" \
 )
 if [[ ${pytorch_version} == *"dev"* ]]; then
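The updated one-liner above clamps the matrix CUDA version into the range of CUDA builds published for each PyTorch release. Expanded into plain Python for readability (same lookup tables as the script; the example calls at the bottom are only illustrative):

```python
# Expanded form of the TORCH_CUDA_VERSION computation in pytorch-install.sh.
MIN_CUDA = {"2.4": 118, "2.5": 118, "2.6": 118, "2.7": 118}
MAX_CUDA = {"2.4": 124, "2.5": 124, "2.6": 126, "2.7": 128}

def torch_cuda_version(torch_version: str, matrix_cuda_version: int) -> int:
    """Clamp the requested CUDA version into the range published for this torch release."""
    lo, hi = MIN_CUDA[torch_version], MAX_CUDA[torch_version]
    return max(min(matrix_cuda_version, hi), lo)

print(torch_cuda_version("2.6", 128))  # 126: torch 2.6 wheels stop at cu126
print(torch_cuda_version("2.7", 124))  # 124: already within range, kept as-is
```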
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 7cc737f..2cee0e2 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -20,8 +20,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        python-version: ["3.9", "3.10", "3.11"]
+        os: [ubuntu-latest, ubuntu-24.04-arm, macos-latest, windows-latest, windows-11-arm]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
         exclude:
           - os: macos-latest
             python-version: '3.9'
diff --git a/setup.py b/setup.py
index 5da43ce..96ddbff 100644
--- a/setup.py
+++ b/setup.py
@@ -119,12 +119,15 @@ def get_minference_version() -> str:
     return str(version)
 
 
-def get_platform():
+def get_arch():
     """
-    Returns the platform name as used in wheel filenames.
+    Returns the machine architecture of the current system.
     """
     if sys.platform.startswith("linux"):
-        return f"linux_{platform.uname().machine}"
+        if platform.machine() == "x86_64":
+            return "x86_64"
+        if platform.machine() == "arm64" or platform.machine() == "aarch64":
+            return "aarch64"
     elif sys.platform == "darwin":
         mac_version = ".".join(platform.mac_ver()[0].split(".")[:2])
         return f"macosx_{mac_version}_x86_64"
@@ -134,6 +137,28 @@ def get_platform():
         raise ValueError("Unsupported platform: {}".format(sys.platform))
 
 
+def get_system() -> str:
+    """
+    Returns the system name as used in wheel filenames.
+    """
+    if platform.system() == "Windows":
+        return "win"
+    elif platform.system() == "Darwin":
+        mac_version = ".".join(platform.mac_ver()[0].split(".")[:1])
+        return f"macos_{mac_version}"
+    elif platform.system() == "Linux":
+        return "linux"
+    else:
+        raise ValueError("Unsupported system: {}".format(platform.system()))
+
+
+def get_platform() -> str:
+    """
+    Returns the platform name as used in wheel filenames.
+    """
+    return f"{get_system()}_{get_arch()}"
+
+
 def get_wheel_url():
     # Determine the version numbers that will be used to determine the correct wheel
     # We're using the CUDA version used to build torch, not the one currently installed
@@ -265,4 +290,4 @@ def __init__(self, *args, **kwargs) -> None:
     else {
         "bdist_wheel": CachedWheelsCommand,
     },
-)
+)
\ No newline at end of file
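With the helpers split as above, the wheel platform tag is now composed from the system name and the machine architecture, e.g. `linux_x86_64` or `linux_aarch64` for the new ARM runners. A simplified standalone sketch of that composition (the actual functions live in setup.py and additionally embed the macOS version and handle macOS/Windows wheel naming):

```python
# Simplified sketch of the platform-tag composition introduced in setup.py.
import platform

def get_arch() -> str:
    machine = platform.machine()
    if machine == "x86_64":
        return "x86_64"
    if machine in ("arm64", "aarch64"):
        return "aarch64"
    raise ValueError(f"Unsupported machine: {machine}")

def get_system() -> str:
    return {"Linux": "linux", "Darwin": "macos", "Windows": "win"}[platform.system()]

def get_platform() -> str:
    return f"{get_system()}_{get_arch()}"

# e.g. 'linux_x86_64' on ubuntu-22.04, 'linux_aarch64' on ubuntu-22.04-arm
print(get_platform())
```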