diff --git a/.github/scripts/build-cpu.sh b/.github/scripts/build-cpu.sh index 0ede8503c..1feaaced5 100644 --- a/.github/scripts/build-cpu.sh +++ b/.github/scripts/build-cpu.sh @@ -4,12 +4,6 @@ set -xeuo pipefail : "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows/macOS)}" : "${RUNNER_ARCH:?RUNNER_ARCH must be set (X64/ARM64)}" -if [[ "${RUNNER_OS}" == "Windows" ]]; then - pip install cmake==3.30.9 -else - pip install cmake==3.28.3 -fi - if [ "${RUNNER_OS}" == "macOS" ] && [ "${RUNNER_ARCH}" == "ARM64" ]; then cmake -DCMAKE_OSX_ARCHITECTURES=arm64 -DCOMPUTE_BACKEND=cpu . else diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index 7537ba280..e15bbabdb 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -26,8 +26,6 @@ else [[ "${CUDA_VERSION}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120" fi -[[ "${RUNNER_OS}" == "Windows" ]] && python3 -m pip install ninja - if [ "${RUNNER_OS}" == "Linux" ]; then # We'll use Rocky Linux 8 in order to maintain manylinux 2.24 compatibility. image="nvidia/cuda:${CUDA_VERSION}-devel-rockylinux8" @@ -35,12 +33,11 @@ if [ "${RUNNER_OS}" == "Linux" ]; then docker run -i -w /src -v "$PWD:/src" "$image" bash -c \ "dnf -y --refresh update --security \ - && dnf -y install cmake gcc-toolset-11 --setopt=install_weak_deps=False --setopt=tsflags=nodocs \ + && dnf -y install cmake gcc-toolset-11-toolchain --setopt=install_weak_deps=False --setopt=tsflags=nodocs \ && source scl_source enable gcc-toolset-11 \ && cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" . \ - && cmake --build . --config Release" + && cmake --build . --config Release --parallel" else - pip install cmake==3.28.3 cmake -G Ninja -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY="${build_capability}" -DCMAKE_BUILD_TYPE=Release -S . cmake --build . --config Release fi diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index 9d452b8bc..375ee30b1 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -17,10 +17,9 @@ if [ "${RUNNER_OS}" == "Linux" ]; then echo "Using image $image" docker run --rm -i \ -w /src -v "$PWD:/src" "$image" sh -c \ - "apt-get update \ - && pip install cmake==3.31.6 \ + "pip install cmake==3.31.6 \ && cmake -DCOMPUTE_BACKEND=hip -DCMAKE_BUILD_TYPE=MinSizeRel -DCMAKE_HIP_FLAGS=\"--offload-compress\" -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \ - && cmake --build ." + && cmake --build . --parallel" else bnb_rocm_arch="gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201" diff --git a/.github/scripts/build-xpu.sh b/.github/scripts/build-xpu.sh index 9c9e51e41..5683ff3fb 100755 --- a/.github/scripts/build-xpu.sh +++ b/.github/scripts/build-xpu.sh @@ -14,7 +14,7 @@ if [ "${RUNNER_OS}" == "Linux" ]; then && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ cmake bison intel-fw-gpu intel-ocloc \ && cmake -DCOMPUTE_BACKEND=xpu . \ - && cmake --build . --config Release" + && cmake --build . --config Release --parallel" fi output_dir="output/${RUNNER_OS}/X64" diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2d0e7ca4b..81b65d253 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -36,7 +36,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup MSVC - if: startsWith(matrix.os, 'windows') + if: runner.os == 'Windows' uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl with: arch: ${{ runner.arch == 'ARM64' && 'arm64' || 'x64' }} @@ -48,6 +48,7 @@ jobs: name: shared_library_${{ runner.os }}_${{ runner.arch }} path: output/* retention-days: 7 + if-no-files-found: error ## # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64) @@ -64,7 +65,7 @@ jobs: - uses: actions/checkout@v4 # Windows: We install Cuda on the agent (slow) - uses: Jimver/cuda-toolkit@3d45d157f327c09c04b50ee6ccdea2d9d017ec76 # v0.2.35 - if: startsWith(matrix.os, 'windows') + if: runner.os == 'Windows' id: cuda-toolkit with: cuda: ${{ matrix.cuda_version }} @@ -75,7 +76,7 @@ jobs: use-local-cache: false log-file-suffix: ${{ runner.os }}-${{ runner.arch }}-${{matrix.cuda_version}}.txt - name: Setup MSVC - if: startsWith(matrix.os, 'windows') + if: runner.os == 'Windows' uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl with: toolset: "14.44" @@ -89,6 +90,7 @@ jobs: name: shared_library_cuda_${{ runner.os }}_${{ runner.arch }}_${{ matrix.cuda_version }} path: output/* retention-days: 7 + if-no-files-found: error build-xpu: strategy: @@ -110,6 +112,7 @@ jobs: name: shared_library_xpu_${{ runner.os }}_${{ runner.arch }} path: output/* retention-days: 7 + if-no-files-found: error build-rocm: strategy: @@ -123,7 +126,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Clean up disk space - if: startsWith(matrix.os, 'ubuntu') + if: runner.os == 'Linux' run: | echo "Disk space before cleanup:" df -h @@ -140,7 +143,7 @@ jobs: echo "Disk space after cleanup:" df -h - name: Setup MSVC - if: startsWith(matrix.os, 'windows') + if: runner.os == 'Windows' uses: ilammy/msvc-dev-cmd@v1.13.0 with: toolset: "14.44" @@ -154,6 +157,7 @@ jobs: name: shared_library_rocm_${{ runner.os }}_${{ runner.arch }}_${{ matrix.rocm_version }} path: output/* retention-days: 7 + if-no-files-found: error build-wheels: env: @@ -187,11 +191,10 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - # Python for Windows ARM64 is only available from 3.12+ - python-version: ${{ matrix.os == 'windows-11-arm' && '3.12' || '3.10' }} + python-version: "3.12" cache: pip - run: pip install build wheel - - run: python -m build . + - run: python -m build -w . - name: Determine and Set Platform Tag, then Tag Wheel shell: bash run: | @@ -204,6 +207,8 @@ jobs: name: bdist_wheel_${{ runner.os }}_${{ runner.arch }} path: dist/bitsandbytes-*.whl retention-days: 7 + if-no-files-found: error + compression-level: 0 upload-pre-release-wheels: name: Create release and upload artifacts diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml index 717c73265..9745fe1b3 100644 --- a/.github/workflows/test-runner.yml +++ b/.github/workflows/test-runner.yml @@ -53,69 +53,31 @@ jobs: id: config shell: bash run: | - # Map platform to OS identifiers, architecture, and test runner + # Map platform to test runner case "${{ inputs.platform }}" in linux-x64) - BUILD_OS="ubuntu-22.04" - ARCH="x64" if [[ "${{ inputs.backend }}" == "cuda" ]]; then case "${{ inputs.gpu_type }}" in - T4) - TEST_RUNNER="bandb-aws-g4dn-4xlarge-plus-use1-public-80" - ;; - A10) - TEST_RUNNER="bandb-aws-g5-4xlarge-plus-use1-public-80" - ;; - L40S) - TEST_RUNNER="bandb-aws-g6e-4xlarge-plus-use1-public-80" - ;; - *) - echo "::error::Must specify gpu_type (T4, A10, L40S) for linux-x64 cuda backend" - exit 1 - ;; + T4) TEST_RUNNER="bandb-aws-g4dn-4xlarge-plus-use1-public-80" ;; + A10) TEST_RUNNER="bandb-aws-g5-4xlarge-plus-use1-public-80" ;; + L40S) TEST_RUNNER="bandb-aws-g6e-4xlarge-plus-use1-public-80" ;; + *) echo "::error::Must specify gpu_type (T4, A10, L40S) for linux-x64 cuda backend"; exit 1 ;; esac else case "${{ inputs.cpu_type }}" in - icelake) - TEST_RUNNER="banb-aws-general-8-plus-use1-public-80" - ;; - cascadelake) - TEST_RUNNER="bandb-aws-g4dn-4xlarge-plus-use1-public-80" - ;; - "") - TEST_RUNNER="ubuntu-22.04" - ;; - *) - echo "::error::Invalid cpu_type: ${{ inputs.cpu_type }}" - exit 1 - ;; + icelake) TEST_RUNNER="banb-aws-general-8-plus-use1-public-80" ;; + cascadelake) TEST_RUNNER="bandb-aws-g4dn-4xlarge-plus-use1-public-80" ;; + "") TEST_RUNNER="ubuntu-22.04" ;; + *) echo "::error::Invalid cpu_type: ${{ inputs.cpu_type }}"; exit 1 ;; esac fi ;; - linux-aarch64) - BUILD_OS="ubuntu-22.04-arm" - ARCH="aarch64" - TEST_RUNNER="ubuntu-22.04-arm" - ;; - macos) - BUILD_OS="macos-15" - ARCH="arm64" - TEST_RUNNER="macos-15" - ;; + linux-aarch64) TEST_RUNNER="ubuntu-22.04-arm" ;; + macos) TEST_RUNNER="macos-15" ;; windows) - BUILD_OS="windows-2025" - ARCH="x64" - if [[ "${{ inputs.backend }}" == "cuda" ]]; then - TEST_RUNNER="CUDA-Windows-x64" - else - TEST_RUNNER="windows-2025" - fi - ;; - windows-arm64) - BUILD_OS="windows-11-arm" - ARCH="arm64" - TEST_RUNNER="windows-11-arm" + [[ "${{ inputs.backend }}" == "cuda" ]] && TEST_RUNNER="CUDA-Windows-x64" || TEST_RUNNER="windows-2025" ;; + windows-arm64) TEST_RUNNER="windows-11-arm" ;; *) echo "::error::Unsupported platform: ${{ inputs.platform }}" exit 1 @@ -123,12 +85,9 @@ jobs: esac # Create unique artifact name per configuration - ARTIFACT="lib_${{ inputs.backend }}_${BUILD_OS}_${ARCH}" - if [[ "${{ inputs.backend }}" == "cuda" ]]; then - ARTIFACT="${ARTIFACT}_${{ inputs.cuda_version }}_${{ inputs.gpu_type }}" - else - ARTIFACT="${ARTIFACT}_${{ inputs.cpu_type }}" - fi + ARTIFACT="lib_${{ inputs.backend }}_${RUNNER_OS}_${RUNNER_ARCH}" + [[ "${{ inputs.backend }}" == "cuda" ]] && ARTIFACT="${ARTIFACT}_${{ inputs.cuda_version }}_${{ inputs.gpu_type }}" + [[ "${{ inputs.backend }}" == "cpu" ]] && ARTIFACT="${ARTIFACT}_${{ inputs.cpu_type }}" ARTIFACT="${ARTIFACT}_torch${{ inputs.torch_version }}_${{ github.run_id }}_${{ github.run_attempt }}" echo "test_runner=${TEST_RUNNER}" >> $GITHUB_OUTPUT @@ -174,6 +133,7 @@ jobs: name: ${{ steps.config.outputs.artifact_name }} path: output/${{ runner.os }}/${{ runner.arch }}/* retention-days: 7 + if-no-files-found: error test: needs: build