From be785d760b63ba60adaa40171f7707ccd9b8fff0 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 15 Jun 2026 12:59:04 -0400 Subject: [PATCH 01/11] Try improving VS2026 + CUDA < 13.2 compat. --- CMakeLists.txt | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d420edb1..ccf0a5938 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,10 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + # Define included source files set(CPP_FILES csrc/cpu_ops.cpp csrc/pythonInterface.cpp) set(GPU_FILES csrc/ops.cu csrc/kernels.cu) @@ -115,8 +119,6 @@ endif() if (BUILD_CPU) - set(CMAKE_CXX_STANDARD 17) - set(CMAKE_CXX_STANDARD_REQUIRED ON) string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" HOST_ARCH) if(MSVC) # Use the experimental OpenMP runtime for persistent thread pool support. @@ -127,16 +129,28 @@ if (BUILD_CPU) endif() if(BUILD_CUDA) + set(CMAKE_CUDA_STANDARD 17) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) + set(CMAKE_CUDA_EXTENSIONS OFF) + # NVCC normally will only work with MSVC up to 1939. VS2022 17.10+ starts using versions 1940+. # Workaround: use --allow-unsupported-compiler # This needs to be added *before* we try to enable the CUDA language so CMake's compiler check passes. + if(MSVC AND MSVC_VERSION VERSION_GREATER_EQUAL 1940) string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler") - # This is needed to build with VS2022 17.11+ and CUDA < 12.4. + # Suppress MSVC STL version mismatch errors when using a newer compiler than CUDA officially supports. if (MSVC_VERSION VERSION_GREATER_EQUAL 1941) string(APPEND CMAKE_CUDA_FLAGS " -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH") endif() + + # VS2026 (MSVC 1950+): may need _ENABLE_EXTENDED_ALIGNED_STORAGE if CUDA headers + # use aligned_storage with extended alignment. This is independent of C++ standard. + # Commented out to verify if CMAKE_CUDA_STANDARD 17 alone is sufficient. + # if (MSVC_VERSION VERSION_GREATER_EQUAL 1950) + # string(APPEND CMAKE_CUDA_FLAGS " -D_ENABLE_EXTENDED_ALIGNED_STORAGE") + # endif() endif() enable_language(CUDA) # This will fail if CUDA is not found From 562fc4e021d34e8b8cd489767494c6a0c73fae0d Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 15 Jun 2026 12:59:37 -0400 Subject: [PATCH 02/11] Try improving VS2026 + CUDA < 13.2 compat. --- CMakeLists.txt | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ccf0a5938..04175d47b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,21 +136,14 @@ if(BUILD_CUDA) # NVCC normally will only work with MSVC up to 1939. VS2022 17.10+ starts using versions 1940+. # Workaround: use --allow-unsupported-compiler # This needs to be added *before* we try to enable the CUDA language so CMake's compiler check passes. - if(MSVC AND MSVC_VERSION VERSION_GREATER_EQUAL 1940) string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler") # Suppress MSVC STL version mismatch errors when using a newer compiler than CUDA officially supports. + # This is needed to build with VS2022 17.11+ and CUDA < 12.4. if (MSVC_VERSION VERSION_GREATER_EQUAL 1941) string(APPEND CMAKE_CUDA_FLAGS " -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH") endif() - - # VS2026 (MSVC 1950+): may need _ENABLE_EXTENDED_ALIGNED_STORAGE if CUDA headers - # use aligned_storage with extended alignment. This is independent of C++ standard. - # Commented out to verify if CMAKE_CUDA_STANDARD 17 alone is sufficient. - # if (MSVC_VERSION VERSION_GREATER_EQUAL 1950) - # string(APPEND CMAKE_CUDA_FLAGS " -D_ENABLE_EXTENDED_ALIGNED_STORAGE") - # endif() endif() enable_language(CUDA) # This will fail if CUDA is not found From 3346014e6686e1a5a395c1a0c4565b4c4350defe Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 15 Jun 2026 13:15:41 -0400 Subject: [PATCH 03/11] Try again --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 04175d47b..2e8adf099 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,6 +144,10 @@ if(BUILD_CUDA) if (MSVC_VERSION VERSION_GREATER_EQUAL 1941) string(APPEND CMAKE_CUDA_FLAGS " -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH") endif() + + if (MSVC_VERSION VERSION_GREATER_EQUAL 1950) + string(APPEND CMAKE_CUDA_FLAGS " -D_ENABLE_EXTENDED_ALIGNED_STORAGE") + endif() endif() enable_language(CUDA) # This will fail if CUDA is not found From a2fa57503aee19bf9c2cf6b9f925f4a9c29e0c60 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Tue, 16 Jun 2026 16:24:38 -0400 Subject: [PATCH 04/11] Downgrade to VS2022 for CUDA builds --- .github/scripts/build-cpu.sh | 12 ++++----- .github/scripts/build-cuda.sh | 32 +++++++++++------------ .github/scripts/build-rocm.sh | 30 ++++++++++----------- .github/scripts/build-xpu.sh | 10 +++---- .github/workflows/python-package.yml | 39 ++++++++++------------------ .github/workflows/test-runner.yml | 12 +++------ CMakeLists.txt | 3 --- 7 files changed, 59 insertions(+), 79 deletions(-) diff --git a/.github/scripts/build-cpu.sh b/.github/scripts/build-cpu.sh index 5db76ecce..0ede8503c 100644 --- a/.github/scripts/build-cpu.sh +++ b/.github/scripts/build-cpu.sh @@ -1,22 +1,22 @@ #!/bin/bash -declare build_arch -declare build_os - set -xeuo pipefail -if [[ "${build_os}" == windows* ]]; then +: "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows/macOS)}" +: "${RUNNER_ARCH:?RUNNER_ARCH must be set (X64/ARM64)}" + +if [[ "${RUNNER_OS}" == "Windows" ]]; then pip install cmake==3.30.9 else pip install cmake==3.28.3 fi -if [ "${build_os:0:5}" == macos ] && [ "${build_arch}" == aarch64 ]; then +if [ "${RUNNER_OS}" == "macOS" ] && [ "${RUNNER_ARCH}" == "ARM64" ]; then cmake -DCMAKE_OSX_ARCHITECTURES=arm64 -DCOMPUTE_BACKEND=cpu . else cmake -DCOMPUTE_BACKEND=cpu . fi cmake --build . --config Release -output_dir="output/${build_os}/${build_arch}" +output_dir="output/${RUNNER_OS}/${RUNNER_ARCH}" mkdir -p "${output_dir}" (shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index 9bb2f1a66..7537ba280 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -1,37 +1,36 @@ #!/bin/bash -declare build_arch -declare build_os -declare cuda_version -declare cuda_targets - set -xeuo pipefail -if [[ -v cuda_targets ]]; then - build_capability="${cuda_targets}" -elif [ "${build_arch}" = "aarch64" ]; then +: "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows/macOS)}" +: "${RUNNER_ARCH:?RUNNER_ARCH must be set (X64/ARM64)}" +: "${CUDA_VERSION:?CUDA_VERSION must be set}" + +if [[ -v CUDA_TARGETS ]]; then + build_capability="${CUDA_TARGETS}" +elif [ "${RUNNER_ARCH}" = "ARM64" ]; then build_capability="75;80;90" # CUDA 12.8-12.9: Add sm100/sm120 - [[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="75;80;90;100;120" + [[ "${CUDA_VERSION}" == 12.8.* || "${CUDA_VERSION}" == 12.9.* ]] && build_capability="75;80;90;100;120" # CUDA 13.0+: Add sm100/sm110/sm120 - [[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;90;100;110;120;121" + [[ "${CUDA_VERSION}" == 13.*.* ]] && build_capability="75;80;90;100;110;120;121" else # By default, target Pascal through Hopper. build_capability="60;70;75;80;86;89;90" # CUDA 12.8+: Add sm100 and sm120; remove < sm70 to align with PyTorch 2.8+cu128 minimum - [[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120" + [[ "${CUDA_VERSION}" == 12.8.* || "${CUDA_VERSION}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120" # CUDA 13.0+: Remove < sm75 to align with PyTorch 2.9+cu130 minimum - [[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120" + [[ "${CUDA_VERSION}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120" fi -[[ "${build_os}" = windows-* ]] && python3 -m pip install ninja +[[ "${RUNNER_OS}" == "Windows" ]] && python3 -m pip install ninja -if [ "${build_os:0:6}" == ubuntu ]; then +if [ "${RUNNER_OS}" == "Linux" ]; then # We'll use Rocky Linux 8 in order to maintain manylinux 2.24 compatibility. - image="nvidia/cuda:${cuda_version}-devel-rockylinux8" + image="nvidia/cuda:${CUDA_VERSION}-devel-rockylinux8" echo "Using image $image" docker run -i -w /src -v "$PWD:/src" "$image" bash -c \ @@ -46,7 +45,6 @@ else cmake --build . --config Release fi - -output_dir="output/${build_os}/${build_arch}" +output_dir="output/${RUNNER_OS}/${RUNNER_ARCH}" mkdir -p "${output_dir}" (shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index 77bd2eaf5..782b740c1 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -1,21 +1,21 @@ #!/bin/bash -declare build_arch -declare build_os -declare rocm_version - set -xeuo pipefail + +: "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows)}" +: "${ROCM_VERSION:?ROCM_VERSION must be set}" + bnb_rocm_arch="gfx90a;gfx942;gfx1100;gfx1101;gfx1102;gfx1103" # ROCm 6.4+ - Add RDNA4 and RDNA3.5 targets. Note we assume >=6.4.4. -[[ "${rocm_version}" == 6.4.* || "${rocm_version}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201" +[[ "${ROCM_VERSION}" == 6.4.* || "${ROCM_VERSION}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201" # ROCm 7.0+ - Add gfx950 -[[ "${rocm_version}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx950" +[[ "${ROCM_VERSION}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx950" -if [ "${build_os:0:6}" == ubuntu ]; then - image=rocm/dev-ubuntu-22.04:${rocm_version}-complete +if [ "${RUNNER_OS}" == "Linux" ]; then + image=rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete echo "Using image $image" - docker run --rm --platform "linux/$build_arch" -i \ + docker run --rm -i \ -w /src -v "$PWD:/src" "$image" sh -c \ "apt-get update \ && pip install cmake==3.31.6 \ @@ -27,12 +27,12 @@ else pip install ninja cmake==3.31.6 # Install ROCm SDK wheels from repo.radeon.com. - rocm_base_url="https://repo.radeon.com/rocm/windows/rocm-rel-${rocm_version}" + rocm_base_url="https://repo.radeon.com/rocm/windows/rocm-rel-${ROCM_VERSION}" pip install \ - "${rocm_base_url}/rocm_sdk_core-${rocm_version}-py3-none-win_amd64.whl" \ - "${rocm_base_url}/rocm_sdk_devel-${rocm_version}-py3-none-win_amd64.whl" \ - "${rocm_base_url}/rocm_sdk_libraries_custom-${rocm_version}-py3-none-win_amd64.whl" \ - "${rocm_base_url}/rocm-${rocm_version}.tar.gz" + "${rocm_base_url}/rocm_sdk_core-${ROCM_VERSION}-py3-none-win_amd64.whl" \ + "${rocm_base_url}/rocm_sdk_devel-${ROCM_VERSION}-py3-none-win_amd64.whl" \ + "${rocm_base_url}/rocm_sdk_libraries_custom-${ROCM_VERSION}-py3-none-win_amd64.whl" \ + "${rocm_base_url}/rocm-${ROCM_VERSION}.tar.gz" # Expand the devel tarball rocm-sdk init @@ -50,6 +50,6 @@ else cmake --build . fi -output_dir="output/${build_os}/${build_arch}" +output_dir="output/${RUNNER_OS}/X64" mkdir -p "${output_dir}" (shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") diff --git a/.github/scripts/build-xpu.sh b/.github/scripts/build-xpu.sh index d069e1230..9c9e51e41 100755 --- a/.github/scripts/build-xpu.sh +++ b/.github/scripts/build-xpu.sh @@ -1,10 +1,10 @@ #!/bin/bash -declare build_os - set -xeuo pipefail -# We currently only build XPU on Linux. -if [ "${build_os:0:6}" == ubuntu ]; then +: "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows)}" + +# We currently only build XPU on Linux x64 and Windows x64. +if [ "${RUNNER_OS}" == "Linux" ]; then # TODO: We might want to pre-build this as our own customized image in the future. image=intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04 echo "Using image $image" @@ -17,6 +17,6 @@ if [ "${build_os:0:6}" == ubuntu ]; then && cmake --build . --config Release" fi -output_dir="output/${build_os}/x86_64" +output_dir="output/${RUNNER_OS}/X64" mkdir -p "${output_dir}" (shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}") diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0e88ba018..4806007a9 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -52,13 +52,10 @@ jobs: arch: ${{ matrix.arch == 'arm64' && 'arm64' || 'x64' }} - name: Build C++ run: bash .github/scripts/build-cpu.sh - env: - build_os: ${{ matrix.os }} - build_arch: ${{ matrix.arch }} - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: shared_library_${{ matrix.os }}_${{ matrix.arch }} + name: shared_library_${{ runner.os }}_${{ runner.arch }} path: output/* retention-days: 7 @@ -69,13 +66,13 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025] + os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2022] include: - os: ubuntu-22.04 arch: x86_64 - os: ubuntu-22.04-arm arch: aarch64 - - os: windows-2025 + - os: windows-2022 arch: x86_64 cuda_version: ["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"] @@ -93,45 +90,39 @@ jobs: sub-packages: ${{ format('["nvcc"{0},"cudart","cublas","thrust","cublas_dev"]', startsWith(matrix.cuda_version, '13.') && ',"crt","nvvm","nvptxcompiler"' || '') }} use-github-cache: false use-local-cache: false - log-file-suffix: ${{matrix.os}}-${{matrix.cuda_version}}.txt + log-file-suffix: ${{ runner.os }}-${{ runner.arch }}-${{matrix.cuda_version}}.txt - name: Setup MSVC if: startsWith(matrix.os, 'windows') uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl - name: Build C++ run: bash .github/scripts/build-cuda.sh env: - build_os: ${{ matrix.os }} - build_arch: ${{ matrix.arch }} - cuda_version: ${{ matrix.cuda_version }} + CUDA_VERSION: ${{ matrix.cuda_version }} - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda_version }} + name: shared_library_cuda_${{ runner.os }}_${{ runner.arch }}_${{ matrix.cuda_version }} path: output/* retention-days: 7 build-xpu: strategy: matrix: - os: [ubuntu-22.04, windows-2025] + os: [ubuntu-22.04, windows-2022] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Build C++ (Linux) if: runner.os == 'Linux' run: bash .github/scripts/build-xpu.sh - env: - build_os: ${{ matrix.os }} - name: Build C++ (Windows) if: runner.os == 'Windows' run: .github/scripts/build-xpu-windows.bat shell: cmd - env: - build_os: ${{ matrix.os }} - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: shared_library_xpu_${{ matrix.os }}_x86_64 + name: shared_library_xpu_${{ runner.os }}_${{ runner.arch }} path: output/* retention-days: 7 @@ -171,13 +162,11 @@ jobs: - name: Build C++ run: bash .github/scripts/build-rocm.sh env: - build_os: ${{ matrix.os }} - build_arch: ${{ matrix.arch }} - rocm_version: ${{ matrix.rocm_version }} + ROCM_VERSION: ${{ matrix.rocm_version }} - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.rocm_version }} + name: shared_library_rocm_${{ runner.os }}_${{ runner.arch }}_${{ matrix.rocm_version }} path: output/* retention-days: 7 @@ -220,13 +209,13 @@ jobs: uses: actions/download-artifact@v4 with: merge-multiple: true - pattern: "shared_library*_${{ matrix.os }}_${{ matrix.arch }}*" + pattern: "shared_library*_${{ runner.os }}_${{ runner.arch }}*" path: output/ - name: Copy correct platform shared library shell: bash run: | ls -lR output/ - cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/ + cp output/${{ runner.os }}/${{ runner.arch }}/* bitsandbytes/ - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -243,7 +232,7 @@ jobs: - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }} + name: bdist_wheel_${{ runner.os }}_${{ runner.arch }} path: dist/bitsandbytes-*.whl retention-days: 7 @@ -406,7 +395,7 @@ jobs: - name: Download wheel uses: actions/download-artifact@v4 with: - name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }} + name: bdist_wheel_${{ runner.os }}_${{ runner.arch }} path: wheels/ - name: Set up Python uses: actions/setup-python@v5 diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml index d1e81bc32..db1414375 100644 --- a/.github/workflows/test-runner.yml +++ b/.github/workflows/test-runner.yml @@ -43,6 +43,7 @@ jobs: inputs.platform == 'linux-aarch64' && 'ubuntu-22.04-arm' || inputs.platform == 'macos' && 'macos-15' || inputs.platform == 'windows-arm64' && 'windows-11-arm' || + (inputs.platform == 'windows' && inputs.backend == 'cuda') && 'windows-2022' || 'windows-2025' }} outputs: @@ -140,11 +141,6 @@ jobs: - uses: actions/checkout@v4 - - name: Set build environment variables - shell: bash - run: | - echo "build_os=${{ steps.config.outputs.build_os }}" >> $GITHUB_ENV - echo "build_arch=${{ steps.config.outputs.arch }}" >> $GITHUB_ENV # Windows + CUDA: Install CUDA Toolkit - name: Install CUDA Toolkit @@ -173,14 +169,14 @@ jobs: if: inputs.backend == 'cuda' run: bash .github/scripts/build-cuda.sh env: - cuda_version: ${{ inputs.cuda_version }} - cuda_targets: "75;80;89" + CUDA_VERSION: ${{ inputs.cuda_version }} + CUDA_TARGETS: "75;80;89" - name: Upload build artifact uses: actions/upload-artifact@v4 with: name: ${{ steps.config.outputs.artifact_name }} - path: output/${{ steps.config.outputs.build_os }}/${{ steps.config.outputs.arch }}/* + path: output/${{ runner.os }}/${{ runner.arch }}/* retention-days: 7 test: diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e8adf099..fc7e41aac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,9 +145,6 @@ if(BUILD_CUDA) string(APPEND CMAKE_CUDA_FLAGS " -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH") endif() - if (MSVC_VERSION VERSION_GREATER_EQUAL 1950) - string(APPEND CMAKE_CUDA_FLAGS " -D_ENABLE_EXTENDED_ALIGNED_STORAGE") - endif() endif() enable_language(CUDA) # This will fail if CUDA is not found From 363df921b21b1e7404e3eb5981822397fd297866 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Tue, 16 Jun 2026 17:22:59 -0400 Subject: [PATCH 05/11] Update windows ROCm/XPU builds --- .github/scripts/build-rocm.sh | 2 -- .github/scripts/build-xpu-windows.bat | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index 782b740c1..d1bdd9765 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -24,8 +24,6 @@ if [ "${RUNNER_OS}" == "Linux" ]; then else bnb_rocm_arch="gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201" - pip install ninja cmake==3.31.6 - # Install ROCm SDK wheels from repo.radeon.com. rocm_base_url="https://repo.radeon.com/rocm/windows/rocm-rel-${ROCM_VERSION}" pip install \ diff --git a/.github/scripts/build-xpu-windows.bat b/.github/scripts/build-xpu-windows.bat index c7317b8a7..02d281d6e 100644 --- a/.github/scripts/build-xpu-windows.bat +++ b/.github/scripts/build-xpu-windows.bat @@ -29,6 +29,6 @@ if ERRORLEVEL 1 ( ) echo ::endgroup:: -set output_dir=output\%build_os%\x86_64 +set output_dir=output\Windows\X64 if not exist "%output_dir%" mkdir "%output_dir%" copy bitsandbytes\*.dll "%output_dir%\" 2>nul From c4beacbf4bd659ec6f02d1ab8544f7495f187d46 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Tue, 16 Jun 2026 18:11:08 -0400 Subject: [PATCH 06/11] update rocm build --- .github/scripts/build-rocm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh index d1bdd9765..9d452b8bc 100644 --- a/.github/scripts/build-rocm.sh +++ b/.github/scripts/build-rocm.sh @@ -35,15 +35,15 @@ else # Expand the devel tarball rocm-sdk init - ROCM_PATH="$(rocm-sdk path --root)" - export ROCM_PATH - export PATH="${ROCM_PATH}/bin:${PATH}" + ROCM_PATH="$(rocm-sdk path --root | tr '\\' '/')" + export ROCM_PATH PATH="${ROCM_PATH}/bin:${PATH}" cmake -G Ninja \ -DCOMPUTE_BACKEND=hip \ -DBNB_ROCM_ARCH="${bnb_rocm_arch}" \ -DCMAKE_BUILD_TYPE=MinSizeRel \ -DCMAKE_HIP_FLAGS="--offload-compress" \ + -DCMAKE_HIP_COMPILER_ROCM_ROOT="${ROCM_PATH}" \ -S . cmake --build . fi From e40e8edef0c57a62e1a6797c538725eb56e61182 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Tue, 16 Jun 2026 18:26:19 -0400 Subject: [PATCH 07/11] update windows rocm --- .github/workflows/python-package.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 4806007a9..dcebbe907 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -159,6 +159,8 @@ jobs: - name: Setup MSVC if: startsWith(matrix.os, 'windows') uses: ilammy/msvc-dev-cmd@v1.13.0 + with: + toolset: "14.44" - name: Build C++ run: bash .github/scripts/build-rocm.sh env: From f8c4e5b9a025b6f3e828106cfa6ff98ed1244d69 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Wed, 17 Jun 2026 13:57:53 -0400 Subject: [PATCH 08/11] try cuda windows-2025 again --- .github/workflows/python-package.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index dcebbe907..2dfb9654d 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -66,13 +66,13 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2022] + os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025] include: - os: ubuntu-22.04 arch: x86_64 - os: ubuntu-22.04-arm arch: aarch64 - - os: windows-2022 + - os: windows-2025 arch: x86_64 cuda_version: ["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"] @@ -94,6 +94,8 @@ jobs: - name: Setup MSVC if: startsWith(matrix.os, 'windows') uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl + with: + toolset: "14.44" - name: Build C++ run: bash .github/scripts/build-cuda.sh env: From 1756d7d6f1130d94a2e59fe25d558842ad39b116 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Wed, 17 Jun 2026 14:57:16 -0400 Subject: [PATCH 09/11] update doc --- docs/source/installation.mdx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 9a335f2d8..1a706dee4 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -174,7 +174,7 @@ The currently distributed `bitsandbytes` packages are built with the following c |--------------------|----------------------|----------------------| | **Linux x86-64** | GCC 11.4 | AVX2 | | **Linux aarch64** | GCC 11.4 | | -| **Windows x86-64** | MSVC 19.43+ (VS2022) | AVX2 | +| **Windows x86-64** | MSVC 19.51+ (VS2026) | AVX2 | | **Windows arm64** | MSVC 19.43+ (VS2022) | ARM NEON | | **macOS arm64** | Apple Clang 17 | | @@ -201,6 +201,8 @@ pip install -e . +Requires Visual Studio 2022 or 2026. + ```bash git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ pip install -e . @@ -209,7 +211,7 @@ pip install -e . -Requires Visual Studio 2022 with the **ARM64 C++ build tools** component, Python >= **3.12**, and PyTorch >= **2.12**. +Requires Visual Studio 2022 or 2026 with the **ARM64 C++ build tools** component, Python >= **3.12**, and PyTorch >= **2.12**. ```bash git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ @@ -280,7 +282,7 @@ pip install -e . # `-e` for "editable" install, when developing BNB (otherwise -Compilation on Windows requires Visual Studio with C++ support, CMake, Ninja, and Python >= **3.10**. +Compilation on Windows requires Visual Studio 2022 with C++ support, CMake, Ninja, and Python >= **3.10**. Instead of a system-wide ROCm installation, you can use the pip-installable ROCm SDK wheels from [repo.radeon.com](https://repo.radeon.com/rocm/windows/): From bc9993fd539016003e6e6ce10631b001010ebfe0 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Wed, 17 Jun 2026 14:57:32 -0400 Subject: [PATCH 10/11] update test workflow --- .github/workflows/test-runner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml index db1414375..d64e66639 100644 --- a/.github/workflows/test-runner.yml +++ b/.github/workflows/test-runner.yml @@ -43,7 +43,6 @@ jobs: inputs.platform == 'linux-aarch64' && 'ubuntu-22.04-arm' || inputs.platform == 'macos' && 'macos-15' || inputs.platform == 'windows-arm64' && 'windows-11-arm' || - (inputs.platform == 'windows' && inputs.backend == 'cuda') && 'windows-2022' || 'windows-2025' }} outputs: @@ -158,6 +157,7 @@ jobs: uses: ilammy/msvc-dev-cmd@v1.13.0 with: arch: ${{ inputs.platform == 'windows-arm64' && 'arm64' || 'x64' }} + toolset: ${{ (inputs.platform == 'windows' && inputs.backend == 'cuda') && '14.44' || '' }} # Build CPU backend - name: Build C++ From 96771e2a19ddbf62ebdcbee3692410dad89eb626 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Wed, 17 Jun 2026 17:07:24 -0400 Subject: [PATCH 11/11] cleanup --- .github/scripts/set_platform_tag.py | 8 +++-- .github/workflows/python-package.yml | 52 ++++------------------------ .github/workflows/test-runner.yml | 4 --- 3 files changed, 11 insertions(+), 53 deletions(-) diff --git a/.github/scripts/set_platform_tag.py b/.github/scripts/set_platform_tag.py index 1ffeeec9d..0186d41c4 100644 --- a/.github/scripts/set_platform_tag.py +++ b/.github/scripts/set_platform_tag.py @@ -4,14 +4,16 @@ def get_platform_tag(architecture): + arch = architecture.lower() + is_x64 = arch in ("x86_64", "x64") system = platform.system() if system == "Linux": - tag = "manylinux_2_24_x86_64" if architecture == "x86_64" else "manylinux_2_24_aarch64" + tag = "manylinux_2_24_x86_64" if is_x64 else "manylinux_2_24_aarch64" elif system == "Darwin": tag = "macosx_14_0_arm64" elif system == "Windows": - tag = "win_amd64" if architecture == "x86_64" else "win_arm64" + tag = "win_amd64" if is_x64 else "win_arm64" else: sys.exit(f"Unsupported system: {system}") @@ -20,7 +22,7 @@ def get_platform_tag(architecture): def main(): parser = argparse.ArgumentParser(description="Determine platform tag.") - parser.add_argument("arch", type=str, help="Architecture (e.g., x86_64, aarch64)") + parser.add_argument("arch", type=str, help="Architecture (e.g., x86_64, aarch64, X64, ARM64)") args = parser.parse_args() tag = get_platform_tag(args.arch) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2dfb9654d..2d0e7ca4b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -31,17 +31,7 @@ jobs: build-cpu: strategy: matrix: - include: - - os: ubuntu-22.04 - arch: x86_64 - - os: ubuntu-22.04-arm - arch: aarch64 - - os: windows-2025 - arch: x86_64 - - os: windows-11-arm - arch: arm64 - - os: macos-15 - arch: arm64 + os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, windows-11-arm, macos-15] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 @@ -49,7 +39,7 @@ jobs: if: startsWith(matrix.os, 'windows') uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl with: - arch: ${{ matrix.arch == 'arm64' && 'arm64' || 'x64' }} + arch: ${{ runner.arch == 'ARM64' && 'arm64' || 'x64' }} - name: Build C++ run: bash .github/scripts/build-cpu.sh - name: Upload build artifact @@ -67,13 +57,6 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025] - include: - - os: ubuntu-22.04 - arch: x86_64 - - os: ubuntu-22.04-arm - arch: aarch64 - - os: windows-2025 - arch: x86_64 cuda_version: ["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"] runs-on: ${{ matrix.os }} @@ -132,11 +115,9 @@ jobs: strategy: matrix: os: [ubuntu-22.04] - arch: [x86_64] rocm_version: ["6.2.4", "6.3.4", "6.4.4", "7.0.2", "7.1.1", "7.2.3"] include: - os: windows-2025 - arch: x86_64 rocm_version: "7.2.1" runs-on: ${{ matrix.os }} steps: @@ -186,23 +167,6 @@ jobs: strategy: matrix: os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, windows-11-arm, macos-15] - include: - - os: ubuntu-22.04 - arch: x86_64 - python-version: "3.10" - - os: ubuntu-22.04-arm - arch: aarch64 - python-version: "3.10" - - os: windows-2025 - arch: x86_64 - python-version: "3.10" - - os: windows-11-arm - arch: arm64 - # Python for Windows ARM64 is only available from 3.12+ - python-version: "3.12" - - os: macos-15 - arch: arm64 - python-version: "3.10" # The specific Python version is irrelevant in this context as we are only packaging non-C extension # code. This ensures compatibility across Python versions, as compatibility is # dictated by the packaged code itself, not the Python version used for packaging. @@ -220,17 +184,18 @@ jobs: run: | ls -lR output/ cp output/${{ runner.os }}/${{ runner.arch }}/* bitsandbytes/ - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + # Python for Windows ARM64 is only available from 3.12+ + python-version: ${{ matrix.os == 'windows-11-arm' && '3.12' || '3.10' }} cache: pip - run: pip install build wheel - run: python -m build . - name: Determine and Set Platform Tag, then Tag Wheel shell: bash run: | - PLATFORM_TAG=$(python .github/scripts/set_platform_tag.py "${{ matrix.arch }}") + PLATFORM_TAG=$(python .github/scripts/set_platform_tag.py "${{ runner.arch }}") echo "PLATFORM_TAG=$PLATFORM_TAG" wheel tags --remove --abi-tag=none --python-tag=py3 --platform-tag=$PLATFORM_TAG dist/bitsandbytes-*.whl - name: Upload build artifact @@ -386,11 +351,6 @@ jobs: strategy: matrix: os: [ubuntu-22.04, ubuntu-22.04-arm] - include: - - os: ubuntu-22.04 - arch: x86_64 - - os: ubuntu-22.04-arm - arch: aarch64 runs-on: ${{ matrix.os }} env: PIP_DISABLE_PIP_VERSION_CHECK: 1 diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml index d64e66639..717c73265 100644 --- a/.github/workflows/test-runner.yml +++ b/.github/workflows/test-runner.yml @@ -48,8 +48,6 @@ jobs: outputs: test_runner: ${{ steps.config.outputs.test_runner }} artifact_name: ${{ steps.config.outputs.artifact_name }} - build_os: ${{ steps.config.outputs.build_os }} - arch: ${{ steps.config.outputs.arch }} steps: - name: Configure test runner and paths id: config @@ -135,8 +133,6 @@ jobs: echo "test_runner=${TEST_RUNNER}" >> $GITHUB_OUTPUT echo "artifact_name=${ARTIFACT}" >> $GITHUB_OUTPUT - echo "build_os=${BUILD_OS}" >> $GITHUB_OUTPUT - echo "arch=${ARCH}" >> $GITHUB_OUTPUT - uses: actions/checkout@v4