diff --git a/.github/scripts/build-cpu.sh b/.github/scripts/build-cpu.sh
index 5db76ecce..0ede8503c 100644
--- a/.github/scripts/build-cpu.sh
+++ b/.github/scripts/build-cpu.sh
@@ -1,22 +1,22 @@
#!/bin/bash
-declare build_arch
-declare build_os
-
set -xeuo pipefail
-if [[ "${build_os}" == windows* ]]; then
+: "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows/macOS)}"  # expected from the GitHub Actions runner environment; abort early otherwise
+: "${RUNNER_ARCH:?RUNNER_ARCH must be set (X64/ARM64)}"
+
+if [[ "${RUNNER_OS}" == "Windows" ]]; then
pip install cmake==3.30.9
else
pip install cmake==3.28.3
fi
-if [ "${build_os:0:5}" == macos ] && [ "${build_arch}" == aarch64 ]; then
+if [[ "${RUNNER_OS}" == "macOS" && "${RUNNER_ARCH}" == "ARM64" ]]; then
cmake -DCMAKE_OSX_ARCHITECTURES=arm64 -DCOMPUTE_BACKEND=cpu .
else
cmake -DCOMPUTE_BACKEND=cpu .
fi
cmake --build . --config Release
-output_dir="output/${build_os}/${build_arch}"
+output_dir="output/${RUNNER_OS}/${RUNNER_ARCH}"
mkdir -p "${output_dir}"
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")
diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh
index 9bb2f1a66..7537ba280 100644
--- a/.github/scripts/build-cuda.sh
+++ b/.github/scripts/build-cuda.sh
@@ -1,37 +1,36 @@
#!/bin/bash
-declare build_arch
-declare build_os
-declare cuda_version
-declare cuda_targets
-
set -xeuo pipefail
-if [[ -v cuda_targets ]]; then
- build_capability="${cuda_targets}"
-elif [ "${build_arch}" = "aarch64" ]; then
+: "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows/macOS)}"
+: "${RUNNER_ARCH:?RUNNER_ARCH must be set (X64/ARM64)}"
+: "${CUDA_VERSION:?CUDA_VERSION must be set}"
+
+if [[ -n "${CUDA_TARGETS:-}" ]]; then  # optional override; unset or empty falls back to the per-arch defaults below
+ build_capability="${CUDA_TARGETS}"
+elif [ "${RUNNER_ARCH}" = "ARM64" ]; then
build_capability="75;80;90"
# CUDA 12.8-12.9: Add sm100/sm120
- [[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="75;80;90;100;120"
+ [[ "${CUDA_VERSION}" == 12.8.* || "${CUDA_VERSION}" == 12.9.* ]] && build_capability="75;80;90;100;120"
# CUDA 13.0+: Add sm100/sm110/sm120
- [[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;90;100;110;120;121"
+ [[ "${CUDA_VERSION}" == 13.*.* ]] && build_capability="75;80;90;100;110;120;121"
else
# By default, target Pascal through Hopper.
build_capability="60;70;75;80;86;89;90"
# CUDA 12.8+: Add sm100 and sm120; remove < sm70 to align with PyTorch 2.8+cu128 minimum
- [[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120"
+ [[ "${CUDA_VERSION}" == 12.8.* || "${CUDA_VERSION}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120"
# CUDA 13.0+: Remove < sm75 to align with PyTorch 2.9+cu130 minimum
- [[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120"
+ [[ "${CUDA_VERSION}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120"
fi
-[[ "${build_os}" = windows-* ]] && python3 -m pip install ninja
+[[ "${RUNNER_OS}" == "Windows" ]] && python3 -m pip install ninja
-if [ "${build_os:0:6}" == ubuntu ]; then
+if [ "${RUNNER_OS}" == "Linux" ]; then
# We'll use Rocky Linux 8 in order to maintain manylinux 2.24 compatibility.
- image="nvidia/cuda:${cuda_version}-devel-rockylinux8"
+ image="nvidia/cuda:${CUDA_VERSION}-devel-rockylinux8"
echo "Using image $image"
docker run -i -w /src -v "$PWD:/src" "$image" bash -c \
@@ -46,7 +45,6 @@ else
cmake --build . --config Release
fi
-
-output_dir="output/${build_os}/${build_arch}"
+output_dir="output/${RUNNER_OS}/${RUNNER_ARCH}"
mkdir -p "${output_dir}"
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")
diff --git a/.github/scripts/build-rocm.sh b/.github/scripts/build-rocm.sh
index 77bd2eaf5..9d452b8bc 100644
--- a/.github/scripts/build-rocm.sh
+++ b/.github/scripts/build-rocm.sh
@@ -1,21 +1,21 @@
#!/bin/bash
-declare build_arch
-declare build_os
-declare rocm_version
-
set -xeuo pipefail
+
+: "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows)}"
+: "${ROCM_VERSION:?ROCM_VERSION must be set}"
+
bnb_rocm_arch="gfx90a;gfx942;gfx1100;gfx1101;gfx1102;gfx1103"
# ROCm 6.4+ - Add RDNA4 and RDNA3.5 targets. Note we assume >=6.4.4.
-[[ "${rocm_version}" == 6.4.* || "${rocm_version}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201"
+[[ "${ROCM_VERSION}" == 6.4.* || "${ROCM_VERSION}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201"
# ROCm 7.0+ - Add gfx950
-[[ "${rocm_version}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx950"
+[[ "${ROCM_VERSION}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx950"
-if [ "${build_os:0:6}" == ubuntu ]; then
- image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
+if [ "${RUNNER_OS}" == "Linux" ]; then
+ image=rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete
echo "Using image $image"
- docker run --rm --platform "linux/$build_arch" -i \
+ docker run --rm -i \
-w /src -v "$PWD:/src" "$image" sh -c \
"apt-get update \
&& pip install cmake==3.31.6 \
@@ -24,32 +24,30 @@ if [ "${build_os:0:6}" == ubuntu ]; then
else
bnb_rocm_arch="gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201"
- pip install ninja cmake==3.31.6
-
# Install ROCm SDK wheels from repo.radeon.com.
- rocm_base_url="https://repo.radeon.com/rocm/windows/rocm-rel-${rocm_version}"
+ rocm_base_url="https://repo.radeon.com/rocm/windows/rocm-rel-${ROCM_VERSION}"
pip install \
- "${rocm_base_url}/rocm_sdk_core-${rocm_version}-py3-none-win_amd64.whl" \
- "${rocm_base_url}/rocm_sdk_devel-${rocm_version}-py3-none-win_amd64.whl" \
- "${rocm_base_url}/rocm_sdk_libraries_custom-${rocm_version}-py3-none-win_amd64.whl" \
- "${rocm_base_url}/rocm-${rocm_version}.tar.gz"
+ "${rocm_base_url}/rocm_sdk_core-${ROCM_VERSION}-py3-none-win_amd64.whl" \
+ "${rocm_base_url}/rocm_sdk_devel-${ROCM_VERSION}-py3-none-win_amd64.whl" \
+ "${rocm_base_url}/rocm_sdk_libraries_custom-${ROCM_VERSION}-py3-none-win_amd64.whl" \
+ "${rocm_base_url}/rocm-${ROCM_VERSION}.tar.gz"
# Expand the devel tarball
rocm-sdk init
- ROCM_PATH="$(rocm-sdk path --root)"
- export ROCM_PATH
- export PATH="${ROCM_PATH}/bin:${PATH}"
+ ROCM_PATH="$(rocm-sdk path --root | tr '\\' '/')"
+ export ROCM_PATH PATH="${ROCM_PATH}/bin:${PATH}"
cmake -G Ninja \
-DCOMPUTE_BACKEND=hip \
-DBNB_ROCM_ARCH="${bnb_rocm_arch}" \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DCMAKE_HIP_FLAGS="--offload-compress" \
+ -DCMAKE_HIP_COMPILER_ROCM_ROOT="${ROCM_PATH}" \
-S .
cmake --build .
fi
-output_dir="output/${build_os}/${build_arch}"
+output_dir="output/${RUNNER_OS}/X64"
mkdir -p "${output_dir}"
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")
diff --git a/.github/scripts/build-xpu-windows.bat b/.github/scripts/build-xpu-windows.bat
index c7317b8a7..02d281d6e 100644
--- a/.github/scripts/build-xpu-windows.bat
+++ b/.github/scripts/build-xpu-windows.bat
@@ -29,6 +29,6 @@ if ERRORLEVEL 1 (
)
echo ::endgroup::
-set output_dir=output\%build_os%\x86_64
+set output_dir=output\Windows\X64
if not exist "%output_dir%" mkdir "%output_dir%"
copy bitsandbytes\*.dll "%output_dir%\" 2>nul
diff --git a/.github/scripts/build-xpu.sh b/.github/scripts/build-xpu.sh
index d069e1230..9c9e51e41 100755
--- a/.github/scripts/build-xpu.sh
+++ b/.github/scripts/build-xpu.sh
@@ -1,10 +1,10 @@
#!/bin/bash
-declare build_os
-
set -xeuo pipefail
-# We currently only build XPU on Linux.
-if [ "${build_os:0:6}" == ubuntu ]; then
+: "${RUNNER_OS:?RUNNER_OS must be set (Linux/Windows)}"
+
+# This script only performs the Linux x64 XPU build; Windows x64 is built by build-xpu-windows.bat.
+if [ "${RUNNER_OS}" == "Linux" ]; then
# TODO: We might want to pre-build this as our own customized image in the future.
image=intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04
echo "Using image $image"
@@ -17,6 +17,6 @@ if [ "${build_os:0:6}" == ubuntu ]; then
&& cmake --build . --config Release"
fi
-output_dir="output/${build_os}/x86_64"
+output_dir="output/${RUNNER_OS}/X64"
mkdir -p "${output_dir}"
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")
diff --git a/.github/scripts/set_platform_tag.py b/.github/scripts/set_platform_tag.py
index 1ffeeec9d..0186d41c4 100644
--- a/.github/scripts/set_platform_tag.py
+++ b/.github/scripts/set_platform_tag.py
@@ -4,14 +4,16 @@
def get_platform_tag(architecture):
+ arch = architecture.lower()
+ is_x64 = arch in ("x86_64", "x64")
system = platform.system()
if system == "Linux":
- tag = "manylinux_2_24_x86_64" if architecture == "x86_64" else "manylinux_2_24_aarch64"
+ tag = "manylinux_2_24_x86_64" if is_x64 else "manylinux_2_24_aarch64"
elif system == "Darwin":
tag = "macosx_14_0_arm64"
elif system == "Windows":
- tag = "win_amd64" if architecture == "x86_64" else "win_arm64"
+ tag = "win_amd64" if is_x64 else "win_arm64"
else:
sys.exit(f"Unsupported system: {system}")
@@ -20,7 +22,7 @@ def get_platform_tag(architecture):
def main():
parser = argparse.ArgumentParser(description="Determine platform tag.")
- parser.add_argument("arch", type=str, help="Architecture (e.g., x86_64, aarch64)")
+ parser.add_argument("arch", type=str, help="Architecture (e.g., x86_64, aarch64, X64, ARM64)")
args = parser.parse_args()
tag = get_platform_tag(args.arch)
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 0e88ba018..2d0e7ca4b 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -31,17 +31,7 @@ jobs:
build-cpu:
strategy:
matrix:
- include:
- - os: ubuntu-22.04
- arch: x86_64
- - os: ubuntu-22.04-arm
- arch: aarch64
- - os: windows-2025
- arch: x86_64
- - os: windows-11-arm
- arch: arm64
- - os: macos-15
- arch: arm64
+ os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, windows-11-arm, macos-15]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
@@ -49,16 +39,13 @@ jobs:
if: startsWith(matrix.os, 'windows')
uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl
with:
- arch: ${{ matrix.arch == 'arm64' && 'arm64' || 'x64' }}
+ arch: ${{ runner.arch == 'ARM64' && 'arm64' || 'x64' }}
- name: Build C++
run: bash .github/scripts/build-cpu.sh
- env:
- build_os: ${{ matrix.os }}
- build_arch: ${{ matrix.arch }}
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
- name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
+ name: shared_library_${{ runner.os }}_${{ runner.arch }}
path: output/*
retention-days: 7
@@ -70,13 +57,6 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025]
- include:
- - os: ubuntu-22.04
- arch: x86_64
- - os: ubuntu-22.04-arm
- arch: aarch64
- - os: windows-2025
- arch: x86_64
cuda_version:
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"]
runs-on: ${{ matrix.os }}
@@ -93,45 +73,41 @@ jobs:
sub-packages: ${{ format('["nvcc"{0},"cudart","cublas","thrust","cublas_dev"]', startsWith(matrix.cuda_version, '13.') && ',"crt","nvvm","nvptxcompiler"' || '') }}
use-github-cache: false
use-local-cache: false
- log-file-suffix: ${{matrix.os}}-${{matrix.cuda_version}}.txt
+ log-file-suffix: ${{ runner.os }}-${{ runner.arch }}-${{matrix.cuda_version}}.txt
- name: Setup MSVC
if: startsWith(matrix.os, 'windows')
uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl
+ with:
+ toolset: "14.44"
- name: Build C++
run: bash .github/scripts/build-cuda.sh
env:
- build_os: ${{ matrix.os }}
- build_arch: ${{ matrix.arch }}
- cuda_version: ${{ matrix.cuda_version }}
+ CUDA_VERSION: ${{ matrix.cuda_version }}
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
- name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda_version }}
+ name: shared_library_cuda_${{ runner.os }}_${{ runner.arch }}_${{ matrix.cuda_version }}
path: output/*
retention-days: 7
build-xpu:
strategy:
matrix:
- os: [ubuntu-22.04, windows-2025]
+ os: [ubuntu-22.04, windows-2022]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Build C++ (Linux)
if: runner.os == 'Linux'
run: bash .github/scripts/build-xpu.sh
- env:
- build_os: ${{ matrix.os }}
- name: Build C++ (Windows)
if: runner.os == 'Windows'
run: .github/scripts/build-xpu-windows.bat
shell: cmd
- env:
- build_os: ${{ matrix.os }}
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
- name: shared_library_xpu_${{ matrix.os }}_x86_64
+ name: shared_library_xpu_${{ runner.os }}_${{ runner.arch }}
path: output/*
retention-days: 7
@@ -139,11 +115,9 @@ jobs:
strategy:
matrix:
os: [ubuntu-22.04]
- arch: [x86_64]
rocm_version: ["6.2.4", "6.3.4", "6.4.4", "7.0.2", "7.1.1", "7.2.3"]
include:
- os: windows-2025
- arch: x86_64
rocm_version: "7.2.1"
runs-on: ${{ matrix.os }}
steps:
@@ -168,16 +142,16 @@ jobs:
- name: Setup MSVC
if: startsWith(matrix.os, 'windows')
uses: ilammy/msvc-dev-cmd@v1.13.0
+ with:
+ toolset: "14.44"
- name: Build C++
run: bash .github/scripts/build-rocm.sh
env:
- build_os: ${{ matrix.os }}
- build_arch: ${{ matrix.arch }}
- rocm_version: ${{ matrix.rocm_version }}
+ ROCM_VERSION: ${{ matrix.rocm_version }}
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
- name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.rocm_version }}
+ name: shared_library_rocm_${{ runner.os }}_${{ runner.arch }}_${{ matrix.rocm_version }}
path: output/*
retention-days: 7
@@ -193,23 +167,6 @@ jobs:
strategy:
matrix:
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, windows-11-arm, macos-15]
- include:
- - os: ubuntu-22.04
- arch: x86_64
- python-version: "3.10"
- - os: ubuntu-22.04-arm
- arch: aarch64
- python-version: "3.10"
- - os: windows-2025
- arch: x86_64
- python-version: "3.10"
- - os: windows-11-arm
- arch: arm64
- # Python for Windows ARM64 is only available from 3.12+
- python-version: "3.12"
- - os: macos-15
- arch: arm64
- python-version: "3.10"
# The specific Python version is irrelevant in this context as we are only packaging non-C extension
# code. This ensures compatibility across Python versions, as compatibility is
# dictated by the packaged code itself, not the Python version used for packaging.
@@ -220,30 +177,31 @@ jobs:
uses: actions/download-artifact@v4
with:
merge-multiple: true
- pattern: "shared_library*_${{ matrix.os }}_${{ matrix.arch }}*"
+ pattern: "shared_library*_${{ runner.os }}_${{ runner.arch }}*"
path: output/
- name: Copy correct platform shared library
shell: bash
run: |
ls -lR output/
- cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/
- - name: Set up Python ${{ matrix.python-version }}
+ cp output/${{ runner.os }}/${{ runner.arch }}/* bitsandbytes/
+ - name: Set up Python
uses: actions/setup-python@v5
with:
- python-version: ${{ matrix.python-version }}
+ # Python for Windows ARM64 is only available from 3.12+
+ python-version: ${{ matrix.os == 'windows-11-arm' && '3.12' || '3.10' }}
cache: pip
- run: pip install build wheel
- run: python -m build .
- name: Determine and Set Platform Tag, then Tag Wheel
shell: bash
run: |
- PLATFORM_TAG=$(python .github/scripts/set_platform_tag.py "${{ matrix.arch }}")
+ PLATFORM_TAG=$(python .github/scripts/set_platform_tag.py "${{ runner.arch }}")
echo "PLATFORM_TAG=$PLATFORM_TAG"
wheel tags --remove --abi-tag=none --python-tag=py3 --platform-tag=$PLATFORM_TAG dist/bitsandbytes-*.whl
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
- name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}
+ name: bdist_wheel_${{ runner.os }}_${{ runner.arch }}
path: dist/bitsandbytes-*.whl
retention-days: 7
@@ -393,11 +351,6 @@ jobs:
strategy:
matrix:
os: [ubuntu-22.04, ubuntu-22.04-arm]
- include:
- - os: ubuntu-22.04
- arch: x86_64
- - os: ubuntu-22.04-arm
- arch: aarch64
runs-on: ${{ matrix.os }}
env:
PIP_DISABLE_PIP_VERSION_CHECK: 1
@@ -406,7 +359,7 @@ jobs:
- name: Download wheel
uses: actions/download-artifact@v4
with:
- name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}
+ name: bdist_wheel_${{ runner.os }}_${{ runner.arch }}
path: wheels/
- name: Set up Python
uses: actions/setup-python@v5
diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml
index d1e81bc32..717c73265 100644
--- a/.github/workflows/test-runner.yml
+++ b/.github/workflows/test-runner.yml
@@ -48,8 +48,6 @@ jobs:
outputs:
test_runner: ${{ steps.config.outputs.test_runner }}
artifact_name: ${{ steps.config.outputs.artifact_name }}
- build_os: ${{ steps.config.outputs.build_os }}
- arch: ${{ steps.config.outputs.arch }}
steps:
- name: Configure test runner and paths
id: config
@@ -135,16 +133,9 @@ jobs:
echo "test_runner=${TEST_RUNNER}" >> $GITHUB_OUTPUT
echo "artifact_name=${ARTIFACT}" >> $GITHUB_OUTPUT
- echo "build_os=${BUILD_OS}" >> $GITHUB_OUTPUT
- echo "arch=${ARCH}" >> $GITHUB_OUTPUT
- uses: actions/checkout@v4
- - name: Set build environment variables
- shell: bash
- run: |
- echo "build_os=${{ steps.config.outputs.build_os }}" >> $GITHUB_ENV
- echo "build_arch=${{ steps.config.outputs.arch }}" >> $GITHUB_ENV
# Windows + CUDA: Install CUDA Toolkit
- name: Install CUDA Toolkit
@@ -162,6 +153,7 @@ jobs:
uses: ilammy/msvc-dev-cmd@v1.13.0
with:
arch: ${{ inputs.platform == 'windows-arm64' && 'arm64' || 'x64' }}
+ toolset: ${{ (inputs.platform == 'windows' && inputs.backend == 'cuda') && '14.44' || '' }}
# Build CPU backend
- name: Build C++
@@ -173,14 +165,14 @@ jobs:
if: inputs.backend == 'cuda'
run: bash .github/scripts/build-cuda.sh
env:
- cuda_version: ${{ inputs.cuda_version }}
- cuda_targets: "75;80;89"
+ CUDA_VERSION: ${{ inputs.cuda_version }}
+ CUDA_TARGETS: "75;80;89"
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
name: ${{ steps.config.outputs.artifact_name }}
- path: output/${{ steps.config.outputs.build_os }}/${{ steps.config.outputs.arch }}/*
+ path: output/${{ runner.os }}/${{ runner.arch }}/*
retention-days: 7
test:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d420edb1..fc7e41aac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,10 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
# Define included source files
set(CPP_FILES csrc/cpu_ops.cpp csrc/pythonInterface.cpp)
set(GPU_FILES csrc/ops.cu csrc/kernels.cu)
@@ -115,8 +119,6 @@ endif()
if (BUILD_CPU)
- set(CMAKE_CXX_STANDARD 17)
- set(CMAKE_CXX_STANDARD_REQUIRED ON)
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" HOST_ARCH)
if(MSVC)
# Use the experimental OpenMP runtime for persistent thread pool support.
@@ -127,16 +129,22 @@ if (BUILD_CPU)
endif()
if(BUILD_CUDA)
+ set(CMAKE_CUDA_STANDARD 17)
+ set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+ set(CMAKE_CUDA_EXTENSIONS OFF)
+
# NVCC normally will only work with MSVC up to 1939. VS2022 17.10+ starts using versions 1940+.
# Workaround: use --allow-unsupported-compiler
# This needs to be added *before* we try to enable the CUDA language so CMake's compiler check passes.
if(MSVC AND MSVC_VERSION VERSION_GREATER_EQUAL 1940)
string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler")
+ # Suppress MSVC STL version mismatch errors when using a newer compiler than CUDA officially supports.
# This is needed to build with VS2022 17.11+ and CUDA < 12.4.
if (MSVC_VERSION VERSION_GREATER_EQUAL 1941)
string(APPEND CMAKE_CUDA_FLAGS " -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH")
endif()
+
endif()
enable_language(CUDA) # This will fail if CUDA is not found
diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx
index 9a335f2d8..1a706dee4 100644
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -174,7 +174,7 @@ The currently distributed `bitsandbytes` packages are built with the following c
|--------------------|----------------------|----------------------|
| **Linux x86-64** | GCC 11.4 | AVX2 |
| **Linux aarch64** | GCC 11.4 | |
-| **Windows x86-64** | MSVC 19.43+ (VS2022) | AVX2 |
+| **Windows x86-64** | MSVC 19.51+ (VS2026) | AVX2 |
| **Windows arm64** | MSVC 19.43+ (VS2022) | ARM NEON |
| **macOS arm64** | Apple Clang 17 | |
@@ -201,6 +201,8 @@ pip install -e .
+Requires Visual Studio 2022 or 2026.
+
```bash
git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/
pip install -e .
@@ -209,7 +211,7 @@ pip install -e .
-Requires Visual Studio 2022 with the **ARM64 C++ build tools** component, Python >= **3.12**, and PyTorch >= **2.12**.
+Requires Visual Studio 2022 or 2026 with the **ARM64 C++ build tools** component, Python >= **3.12**, and PyTorch >= **2.12**.
```bash
git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/
@@ -280,7 +282,7 @@ pip install -e . # `-e` for "editable" install, when developing BNB (otherwise
-Compilation on Windows requires Visual Studio with C++ support, CMake, Ninja, and Python >= **3.10**.
+Compilation on Windows requires Visual Studio 2022 with C++ support, CMake, Ninja, and Python >= **3.10**.
Instead of a system-wide ROCm installation, you can use the pip-installable ROCm SDK wheels from [repo.radeon.com](https://repo.radeon.com/rocm/windows/):