Skip to content

Commit 597e81a

Browse files
authored
Merge branch 'main' into fix_quant_state
2 parents e59236a + 9dd8b70 commit 597e81a

94 files changed

Lines changed: 14892 additions & 4755 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.bat text eol=crlf

.github/scripts/build-cuda.sh

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,20 @@ if [[ -v cuda_targets ]]; then
1111
elif [ "${build_arch}" = "aarch64" ]; then
1212
build_capability="75;80;90"
1313

14-
# CUDA 12.8+: Add sm100/sm120
14+
# CUDA 12.8-12.9: Add sm100/sm120
1515
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="75;80;90;100;120"
16+
17+
# CUDA 13.0+: Add sm100/sm110/sm120/sm121
18+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;90;100;110;120;121"
1619
else
1720
# By default, target Pascal through Hopper.
1821
build_capability="60;70;75;80;86;89;90"
1922

2023
# CUDA 12.8-12.9: Add sm100 and sm120; remove < sm70 to align with PyTorch 2.8+cu128 minimum
2124
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120"
25+
26+
# CUDA 13.0+: Remove < sm75 to align with PyTorch 2.9+cu130 minimum
27+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120"
2228
fi
2329

2430
[[ "${build_os}" = windows-* ]] && python3 -m pip install ninja
@@ -29,8 +35,8 @@ if [ "${build_os:0:6}" == ubuntu ]; then
2935
echo "Using image $image"
3036

3137
docker run -i -w /src -v "$PWD:/src" "$image" bash -c \
32-
"dnf update -y \
33-
&& dnf install cmake gcc-toolset-11 -y \
38+
"dnf -y --refresh update --security \
39+
&& dnf -y install cmake gcc-toolset-11 --setopt=install_weak_deps=False --setopt=tsflags=nodocs \
3440
&& source scl_source enable gcc-toolset-11 \
3541
&& cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" . \
3642
&& cmake --build . --config Release"

.github/scripts/build-rocm.sh

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,22 @@ declare build_os
44
declare rocm_version
55

66
set -xeuo pipefail
7-
bnb_rocm_arch="gfx90a;gfx942;gfx1100"
7+
bnb_rocm_arch="gfx90a;gfx942;gfx1100;gfx1101"
8+
9+
# ROCm 6.4+ - Add gfx1150/gfx1151/gfx1200/gfx1201. Note we assume >=6.4.4.
10+
[[ "${rocm_version}" == 6.4.* || "${rocm_version}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx1150;gfx1151;gfx1200;gfx1201"
11+
12+
# ROCm 7.0+ - Add gfx950
13+
[[ "${rocm_version}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx950"
14+
815
if [ "${build_os:0:6}" == ubuntu ]; then
9-
image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
10-
echo "Using image $image"
11-
docker run --rm --platform "linux/$build_arch" -i \
12-
-w /src -v "$PWD:/src" "$image" sh -c \
13-
"apt-get update \
14-
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
15-
&& cmake -DCOMPUTE_BACKEND=hip -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \
16+
image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
17+
echo "Using image $image"
18+
docker run --rm --platform "linux/$build_arch" -i \
19+
-w /src -v "$PWD:/src" "$image" sh -c \
20+
"apt-get update \
21+
&& pip install cmake==3.31.6 \
22+
&& cmake -DCOMPUTE_BACKEND=hip -DCMAKE_BUILD_TYPE=MinSizeRel -DCMAKE_HIP_FLAGS=\"--offload-compress\" -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \
1623
&& cmake --build ."
1724
fi
1825

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
set INTEL_DLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe
2+
set INTEL_DLE_TMP=%RUNNER_TEMP%\intel_dle
3+
set INTEL_DLE_LOG=%RUNNER_TEMP%\intel_dle_log.txt
4+
5+
echo ::group::Intel Deep Learning Essentials Installation
6+
curl -o intel-dle-installer.exe %INTEL_DLE_URL%
7+
start /wait "Intel DLE Install" intel-dle-installer.exe -f %INTEL_DLE_TMP% -l %INTEL_DLE_LOG% --silent -a --eula=accept -p=NEED_VS2022_INTEGRATION=0
8+
type %INTEL_DLE_LOG%
9+
if ERRORLEVEL 1 (
10+
echo Failed to install Intel Deep Learning Essentials
11+
exit /b 1
12+
)
13+
echo ::endgroup::
14+
15+
echo ::group::Build Environment Setup
16+
call "%ProgramFiles(x86)%\Intel\oneAPI\setvars.bat"
17+
cmake -G Ninja -DCOMPUTE_BACKEND=xpu -DCMAKE_BUILD_TYPE=Release .
18+
if ERRORLEVEL 1 (
19+
echo Failed to setup environment
20+
exit /b 1
21+
)
22+
echo ::endgroup::
23+
24+
echo ::group::Building with XPU backend
25+
cmake --build . --config Release
26+
if ERRORLEVEL 1 (
27+
echo Build failed
28+
exit /b 1
29+
)
30+
echo ::endgroup::
31+
32+
set output_dir=output\%build_os%\x86_64
33+
if not exist "%output_dir%" mkdir "%output_dir%"
34+
copy bitsandbytes\*.dll "%output_dir%\" 2>nul

.github/scripts/build-xpu.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
declare build_os
3+
4+
set -xeuo pipefail
5+
6+
# This script only handles the Linux XPU build; the Windows XPU build uses a separate batch script.
7+
if [ "${build_os:0:6}" == ubuntu ]; then
8+
# TODO: We might want to pre-build this as our own customized image in the future.
9+
image=intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04
10+
echo "Using image $image"
11+
docker run --rm -i \
12+
-w /src -v "$PWD:/src" "$image" sh -c \
13+
"apt-get update \
14+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
15+
cmake bison intel-fw-gpu intel-ocloc \
16+
&& cmake -DCOMPUTE_BACKEND=xpu . \
17+
&& cmake --build . --config Release"
18+
fi
19+
20+
output_dir="output/${build_os}/x86_64"
21+
mkdir -p "${output_dir}"
22+
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")

.github/scripts/set_platform_tag.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def get_platform_tag(architecture):
99
if system == "Linux":
1010
tag = "manylinux_2_24_x86_64" if architecture == "x86_64" else "manylinux_2_24_aarch64"
1111
elif system == "Darwin":
12-
tag = "macosx_13_1_x86_64" if architecture == "x86_64" else "macosx_13_1_arm64"
12+
tag = "macosx_14_0_arm64"
1313
elif system == "Windows":
1414
tag = "win_amd64" if architecture == "x86_64" else "win_arm64"
1515
else:

.github/workflows/python-package.yml

Lines changed: 73 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@ on:
66
branches: [main]
77
paths:
88
- ".github/workflows/python-package.yml"
9+
- ".github/scripts/**"
910
- "bitsandbytes/**"
1011
- "csrc/**"
1112
- "include/**"
1213
- "tests/**"
1314
- "CMakeLists.txt"
14-
- "requirements*.txt"
15+
- "MANIFEST.in"
1516
- "setup.py"
1617
- "pyproject.toml"
1718
release:
@@ -25,19 +26,19 @@ concurrency:
2526

2627
jobs:
2728
##
28-
# This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
29+
# This job matrix builds the CPU versions of the libraries for all supported platforms.
2930
##
30-
build-shared-libs:
31+
build-cpu:
3132
strategy:
3233
matrix:
3334
include:
3435
- os: ubuntu-22.04
3536
arch: x86_64
3637
- os: ubuntu-22.04-arm
3738
arch: aarch64
38-
- os: windows-latest
39+
- os: windows-2025
3940
arch: x86_64
40-
- os: macos-latest
41+
- os: macos-15
4142
arch: arm64
4243
runs-on: ${{ matrix.os }}
4344
steps:
@@ -56,37 +57,39 @@ jobs:
5657
name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
5758
path: output/*
5859
retention-days: 7
60+
5961
##
6062
# This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
6163
##
62-
build-shared-libs-cuda:
64+
build-cuda:
6365
strategy:
6466
fail-fast: false
6567
matrix:
66-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-latest]
68+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025]
6769
include:
6870
- os: ubuntu-22.04
6971
arch: x86_64
7072
- os: ubuntu-22.04-arm
7173
arch: aarch64
72-
- os: windows-latest
74+
- os: windows-2025
7375
arch: x86_64
7476
cuda_version:
75-
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1"]
77+
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.2"]
7678
runs-on: ${{ matrix.os }}
7779
steps:
7880
- uses: actions/checkout@v4
7981
# Windows: We install Cuda on the agent (slow)
80-
- uses: Jimver/cuda-toolkit@c35baa1a18fd1fc9dcf47c5bd839bf30559c0bc3 # v0.2.24
82+
- uses: Jimver/cuda-toolkit@6008063726ffe3309d1b22e413d9e88fed91a2f2 # v0.2.29
8183
if: startsWith(matrix.os, 'windows')
8284
id: cuda-toolkit
8385
with:
84-
# Temporary: Use CUDA 12.9.0 for Windows until 12.9.1 is supported with this action.
85-
cuda: ${{ matrix.cuda_version == '12.9.1' && '12.9.0' || matrix.cuda_version }}
86+
cuda: ${{ matrix.cuda_version }}
8687
method: "network"
87-
sub-packages: '["nvcc","cudart","cusparse","cublas","thrust","nvrtc_dev","cublas_dev","cusparse_dev"]'
88-
linux-local-args: '["--toolkit"]'
88+
# The "crt", "nvvm", and "nvptxcompiler" components are added for CUDA 13.
89+
sub-packages: ${{ format('["nvcc"{0},"cudart","cusparse","cublas","thrust","cublas_dev","cusparse_dev"]', startsWith(matrix.cuda_version, '13.') && ',"crt","nvvm","nvptxcompiler"' || '') }}
8990
use-github-cache: false
91+
use-local-cache: false
92+
log-file-suffix: ${{matrix.os}}-${{matrix.cuda_version}}.txt
9093
- name: Setup MSVC
9194
if: startsWith(matrix.os, 'windows')
9295
uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl
@@ -103,37 +106,56 @@ jobs:
103106
path: output/*
104107
retention-days: 7
105108

106-
build-shared-libs-rocm:
109+
build-xpu:
110+
strategy:
111+
matrix:
112+
os: [ubuntu-22.04, windows-2025]
113+
runs-on: ${{ matrix.os }}
114+
steps:
115+
- uses: actions/checkout@v4
116+
- name: Build C++ (Linux)
117+
if: runner.os == 'Linux'
118+
run: bash .github/scripts/build-xpu.sh
119+
env:
120+
build_os: ${{ matrix.os }}
121+
- name: Build C++ (Windows)
122+
if: runner.os == 'Windows'
123+
run: .github/scripts/build-xpu-windows.bat
124+
shell: cmd
125+
env:
126+
build_os: ${{ matrix.os }}
127+
- name: Upload build artifact
128+
uses: actions/upload-artifact@v4
129+
with:
130+
name: shared_library_xpu_${{ matrix.os }}_x86_64
131+
path: output/*
132+
retention-days: 7
133+
134+
build-rocm:
107135
strategy:
108136
matrix:
109137
os: [ubuntu-22.04]
110138
arch: [x86_64]
111-
rocm_version:
112-
["6.1.2", "6.2.4", "6.3.2"]
139+
rocm_version: ["6.2.4", "6.3.4", "6.4.4", "7.0.2", "7.1", "7.2"]
113140
runs-on: ${{ matrix.os }}
114141
steps:
115142
- uses: actions/checkout@v4
116-
- name: Set up Docker multiarch
117-
uses: docker/setup-qemu-action@v3
118143
- name: Clean up disk space
119144
run: |
145+
echo "Disk space before cleanup:"
146+
df -h
147+
148+
# These are the biggest disk space hogs.
120149
sudo rm -rf \
121-
/usr/share/dotnet \
122-
/opt/ghc \
123-
"/usr/local/share/boost" \
124-
"$AGENT_TOOLSDIRECTORY" \
125-
/opt/hostedtoolcache \
126-
/opt/google/chrome \
127-
/opt/microsoft/msedge \
128-
/opt/microsoft/powershell \
129-
/opt/pipx \
130-
/usr/lib/mono \
131-
/usr/local/julia* \
132-
/usr/local/lib/android \
133-
/usr/local/lib/node_modules \
134-
/usr/local/share/chromium \
135-
/usr/local/share/powershell \
136-
/usr/share/swift
150+
/opt/hostedtoolcache/CodeQL \
151+
/usr/lib/dotnet \
152+
/usr/lib/jvm \
153+
/usr/local/.ghcup \
154+
/usr/local/lib/android \
155+
/usr/share/swift
156+
157+
echo "Disk space after cleanup:"
158+
df -h
137159
- name: Build C++
138160
run: bash .github/scripts/build-rocm.sh
139161
env:
@@ -148,24 +170,28 @@ jobs:
148170
retention-days: 7
149171

150172
build-wheels:
173+
env:
174+
# Skip rebuilding the CPU library when building the wheels.
175+
BNB_SKIP_CMAKE: 1
151176
needs:
152-
- build-shared-libs
153-
- build-shared-libs-cuda
154-
- build-shared-libs-rocm
177+
- build-cpu
178+
- build-cuda
179+
- build-rocm
180+
- build-xpu
155181
strategy:
156182
matrix:
157-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-latest, macos-latest]
183+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
158184
include:
159185
- os: ubuntu-22.04
160186
arch: x86_64
161187
- os: ubuntu-22.04-arm
162188
arch: aarch64
163-
- os: windows-latest
189+
- os: windows-2025
164190
arch: x86_64
165-
- os: macos-latest
191+
- os: macos-15
166192
arch: arm64
167193
# The specific Python version is irrelevant in this context as we are only packaging non-C extension
168-
# code. This ensures compatibility across Python versions, including Python 3.9, as compatibility is
194+
# code. This ensures compatibility across Python versions, as compatibility is
169195
# dictated by the packaged code itself, not the Python version used for packaging.
170196
python-version: ["3.10"]
171197
runs-on: ${{ matrix.os }}
@@ -239,8 +265,7 @@ jobs:
239265
# `pip install https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl`
240266
STABLE_PLACEHOLDER_VERSION="1.33.7.preview"
241267
242-
# exclude macos wheels for now
243-
find tmp/ -type f -name '*.whl' ! -name '*macos*' -print0 | while IFS= read -r -d '' wheel; do
268+
find tmp/ -type f -name '*.whl' -print0 | while IFS= read -r -d '' wheel; do
244269
wheel_filename=$(basename "$wheel")
245270
246271
# Strip off the original version
@@ -291,9 +316,11 @@ jobs:
291316
if [[ "$fname" == *"manylinux_2_24_x86_64"* ]]; then
292317
echo "### Linux (x86_64)" >> body.md
293318
elif [[ "$fname" == *"manylinux_2_24_aarch64"* ]]; then
294-
echo "### Linux (ARM/aarch64)" >> body.md
319+
echo "### Linux (aarch64)" >> body.md
295320
elif [[ "$fname" == *"win_amd64"* ]]; then
296321
echo "### Windows (x86_64)" >> body.md
322+
elif [[ "$fname" == *"macosx"* ]]; then
323+
echo "### macOS 14+ (arm64)" >> body.md
297324
else
298325
echo "### Other platform" >> body.md
299326
fi
@@ -313,7 +340,7 @@ jobs:
313340
> pip install https://.../bitsandbytes-1.33.7-preview-py3-none-manylinux_2_24_x86_64.whl
314341
Collecting bitsandbytes==1.33.7rc0
315342
...
316-
Successfully installed bitsandbytes-0.46.0.dev0
343+
Successfully installed bitsandbytes-0.49.0.dev0
317344
```
318345
ENDOFMARKDOWN
319346
@@ -378,9 +405,6 @@ jobs:
378405
pattern: "bdist_wheel_*"
379406
merge-multiple: true
380407

381-
- name: Remove macOS wheels
382-
run: rm dist/*macos*
383-
384408
- name: Publish to PyPI
385409
uses: pypa/gh-action-pypi-publish@release/v1
386410
with:

0 commit comments

Comments
 (0)