Skip to content

Commit 283b57c

Browse files
authored
Merge branch 'main' into matmul4bit
2 parents 479e84a + b075afc commit 283b57c

120 files changed

Lines changed: 26716 additions & 7596 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.clang-format

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
---
2+
BasedOnStyle: LLVM
3+
AlignAfterOpenBracket: BlockIndent
4+
BinPackArguments: true
5+
BinPackParameters: true
6+
BracedInitializerIndentWidth: 4
7+
ColumnLimit: 120
8+
Cpp11BracedListStyle: true
9+
IndentWidth: 4
10+
IndentWrappedFunctionNames: true
11+
PointerAlignment: Left
12+
SeparateDefinitionBlocks: Always
13+
Standard: c++17
14+
StatementMacros:
15+
- 'MAKE_PreconditionOptimizer32bit1State'
16+
- 'MAKE_PreconditionOptimizer32bit2State'
17+
- 'MAKE_PreconditionStatic8bit1State'
18+
- 'MAKE_PreconditionStatic8bit2State'
19+
- 'MAKE_Optimizer32bit1State'
20+
- 'MAKE_optimizerStatic8bit1State'
21+
- 'MAKE_optimizerStatic8bit2State'
22+
- 'MAKE_OptimizerStatic8bit1StateBlockwise'
23+
- 'MAKE_OptimizerStatic8bit2StateBlockwise'
24+
- 'MAKE_kQuantizeBlockwise'
25+
- 'MAKE_BLOCKWISE8'
26+
- 'MAKE_ELEMENTWISE_FUNC'
27+
- 'CMAKE_ELEMENTWISE_FUNC'
28+
- 'MAKE_FUNC8'
29+
- 'MAKE_FUNC32'
30+
- 'MAKE_CBLOCKWISE8'
31+
- 'MAKE_CFUNC8'
32+
- 'MAKE_CFUNC32'
33+
34+
UseTab: Never
35+
36+
...

.git-blame-ignore-revs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ ea7c14f8ef64924f2d0ff80df3cdabf2c7299848
1515

1616
# CHANGELOG: to reverse chron order + mdformat
1717
4743ff0d43e04e4cc3e5d8b9e7cd016c0defa36d
18+
19+
# Apply clang-format
20+
4955d136ae083c2be1236d8915913166e1790aad

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.bat text eol=crlf

.github/FUNDING.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
open_collective: bitsandbytes

.github/scripts/build-cuda.sh

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,20 @@ if [[ -v cuda_targets ]]; then
1111
elif [ "${build_arch}" = "aarch64" ]; then
1212
build_capability="75;80;90"
1313

14-
# CUDA 12.8: Add sm100
15-
[[ "${cuda_version}" == 12.8.* ]] && build_capability="75;80;90;100"
14+
# CUDA 12.8-12.9: Add sm100/sm120
15+
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="75;80;90;100;120"
16+
17+
# CUDA 13.0+: Add sm100/sm110/sm120/sm121
18+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;90;100;110;120;121"
1619
else
17-
# By default, target Maxwell through Hopper.
18-
build_capability="50;52;60;61;70;75;80;86;89;90"
20+
# By default, target Pascal through Hopper.
21+
build_capability="60;70;75;80;86;89;90"
22+
23+
# CUDA 12.8-12.9: Add sm100 and sm120; remove < sm70 to align with PyTorch 2.8+cu128 minimum
24+
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120"
1925

20-
# CUDA 12.8: Add sm100 and sm120; remove < sm75 to align with PyTorch 2.7+cu128 minimum
21-
[[ "${cuda_version}" == 12.8.* ]] && build_capability="75;80;86;89;90;100;120"
26+
# CUDA 13.0+: Remove < sm75 to align with PyTorch 2.9+cu130 minimum
27+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120"
2228
fi
2329

2430
[[ "${build_os}" = windows-* ]] && python3 -m pip install ninja
@@ -29,8 +35,8 @@ if [ "${build_os:0:6}" == ubuntu ]; then
2935
echo "Using image $image"
3036

3137
docker run -i -w /src -v "$PWD:/src" "$image" bash -c \
32-
"dnf update -y \
33-
&& dnf install cmake gcc-toolset-11 -y \
38+
"dnf -y --refresh update --security \
39+
&& dnf -y install cmake gcc-toolset-11 --setopt=install_weak_deps=False --setopt=tsflags=nodocs \
3440
&& source scl_source enable gcc-toolset-11 \
3541
&& cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" . \
3642
&& cmake --build . --config Release"

.github/scripts/build-rocm.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/bash
2+
declare build_arch
3+
declare build_os
4+
declare rocm_version
5+
6+
set -xeuo pipefail
7+
bnb_rocm_arch="gfx90a;gfx942;gfx1100;gfx1101"
8+
9+
# ROCm 6.4+ - Add gfx1150/gfx1151/gfx1200/gfx1201. Note we assume >=6.4.4.
10+
[[ "${rocm_version}" == 6.4.* || "${rocm_version}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx1150;gfx1151;gfx1200;gfx1201"
11+
12+
# ROCm 7.0+ - Add gfx950
13+
[[ "${rocm_version}" == 7.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx950"
14+
15+
if [ "${build_os:0:6}" == ubuntu ]; then
16+
image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
17+
echo "Using image $image"
18+
docker run --rm --platform "linux/$build_arch" -i \
19+
-w /src -v "$PWD:/src" "$image" sh -c \
20+
"apt-get update \
21+
&& pip install cmake==3.31.6 \
22+
&& cmake -DCOMPUTE_BACKEND=hip -DCMAKE_BUILD_TYPE=MinSizeRel -DCMAKE_HIP_FLAGS=\"--offload-compress\" -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \
23+
&& cmake --build ."
24+
fi
25+
26+
output_dir="output/${build_os}/${build_arch}"
27+
mkdir -p "${output_dir}"
28+
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
set INTEL_DLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe
2+
set INTEL_DLE_TMP=%RUNNER_TEMP%\intel_dle
3+
set INTEL_DLE_LOG=%RUNNER_TEMP%\intel_dle_log.txt
4+
5+
echo ::group::Intel Deep Learning Essentials Installation
6+
curl -o intel-dle-installer.exe %INTEL_DLE_URL%
7+
start /wait "Intel DLE Install" intel-dle-installer.exe -f %INTEL_DLE_TMP% -l %INTEL_DLE_LOG% --silent -a --eula=accept -p=NEED_VS2022_INTEGRATION=0
8+
type %INTEL_DLE_LOG%
9+
if ERRORLEVEL 1 (
10+
echo Failed to install Intel Deep Learning Essentials
11+
exit /b 1
12+
)
13+
echo ::endgroup::
14+
15+
echo ::group::Build Environment Setup
16+
call "%ProgramFiles(x86)%\Intel\oneAPI\setvars.bat"
17+
cmake -G Ninja -DCOMPUTE_BACKEND=xpu -DCMAKE_BUILD_TYPE=Release .
18+
if ERRORLEVEL 1 (
19+
echo Failed to setup environment
20+
exit /b 1
21+
)
22+
echo ::endgroup::
23+
24+
echo ::group::Building with XPU backend
25+
cmake --build . --config Release
26+
if ERRORLEVEL 1 (
27+
echo Build failed
28+
exit /b 1
29+
)
30+
echo ::endgroup::
31+
32+
set output_dir=output\%build_os%\x86_64
33+
if not exist "%output_dir%" mkdir "%output_dir%"
34+
copy bitsandbytes\*.dll "%output_dir%\" 2>nul

.github/scripts/build-xpu.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
declare build_os
3+
4+
set -xeuo pipefail
5+
6+
# We currently only build XPU on Linux.
7+
if [ "${build_os:0:6}" == ubuntu ]; then
8+
# TODO: We might want to pre-build this as our own customized image in the future.
9+
image=intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04
10+
echo "Using image $image"
11+
docker run --rm -i \
12+
-w /src -v "$PWD:/src" "$image" sh -c \
13+
"apt-get update \
14+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
15+
cmake bison intel-fw-gpu intel-ocloc \
16+
&& cmake -DCOMPUTE_BACKEND=xpu . \
17+
&& cmake --build . --config Release"
18+
fi
19+
20+
output_dir="output/${build_os}/x86_64"
21+
mkdir -p "${output_dir}"
22+
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")

.github/scripts/set_platform_tag.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def get_platform_tag(architecture):
99
if system == "Linux":
1010
tag = "manylinux_2_24_x86_64" if architecture == "x86_64" else "manylinux_2_24_aarch64"
1111
elif system == "Darwin":
12-
tag = "macosx_13_1_x86_64" if architecture == "x86_64" else "macosx_13_1_arm64"
12+
tag = "macosx_14_0_arm64"
1313
elif system == "Windows":
1414
tag = "win_amd64" if architecture == "x86_64" else "win_arm64"
1515
else:

0 commit comments

Comments
 (0)