Skip to content

Commit 479e84a

Browse files
authored
Merge branch 'main' into matmul4bit
2 parents 18c9659 + 318a86e commit 479e84a

36 files changed

Lines changed: 1080 additions & 962 deletions

.github/workflows/tests.yml

Lines changed: 91 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,24 +93,32 @@ jobs:
9393
path: output/${{ matrix.os }}/${{ matrix.arch }}/*
9494
retention-days: 7
9595

96-
cpu-tests:
96+
test-cpu:
9797
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
9898
needs: build-cpu
9999
strategy:
100100
fail-fast: false
101101
matrix:
102102
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
103-
torch_version: ["2.6.0", "2.7.0"]
103+
# Test with the oldest supported torch version and the two newest.
104+
torch_version: ["2.2.2", "2.6.0", "2.7.0"]
104105
include:
105106
- os: ubuntu-22.04
106107
arch: x86_64
107108
runner: banb-aws-general-8-plus-use1-public-80
108109
- os: ubuntu-22.04-arm
109110
arch: aarch64
111+
- os: ubuntu-22.04-arm
112+
arch: aarch64
113+
torch_version: "2.5.1"
110114
- os: windows-2025
111115
arch: x86_64
112116
- os: macos-15
113117
arch: arm64
118+
exclude:
119+
- os: ubuntu-22.04-arm
120+
torch_version: "2.2.2"
121+
114122
runs-on: ${{ matrix.runner || matrix.os }}
115123
env:
116124
BNB_TEST_DEVICE: cpu
@@ -129,22 +137,71 @@ jobs:
129137
with:
130138
python-version: 3.9
131139

140+
- name: Setup MSVC
141+
if: startsWith(matrix.os, 'windows')
142+
uses: ilammy/msvc-dev-cmd@v1.13.0 # makes the MSVC compiler (cl) available for torch.compile
143+
132144
- name: Install dependencies
133145
run: |
134146
pip install torch==${{ matrix.torch_version }} --index-url https://download.pytorch.org/whl/cpu
135147
pip install -e ".[test]"
136148
pip install pytest-cov
137149
150+
# We need to downgrade to numpy<2 for torch<2.3 compatibility.
151+
- name: Downgrade NumPy
152+
if: startsWith(matrix.torch_version, '2.2.')
153+
run: pip install "numpy<2"
154+
155+
- name: Show installed packages
156+
run: pip list
157+
158+
- name: Show environment information
159+
run: python -m torch.utils.collect_env
160+
161+
- name: Run tests
162+
run: pytest --durations=100
163+
164+
test-cpu-ipex:
165+
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
166+
needs: build-cpu
167+
runs-on: banb-aws-general-8-plus-use1-public-80
168+
env:
169+
BNB_TEST_DEVICE: cpu
170+
steps:
171+
- uses: actions/checkout@v4
172+
173+
- name: Download build artifact
174+
uses: actions/download-artifact@v4
175+
with:
176+
name: lib_cpu_ubuntu-22.04_x86_64
177+
path: bitsandbytes/
178+
merge-multiple: true
179+
180+
- name: Setup Python
181+
uses: actions/setup-python@v5
182+
with:
183+
python-version: 3.9
184+
185+
- name: Install dependencies
186+
run: |
187+
pip install torch==2.7.0 --index-url https://download.pytorch.org/whl/cpu
188+
pip install intel_extension_for_pytorch==2.7.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
189+
pip install -e ".[test]"
190+
pip install pytest-cov
191+
138192
- name: Show installed packages
139193
run: pip list
140194

141195
- name: Show environment information
142196
run: python -m torch.utils.collect_env
143197

198+
- name: IPEX smoke test
199+
run: python -c "import torch; import intel_extension_for_pytorch as ipex; print(torch.__version__); print(ipex.__version__);"
200+
144201
- name: Run tests
145202
run: pytest --durations=100
146203

147-
# cuda-aarch64-tests:
204+
# test-cuda-aarch64:
148205
# if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
149206
# needs: build-cuda
150207
# strategy:
@@ -165,9 +222,7 @@ jobs:
165222
# - name: Show pip packages
166223
# run: pip list
167224

168-
169-
170-
cuda-tests:
225+
test-cuda:
171226
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
172227
needs: build-cuda
173228
strategy:
@@ -179,7 +234,7 @@ jobs:
179234
cuda_version: ["11.8.0", "12.6.3", "12.8.1"]
180235
include:
181236
- cuda_version: "11.8.0"
182-
torch_version: "2.4.1"
237+
torch_version: "2.2.2"
183238
pypi_index: "https://download.pytorch.org/whl/cu118"
184239
- cuda_version: "12.6.3"
185240
torch_version: "2.6.0"
@@ -188,18 +243,40 @@ jobs:
188243
torch_version: "2.7.0"
189244
pypi_index: "https://download.pytorch.org/whl/cu128"
190245

191-
# L40S runners
246+
247+
# Linux L40S runners
192248
- os: ubuntu-22.04
193249
gpu: L40S
194250
runner: bandb-aws-g6e-4xlarge-plus-use1-public-80
195251

196-
# T4 runners
252+
# Linux T4 runners
197253
- os: ubuntu-22.04
198254
gpu: T4
199255
runner: bandb-aws-g4dn-4xlarge-plus-use1-public-80
256+
257+
# Specific Windows runners using cu118
200258
- os: windows-2025
259+
arch: x86_64
201260
gpu: T4
202261
runner: CUDA-Windows-x64
262+
cuda_version: "11.8.0"
263+
torch_version: "2.2.0"
264+
pypi_index: "https://download.pytorch.org/whl/cu118"
265+
- os: windows-2025
266+
arch: x86_64
267+
gpu: T4
268+
runner: CUDA-Windows-x64
269+
cuda_version: "11.8.0"
270+
torch_version: "2.6.0"
271+
pypi_index: "https://download.pytorch.org/whl/cu118"
272+
- os: windows-2025
273+
arch: x86_64
274+
gpu: T4
275+
runner: CUDA-Windows-x64
276+
cuda_version: "11.8.0"
277+
torch_version: "2.7.0"
278+
pypi_index: "https://download.pytorch.org/whl/cu118"
279+
203280
exclude:
204281
# Our current T4 Windows runner's driver is too old (471.11)
205282
# and cannot support CUDA 12+. Skip for now.
@@ -238,6 +315,11 @@ jobs:
238315
pip install -e ".[test]"
239316
pip install pytest-cov
240317
318+
# We need to downgrade to numpy<2 for torch<2.3 compatibility.
319+
- name: Downgrade NumPy
320+
if: startsWith(matrix.torch_version, '2.2.')
321+
run: pip install "numpy<2"
322+
241323
- name: Show installed packages
242324
run: pip list
243325

benchmarking/int8/row_scale_benchmark.py

Lines changed: 0 additions & 70 deletions
This file was deleted.

bitsandbytes/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
if torch.cuda.is_available():
3535
from .backends.cuda import ops as cuda_ops
3636

37+
if hasattr(torch, "xpu") and torch.xpu.is_available():
38+
from .backends.xpu import ops as xpu_ops
39+
3740

3841
def _import_backends():
3942
"""
@@ -64,4 +67,4 @@ def _import_backends():
6467
"optim.optimizer.MockArgs": False,
6568
}
6669

67-
__version__ = "0.46.0.dev0"
70+
__version__ = "0.47.0.dev0"

bitsandbytes/_ops.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import torch
66

7+
from .cextension import ipex_cpu, ipex_xpu
8+
79
_IS_TORCH_GTE_24 = False
810

911
if hasattr(torch.library, "register_fake"):
@@ -327,3 +329,22 @@ def _(
327329
)
328330
torch._check(out.device == A.device, lambda: f"Expected out.device == {A.device}, got {out.device}")
329331
torch._check(out.dtype == A.dtype, lambda: f"Expected out.dtype == {A.dtype}, got {out.dtype}")
332+
333+
334+
if ipex_cpu or ipex_xpu:
335+
# Define the dequantize_nf4_ipex op schema and register its fake implementation
336+
torch.library.define(
337+
"bitsandbytes::dequantize_nf4_ipex",
338+
"(Tensor A, Tensor absmax, int blocksize, int[] shape, ScalarType dtype) -> Tensor",
339+
)
340+
341+
@register_fake("bitsandbytes::dequantize_nf4_ipex")
342+
def _(
343+
A: torch.Tensor,
344+
absmax: torch.Tensor,
345+
blocksize: int,
346+
shape: Sequence[int],
347+
dtype: torch.dtype,
348+
) -> torch.Tensor:
349+
torch._check_is_size(blocksize)
350+
return torch.empty(shape, dtype=dtype, device=A.device)

0 commit comments

Comments
 (0)