Skip to content

Commit 29564ad

Browse files
CI: Setup HPU nightly tests (#1681)
* Setup XPU CI * CI: expand XPU matrix * test * test * test * test * test * test * test * test * test * test * skip some fp4 tests on hpu * skip some fp4 tests on hpu * skip gemv tests on hpu * test * Additional test patches for HPU * HPU test update * HPU test update * HPU test update * HPU test update * Format
1 parent 70bbbb9 commit 29564ad

File tree

5 files changed

+161
-7
lines changed

5 files changed

+161
-7
lines changed

.github/workflows/tests.yml

Lines changed: 127 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -222,6 +222,133 @@ jobs:
222222
# - name: Show pip packages
223223
# run: pip list
224224

225+
test-hpu:
226+
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
227+
needs: build-cpu
228+
strategy:
229+
fail-fast: false
230+
matrix:
231+
torch_version: ["2.6.0"]
232+
runs-on:
233+
group: bandb-itac-bmemr-gaudi3-1gaudi
234+
env:
235+
BNB_TEST_DEVICE: hpu
236+
container:
237+
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
238+
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
239+
env:
240+
OMPI_MCA_btl_vader_single_copy_mechanism: none
241+
BNB_TEST_DEVICE: hpu
242+
steps:
243+
- name: Show system information
244+
run: |
245+
echo "OS: $(uname -a)"
246+
echo "CPU: $(lscpu | grep 'Model name')"
247+
echo "Memory: $(free -h)"
248+
249+
- name: Show HPU Information
250+
run: |
251+
hl-smi
252+
253+
- uses: actions/checkout@v4
254+
255+
- name: Download build artifact
256+
uses: actions/download-artifact@v4
257+
with:
258+
name: lib_cpu_ubuntu-22.04_x86_64
259+
path: bitsandbytes/
260+
merge-multiple: true
261+
262+
- name: Show installed packages
263+
run: pip list
264+
265+
- name: Install dependencies
266+
run: |
267+
pip install -e ".[test]"
268+
pip install pytest-cov
269+
270+
- name: Show installed packages
271+
run: pip list
272+
273+
- name: Show environment information
274+
run: |
275+
python -m torch.utils.collect_env
276+
python -m bitsandbytes
277+
278+
- name: Run tests
279+
run: pytest --durations=100
280+
281+
test-xpu:
282+
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
283+
needs: build-cpu
284+
strategy:
285+
fail-fast: false
286+
matrix:
287+
torch_version: ["2.7.1"] #["2.6.0", "2.7.1"]
288+
ipex: [false]
289+
# ipex: [true, false]
290+
# include:
291+
# - torch_version: "2.6.0"
292+
# ipex: true
293+
# ipex_version: "2.6.10+xpu"
294+
# - torch_version: "2.7.1"
295+
# ipex: true
296+
# ipex_version: "2.7.10+xpu"
297+
runs-on:
298+
group: bandb-itac-bmsprpvc1550-8-1gpu
299+
env:
300+
BNB_TEST_DEVICE: xpu
301+
steps:
302+
- name: Show system information
303+
run: |
304+
echo "OS: $(uname -a)"
305+
echo "CPU: $(lscpu | grep 'Model name')"
306+
echo "Memory: $(free -h)"
307+
308+
- name: Show XPU Information
309+
run: |
310+
xpu-smi discovery
311+
sudo xpu-smi discovery
312+
sudo apt-get install -y hwinfo
313+
hwinfo --display
314+
315+
- uses: actions/checkout@v4
316+
317+
- name: Download build artifact
318+
uses: actions/download-artifact@v4
319+
with:
320+
name: lib_cpu_ubuntu-22.04_x86_64
321+
path: bitsandbytes/
322+
merge-multiple: true
323+
324+
- name: Setup Python
325+
uses: actions/setup-python@v5
326+
with:
327+
python-version: 3.9
328+
329+
- name: Install PyTorch
330+
run: pip install torch==${{ matrix.torch_version }} --index-url https://download.pytorch.org/whl/xpu
331+
332+
- name: Install IPEX
333+
if: matrix.ipex == true
334+
run: pip install intel_extension_for_pytorch==${{ matrix.ipex_version }} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
335+
336+
- name: Install dependencies
337+
run: |
338+
pip install -e ".[test]"
339+
pip install pytest-cov
340+
341+
- name: Show installed packages
342+
run: pip list
343+
344+
- name: Show environment information
345+
run: |
346+
python -m torch.utils.collect_env
347+
python -m bitsandbytes
348+
349+
# - name: Run tests
350+
# run: pytest --durations=100
351+
225352
test-cuda:
226353
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
227354
needs: build-cuda

bitsandbytes/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -4,6 +4,7 @@
44
# LICENSE file in the root directory of this source tree.
55

66

7+
import importlib
78
import sys
89

910
import torch
@@ -37,8 +38,13 @@
3738
if hasattr(torch, "xpu") and torch.xpu.is_available():
3839
from .backends.xpu import ops as xpu_ops
3940

40-
if hasattr(torch, "hpu") and torch.hpu.is_available():
41-
from .backends.hpu import ops as hpu_ops
41+
42+
if importlib.util.find_spec("habana_frameworks") and importlib.util.find_spec("habana_frameworks.torch"):
43+
# In case not automatically imported
44+
import habana_frameworks.torch
45+
46+
if hasattr(torch, "hpu") and torch.hpu.is_available():
47+
from .backends.hpu import ops as hpu_ops
4248

4349

4450
def _import_backends():

tests/test_autograd.py

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -234,6 +234,9 @@ def test_matmul_4bit(
234234
out_bnb.data.copy_(out_torch)
235235
if device == "cuda":
236236
torch.cuda.synchronize()
237+
elif device == "hpu":
238+
torch.hpu.synchronize()
239+
237240
loss_bnb = torch.nn.functional.mse_loss(out_bnb, target).mean()
238241
loss_bnb.backward()
239242
gradA1 = A.grad

tests/test_linear8bitlt.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,8 @@ def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
257257
ref_output = net(x)
258258

259259
# Compile the model
260-
compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode)
260+
compile_backend = "hpu_backend" if device == "hpu" else "inductor"
261+
compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode, backend=compile_backend)
261262

262263
# Get output from compiled model
263264
with torch.no_grad():

tests/test_modules.py

Lines changed: 21 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -5,7 +5,7 @@
55
from torch import nn
66

77
import bitsandbytes as bnb
8-
from tests.helpers import get_available_devices, id_formatter
8+
from tests.helpers import get_available_devices, id_formatter, is_supported_on_hpu
99

1010

1111
class MockArgs:
@@ -276,9 +276,9 @@ def test_linear_kbit_fp32_bias(device, module):
276276
"NF4": bnb.nn.LinearNF4,
277277
"FP4+C": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compress_statistics=True),
278278
"NF4+C": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compress_statistics=True),
279-
"NF4+fp32": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float32),
280-
"NF4+fp16": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16),
281-
"NF4+bf16": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16),
279+
"NF4+fp32": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.float32),
280+
"NF4+fp16": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.float16),
281+
"NF4+bf16": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.bfloat16),
282282
}
283283

284284

@@ -295,7 +295,12 @@ def test_kbit_backprop(device, module):
295295
torch.nn.init.kaiming_normal_(ref[0].weight)
296296
torch.nn.init.kaiming_normal_(ref[1].weight)
297297
ref[1].weight.requires_grad_(False)
298+
298299
kbit = nn.Sequential(*[torch.nn.Linear(dim1, dim2), module(dim2, 128)])
300+
301+
if device == "hpu" and isinstance(kbit[1], bnb.nn.Linear4bit) and kbit[1].weight.quant_type == "fp4":
302+
pytest.skip("FP4 is not supported on HPU")
303+
299304
kbit[0].weight.detach().copy_(ref[0].weight)
300305
kbit[1].weight.detach().copy_(ref[1].weight)
301306
kbit[0].bias.detach().copy_(ref[0].bias)
@@ -358,6 +363,12 @@ def test_kbit_backprop(device, module):
358363
ids=lambda x: x.__name__ if inspect.isclass(x) else str(x),
359364
)
360365
def test_embedding_lossless(device, embedding_class, input_shape, embedding_dim, quant_storage):
366+
if device == "hpu":
367+
if embedding_class is bnb.nn.EmbeddingFP4:
368+
pytest.skip("FP4 is not supported on HPU")
369+
elif embedding_class is bnb.nn.EmbeddingNF4 and not is_supported_on_hpu("nf4", torch.float32, quant_storage):
370+
pytest.skip("This configuration is not supported on HPU")
371+
361372
num_embeddings = 128
362373

363374
src_weight = (torch.randn((num_embeddings, embedding_dim), dtype=torch.float32) > 0).to(
@@ -403,6 +414,12 @@ def test_embedding_lossless(device, embedding_class, input_shape, embedding_dim,
403414
ids=lambda x: x.__name__ if inspect.isclass(x) else str(x),
404415
)
405416
def test_embedding_error(device, embedding_class, input_shape, embedding_dim, quant_storage):
417+
if device == "hpu":
418+
if embedding_class is bnb.nn.EmbeddingFP4:
419+
pytest.skip("FP4 is not supported on HPU")
420+
elif embedding_class is bnb.nn.EmbeddingNF4 and not is_supported_on_hpu("nf4", torch.float32, quant_storage):
421+
pytest.skip("This configuration is not supported on HPU")
422+
406423
is_8bit = embedding_class is bnb.nn.Embedding8bit
407424

408425
num_embeddings = 128

0 commit comments

Comments (0)