Skip to content

Commit 9e70800

Browse files
committed
fix: ensure input tensor A is contiguous before Triton quantize/dequantize kernels
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
1 parent b6bfbeb commit 9e70800

1 file changed

Lines changed: 2 additions & 2 deletions

File tree

  • bitsandbytes/backends/triton

bitsandbytes/backends/triton/ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def quantize_blockwise(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> t
1818
torch._check_is_size(blocksize)
1919
# torch._check(A.dtype == torch.float32, lambda: f"A must be float32 on xpu, got {A.dtype}")
2020
with torch_accelerator_module.device(A.device):
21-
out, absmax = kernels_8bit_quant.quantize_blockwise_triton(A, code, blocksize)
21+
out, absmax = kernels_8bit_quant.quantize_blockwise_triton(A.contiguous(), code, blocksize)
2222
return out, absmax.float()
2323

2424

@@ -30,7 +30,7 @@ def dequantize_blockwise(
3030
# torch._check(dtype == torch.float32, lambda: f"dtype must be float32 on xpu, got {dtype}")
3131
with torch_accelerator_module.device(A.device):
3232
out = kernels_8bit_quant.dequant_8bit_blockwise(
33-
A,
33+
A.contiguous(),
3434
absmax,
3535
code,
3636
blocksize,

0 commit comments

Comments
 (0)