Skip to content

Commit af7410d

Browse files
committed
fix shape
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
1 parent bfed130 commit af7410d

File tree

1 file changed

+4
-2
lines changed
  • bitsandbytes/backends/cpu

1 file changed

+4
-2
lines changed

bitsandbytes/backends/cpu/ops.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,7 @@ def _(
266266
blocksize: int,
267267
) -> torch.Tensor:
268268
if B.dtype != torch.uint8:
269-
B = B.view(torch.uint8)
269+
B = B.contiguous().view(torch.uint8)
270270
dtype = A.dtype
271271
quant_type = "fp4" if code[1] > 0 else "nf4"
272272
# cpu fused op only support bf16 for now.
@@ -280,7 +280,9 @@ def _(
280280
out_shape = (*A.shape[:-1], shapeB[0])
281281
if gemm_4bit_forward_kernel is not None:
282282
quant_type_num = 1 if quant_type == "fp4" else 0
283-
out = gemm_4bit_forward_kernel(A, B, absmax, blocksize, quant_type_num)
283+
# C++ kernel expects weight shape (N, K_packed), ensure 2D contiguous
284+
B_2d = B.reshape(shapeB[0], -1).contiguous()
285+
out = gemm_4bit_forward_kernel(A, B_2d, absmax, blocksize, quant_type_num)
284286
else:
285287
out = torch.empty(out_shape, dtype=A.dtype, device=A.device)
286288
M = A.shape[0]

0 commit comments

Comments (0)