Skip to content

Commit 0b5f6d4

Browse files
committed
Make kernel configurations symmetric between ROCm and CUDA for blocksize 64
1 parent 9b2622a commit 0b5f6d4

File tree

2 files changed

+1
-5
lines changed

2 files changed

+1
-5
lines changed

csrc/kernels.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1783,7 +1783,7 @@ MAKE_kQuantizeBlockwiseSmall(half, 32, NF4)
17831783
MAKE_kQuantizeBlockwiseSmall(float, 32, NF4)
17841784
MAKE_kQuantizeBlockwiseSmall(bnb_bfloat16, 32, NF4)
17851785

1786-
// QBLOCK_SIZE=64 instantiations (used on HIP for blocksize=64)
1786+
// QBLOCK_SIZE=64 instantiations (blocksize=64, 4-bit)
17871787
MAKE_kQuantizeBlockwiseSmall(half, 64, FP4)
17881788
MAKE_kQuantizeBlockwiseSmall(float, 64, FP4)
17891789
MAKE_kQuantizeBlockwiseSmall(bnb_bfloat16, 64, FP4)

csrc/ops.cu

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ void quantizeBlockwise(
5151
else if (blocksize == 128)
5252
kQuantizeBlockwise<T, 128, 2, 0, DATA_TYPE><<<num_blocks, 64>>>(code, A, absmax, out, rand, rand_offset, n);
5353
else if (blocksize == 64) {
54-
#if BNB_HIP
5554
if constexpr (DATA_TYPE > 0) {
5655
const int ws = bnb_host_warp_size();
5756
const int num_qb = ws / (64 / 2);
@@ -61,9 +60,6 @@ void quantizeBlockwise(
6160
} else {
6261
kQuantizeBlockwise<T, 64, 2, 0, DATA_TYPE><<<num_blocks, 32>>>(code, A, absmax, out, rand, rand_offset, n);
6362
}
64-
#else
65-
kQuantizeBlockwise<T, 64, 2, 0, DATA_TYPE><<<num_blocks, 32>>>(code, A, absmax, out, rand, rand_offset, n);
66-
#endif
6763
} else if (blocksize == 32) {
6864
if constexpr (DATA_TYPE > 0) {
6965
const int ws = bnb_host_warp_size();

0 commit comments

Comments
 (0)