Skip to content

Commit 32cd056

Browse files
TimDettmers and claude committed
Remove blocksize=64 instantiation guards
Now that kQuantizeBlockwise falls back to BLOCK_LOAD_DIRECT when threads < warp_size, the blocksize=64 instantiations compile correctly on both CUDA and HIP. The guards were causing linker errors because ops.cu still references these symbols for the General8bit dispatch path.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 50cef42 commit 32cd056

File tree

1 file changed

+0
-18
lines changed

1 file changed

+0
-18
lines changed

csrc/kernels.cu

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2470,55 +2470,43 @@ MAKE_kQuantizeBlockwise(half, 1024, 4, 0, General8bit)
24702470
MAKE_kQuantizeBlockwise(half, 512, 2, 0, General8bit)
24712471
MAKE_kQuantizeBlockwise(half, 256, 2, 0, General8bit)
24722472
MAKE_kQuantizeBlockwise(half, 128, 2, 0, General8bit)
2473-
#if BNB_WARP_SIZE == 32
24742473
MAKE_kQuantizeBlockwise(half, 64, 2, 0, General8bit)
2475-
#endif
24762474
MAKE_kQuantizeBlockwise(half, 4096, 4, 0, FP4)
24772475
MAKE_kQuantizeBlockwise(half, 2048, 4, 0, FP4)
24782476
MAKE_kQuantizeBlockwise(half, 1024, 4, 0, FP4)
24792477
MAKE_kQuantizeBlockwise(half, 512, 2, 0, FP4)
24802478
MAKE_kQuantizeBlockwise(half, 256, 2, 0, FP4)
24812479
MAKE_kQuantizeBlockwise(half, 128, 2, 0, FP4)
2482-
#if BNB_WARP_SIZE == 32
24832480
MAKE_kQuantizeBlockwise(half, 64, 2, 0, FP4)
2484-
#endif
24852481
MAKE_kQuantizeBlockwise(half, 4096, 4, 0, NF4)
24862482
MAKE_kQuantizeBlockwise(half, 2048, 4, 0, NF4)
24872483
MAKE_kQuantizeBlockwise(half, 1024, 4, 0, NF4)
24882484
MAKE_kQuantizeBlockwise(half, 512, 2, 0, NF4)
24892485
MAKE_kQuantizeBlockwise(half, 256, 2, 0, NF4)
24902486
MAKE_kQuantizeBlockwise(half, 128, 2, 0, NF4)
2491-
#if BNB_WARP_SIZE == 32
24922487
MAKE_kQuantizeBlockwise(half, 64, 2, 0, NF4)
2493-
#endif
24942488
MAKE_kQuantizeBlockwise(float, 4096, 4, 0, General8bit)
24952489
MAKE_kQuantizeBlockwise(float, 4096, 4, 1, General8bit)
24962490
MAKE_kQuantizeBlockwise(float, 2048, 4, 0, General8bit)
24972491
MAKE_kQuantizeBlockwise(float, 1024, 4, 0, General8bit)
24982492
MAKE_kQuantizeBlockwise(float, 512, 2, 0, General8bit)
24992493
MAKE_kQuantizeBlockwise(float, 256, 2, 0, General8bit)
25002494
MAKE_kQuantizeBlockwise(float, 128, 2, 0, General8bit)
2501-
#if BNB_WARP_SIZE == 32
25022495
MAKE_kQuantizeBlockwise(float, 64, 2, 0, General8bit)
2503-
#endif
25042496
MAKE_kQuantizeBlockwise(float, 4096, 4, 0, FP4)
25052497
MAKE_kQuantizeBlockwise(float, 2048, 4, 0, FP4)
25062498
MAKE_kQuantizeBlockwise(float, 1024, 4, 0, FP4)
25072499
MAKE_kQuantizeBlockwise(float, 512, 2, 0, FP4)
25082500
MAKE_kQuantizeBlockwise(float, 256, 2, 0, FP4)
25092501
MAKE_kQuantizeBlockwise(float, 128, 2, 0, FP4)
2510-
#if BNB_WARP_SIZE == 32
25112502
MAKE_kQuantizeBlockwise(float, 64, 2, 0, FP4)
2512-
#endif
25132503
MAKE_kQuantizeBlockwise(float, 4096, 4, 0, NF4)
25142504
MAKE_kQuantizeBlockwise(float, 2048, 4, 0, NF4)
25152505
MAKE_kQuantizeBlockwise(float, 1024, 4, 0, NF4)
25162506
MAKE_kQuantizeBlockwise(float, 512, 2, 0, NF4)
25172507
MAKE_kQuantizeBlockwise(float, 256, 2, 0, NF4)
25182508
MAKE_kQuantizeBlockwise(float, 128, 2, 0, NF4)
2519-
#if BNB_WARP_SIZE == 32
25202509
MAKE_kQuantizeBlockwise(float, 64, 2, 0, NF4)
2521-
#endif
25222510

25232511
MAKE_kQuantizeBlockwise(bnb_bfloat16, 4096, 4, 0, General8bit)
25242512
MAKE_kQuantizeBlockwise(bnb_bfloat16, 4096, 4, 1, General8bit)
@@ -2527,27 +2515,21 @@ MAKE_kQuantizeBlockwise(bnb_bfloat16, 1024, 4, 0, General8bit)
25272515
MAKE_kQuantizeBlockwise(bnb_bfloat16, 512, 2, 0, General8bit)
25282516
MAKE_kQuantizeBlockwise(bnb_bfloat16, 256, 2, 0, General8bit)
25292517
MAKE_kQuantizeBlockwise(bnb_bfloat16, 128, 2, 0, General8bit)
2530-
#if BNB_WARP_SIZE == 32
25312518
MAKE_kQuantizeBlockwise(bnb_bfloat16, 64, 2, 0, General8bit)
2532-
#endif
25332519
MAKE_kQuantizeBlockwise(bnb_bfloat16, 4096, 4, 0, FP4)
25342520
MAKE_kQuantizeBlockwise(bnb_bfloat16, 2048, 4, 0, FP4)
25352521
MAKE_kQuantizeBlockwise(bnb_bfloat16, 1024, 4, 0, FP4)
25362522
MAKE_kQuantizeBlockwise(bnb_bfloat16, 512, 2, 0, FP4)
25372523
MAKE_kQuantizeBlockwise(bnb_bfloat16, 256, 2, 0, FP4)
25382524
MAKE_kQuantizeBlockwise(bnb_bfloat16, 128, 2, 0, FP4)
2539-
#if BNB_WARP_SIZE == 32
25402525
MAKE_kQuantizeBlockwise(bnb_bfloat16, 64, 2, 0, FP4)
2541-
#endif
25422526
MAKE_kQuantizeBlockwise(bnb_bfloat16, 4096, 4, 0, NF4)
25432527
MAKE_kQuantizeBlockwise(bnb_bfloat16, 2048, 4, 0, NF4)
25442528
MAKE_kQuantizeBlockwise(bnb_bfloat16, 1024, 4, 0, NF4)
25452529
MAKE_kQuantizeBlockwise(bnb_bfloat16, 512, 2, 0, NF4)
25462530
MAKE_kQuantizeBlockwise(bnb_bfloat16, 256, 2, 0, NF4)
25472531
MAKE_kQuantizeBlockwise(bnb_bfloat16, 128, 2, 0, NF4)
2548-
#if BNB_WARP_SIZE == 32
25492532
MAKE_kQuantizeBlockwise(bnb_bfloat16, 64, 2, 0, NF4)
2550-
#endif
25512533

25522534
// Template instantiations for blocksize=32 specialized kernel (4-bit only)
25532535
#define MAKE_kQuantizeBlockwiseSmall(dtype, data_type_name) \

0 commit comments

Comments
 (0)