@@ -2470,55 +2470,43 @@ MAKE_kQuantizeBlockwise(half, 1024, 4, 0, General8bit)
24702470MAKE_kQuantizeBlockwise(half, 512 , 2 , 0 , General8bit)
24712471MAKE_kQuantizeBlockwise(half, 256 , 2 , 0 , General8bit)
24722472MAKE_kQuantizeBlockwise(half, 128 , 2 , 0 , General8bit)
2473- #if BNB_WARP_SIZE == 32
24742473MAKE_kQuantizeBlockwise(half, 64 , 2 , 0 , General8bit)
2475- #endif
24762474MAKE_kQuantizeBlockwise(half, 4096 , 4 , 0 , FP4)
24772475MAKE_kQuantizeBlockwise(half, 2048 , 4 , 0 , FP4)
24782476MAKE_kQuantizeBlockwise(half, 1024 , 4 , 0 , FP4)
24792477MAKE_kQuantizeBlockwise(half, 512 , 2 , 0 , FP4)
24802478MAKE_kQuantizeBlockwise(half, 256 , 2 , 0 , FP4)
24812479MAKE_kQuantizeBlockwise(half, 128 , 2 , 0 , FP4)
2482- #if BNB_WARP_SIZE == 32
24832480MAKE_kQuantizeBlockwise(half, 64 , 2 , 0 , FP4)
2484- #endif
24852481MAKE_kQuantizeBlockwise(half, 4096 , 4 , 0 , NF4)
24862482MAKE_kQuantizeBlockwise(half, 2048 , 4 , 0 , NF4)
24872483MAKE_kQuantizeBlockwise(half, 1024 , 4 , 0 , NF4)
24882484MAKE_kQuantizeBlockwise(half, 512 , 2 , 0 , NF4)
24892485MAKE_kQuantizeBlockwise(half, 256 , 2 , 0 , NF4)
24902486MAKE_kQuantizeBlockwise(half, 128 , 2 , 0 , NF4)
2491- #if BNB_WARP_SIZE == 32
24922487MAKE_kQuantizeBlockwise(half, 64 , 2 , 0 , NF4)
2493- #endif
24942488MAKE_kQuantizeBlockwise(float , 4096 , 4 , 0 , General8bit)
24952489MAKE_kQuantizeBlockwise(float , 4096 , 4 , 1 , General8bit)
24962490MAKE_kQuantizeBlockwise(float , 2048 , 4 , 0 , General8bit)
24972491MAKE_kQuantizeBlockwise(float , 1024 , 4 , 0 , General8bit)
24982492MAKE_kQuantizeBlockwise(float , 512 , 2 , 0 , General8bit)
24992493MAKE_kQuantizeBlockwise(float , 256 , 2 , 0 , General8bit)
25002494MAKE_kQuantizeBlockwise(float , 128 , 2 , 0 , General8bit)
2501- #if BNB_WARP_SIZE == 32
25022495MAKE_kQuantizeBlockwise(float , 64 , 2 , 0 , General8bit)
2503- #endif
25042496MAKE_kQuantizeBlockwise(float , 4096 , 4 , 0 , FP4)
25052497MAKE_kQuantizeBlockwise(float , 2048 , 4 , 0 , FP4)
25062498MAKE_kQuantizeBlockwise(float , 1024 , 4 , 0 , FP4)
25072499MAKE_kQuantizeBlockwise(float , 512 , 2 , 0 , FP4)
25082500MAKE_kQuantizeBlockwise(float , 256 , 2 , 0 , FP4)
25092501MAKE_kQuantizeBlockwise(float , 128 , 2 , 0 , FP4)
2510- #if BNB_WARP_SIZE == 32
25112502MAKE_kQuantizeBlockwise(float , 64 , 2 , 0 , FP4)
2512- #endif
25132503MAKE_kQuantizeBlockwise(float , 4096 , 4 , 0 , NF4)
25142504MAKE_kQuantizeBlockwise(float , 2048 , 4 , 0 , NF4)
25152505MAKE_kQuantizeBlockwise(float , 1024 , 4 , 0 , NF4)
25162506MAKE_kQuantizeBlockwise(float , 512 , 2 , 0 , NF4)
25172507MAKE_kQuantizeBlockwise(float , 256 , 2 , 0 , NF4)
25182508MAKE_kQuantizeBlockwise(float , 128 , 2 , 0 , NF4)
2519- #if BNB_WARP_SIZE == 32
25202509MAKE_kQuantizeBlockwise(float , 64 , 2 , 0 , NF4)
2521- #endif
25222510
25232511MAKE_kQuantizeBlockwise(bnb_bfloat16, 4096 , 4 , 0 , General8bit)
25242512MAKE_kQuantizeBlockwise(bnb_bfloat16, 4096 , 4 , 1 , General8bit)
@@ -2527,27 +2515,21 @@ MAKE_kQuantizeBlockwise(bnb_bfloat16, 1024, 4, 0, General8bit)
25272515MAKE_kQuantizeBlockwise(bnb_bfloat16, 512 , 2 , 0 , General8bit)
25282516MAKE_kQuantizeBlockwise(bnb_bfloat16, 256 , 2 , 0 , General8bit)
25292517MAKE_kQuantizeBlockwise(bnb_bfloat16, 128 , 2 , 0 , General8bit)
2530- #if BNB_WARP_SIZE == 32
25312518MAKE_kQuantizeBlockwise(bnb_bfloat16, 64 , 2 , 0 , General8bit)
2532- #endif
25332519MAKE_kQuantizeBlockwise(bnb_bfloat16, 4096 , 4 , 0 , FP4)
25342520MAKE_kQuantizeBlockwise(bnb_bfloat16, 2048 , 4 , 0 , FP4)
25352521MAKE_kQuantizeBlockwise(bnb_bfloat16, 1024 , 4 , 0 , FP4)
25362522MAKE_kQuantizeBlockwise(bnb_bfloat16, 512 , 2 , 0 , FP4)
25372523MAKE_kQuantizeBlockwise(bnb_bfloat16, 256 , 2 , 0 , FP4)
25382524MAKE_kQuantizeBlockwise(bnb_bfloat16, 128 , 2 , 0 , FP4)
2539- #if BNB_WARP_SIZE == 32
25402525MAKE_kQuantizeBlockwise(bnb_bfloat16, 64 , 2 , 0 , FP4)
2541- #endif
25422526MAKE_kQuantizeBlockwise(bnb_bfloat16, 4096 , 4 , 0 , NF4)
25432527MAKE_kQuantizeBlockwise(bnb_bfloat16, 2048 , 4 , 0 , NF4)
25442528MAKE_kQuantizeBlockwise(bnb_bfloat16, 1024 , 4 , 0 , NF4)
25452529MAKE_kQuantizeBlockwise(bnb_bfloat16, 512 , 2 , 0 , NF4)
25462530MAKE_kQuantizeBlockwise(bnb_bfloat16, 256 , 2 , 0 , NF4)
25472531MAKE_kQuantizeBlockwise(bnb_bfloat16, 128 , 2 , 0 , NF4)
2548- #if BNB_WARP_SIZE == 32
25492532MAKE_kQuantizeBlockwise(bnb_bfloat16, 64 , 2 , 0 , NF4)
2550- #endif
25512533
25522534// Template instantiations for blocksize=32 specialized kernel (4-bit only)
25532535#define MAKE_kQuantizeBlockwiseSmall (dtype, data_type_name ) \
0 commit comments