|
10 | 10 |
|
11 | 11 | import bitsandbytes as bnb |
12 | 12 | from bitsandbytes import functional as F |
13 | | -from bitsandbytes.cextension import ROCM_WARP_SIZE_64 |
14 | 13 | from tests.helpers import ( |
15 | 14 | BOOLEAN_TUPLES, |
16 | 15 | TRUE_FALSE, |
@@ -96,7 +95,7 @@ class Test8BitBlockwiseQuantizeFunctional: |
96 | 95 | @pytest.mark.parametrize("nested", TRUE_FALSE, ids=id_formatter("nested")) |
97 | 96 | @pytest.mark.parametrize( |
98 | 97 | "blocksize", |
99 | | - [4096, 2048, 1024, 512, 256, 128, 64] if not ROCM_WARP_SIZE_64 else [4096, 2048, 1024, 512, 256, 128], |
| 98 | + [4096, 2048, 1024, 512, 256, 128, 64], |
100 | 99 | ) |
101 | 100 | @pytest.mark.parametrize("signed", TRUE_FALSE, ids=id_formatter("signed")) |
102 | 101 | def test_dynamic_blockwise_quantization(self, device, dtype, nested, blocksize, signed): |
@@ -509,7 +508,6 @@ def test_dim3_igemm(self, seq_dim, hidden_dim, batch_dim): |
509 | 508 | @pytest.mark.parametrize("hidden_dim", [32, 1024 * 4], ids=id_formatter("hidden_dim")) |
510 | 509 | @pytest.mark.parametrize("batch_dim", [2, 16], ids=id_formatter("batch_dim")) |
511 | 510 | @pytest.mark.parametrize("transpose", TRUE_FALSE, ids=id_formatter("transpose")) |
512 | | - @pytest.mark.skipif(ROCM_WARP_SIZE_64, reason="this test is not supported on ROCm yet") |
513 | 511 | def test_minmax_igemm(self, seq_dim, hidden_dim, batch_dim, transpose): |
514 | 512 | def min_max(x): |
515 | 513 | maxA = torch.amax(x, dim=2, keepdim=True) |
@@ -844,7 +842,7 @@ class TestQuantize4BitFunctional: |
844 | 842 | @pytest.mark.parametrize("quant_type", ["fp4", "nf4"]) |
845 | 843 | @pytest.mark.parametrize( |
846 | 844 | "blocksize", |
847 | | - [32, 64, 128, 256, 512, 1024, 2048, 4096] if not ROCM_WARP_SIZE_64 else [64, 128, 256, 512, 1024, 2048, 4096], |
| 845 | + [32, 64, 128, 256, 512, 1024, 2048, 4096], |
848 | 846 | ) |
849 | 847 | def test_4bit_quant(self, device, dtype, quant_type, blocksize): |
850 | 848 | if device == "hpu" and not is_supported_on_hpu(quant_type, dtype): |
@@ -927,9 +925,7 @@ def test_4bit_quant(self, device, dtype, quant_type, blocksize): |
927 | 925 |
|
928 | 926 | @pytest.mark.parametrize("device", get_available_devices()) |
929 | 927 | @pytest.mark.parametrize("quant_type", ["fp4", "nf4"]) |
930 | | - @pytest.mark.parametrize( |
931 | | - "blocksize", [32, 64, 128] if not ROCM_WARP_SIZE_64 else [64, 128], ids=id_formatter("blocksize") |
932 | | - ) |
| 928 | + @pytest.mark.parametrize("blocksize", [32, 64, 128], ids=id_formatter("blocksize")) |
933 | 929 | @pytest.mark.parametrize("dtype", [torch.float32, torch.float16], ids=describe_dtype) |
934 | 930 | def test_4bit_compressed_stats(self, device, quant_type, blocksize, dtype): |
935 | 931 | if device == "hpu" and not is_supported_on_hpu(quant_type, dtype): |
@@ -966,9 +962,7 @@ def test_4bit_compressed_stats(self, device, quant_type, blocksize, dtype): |
966 | 962 | @pytest.mark.skipif(not get_available_devices(no_cpu=True), reason="No accelerator device") |
967 | 963 | @pytest.mark.parametrize("dtype", [torch.float32, torch.float16, torch.bfloat16], ids=describe_dtype) |
968 | 964 | @pytest.mark.parametrize("quant_type", ["fp4", "nf4"]) |
969 | | - @pytest.mark.parametrize( |
970 | | - "blocksize", [32, 64, 128] if not ROCM_WARP_SIZE_64 else [64, 128], ids=id_formatter("blocksize") |
971 | | - ) |
| 965 | + @pytest.mark.parametrize("blocksize", [32, 64, 128], ids=id_formatter("blocksize")) |
972 | 966 | def test_4bit_quant_large(self, device, dtype, quant_type, blocksize): |
973 | 967 | """ |
974 | 968 | Test that we can successfully quantize a large tensor. Note that the following limitations apply: |
@@ -1028,9 +1022,6 @@ def test_bench_4bit_dequant(self, quant_type): |
1028 | 1022 | # torch.cuda.synchronize() |
1029 | 1023 | # print((time.time()-t0)/iters*1e6) |
1030 | 1024 |
|
1031 | | - @pytest.mark.skipif( |
1032 | | - ROCM_WARP_SIZE_64, reason="gemv 4bit tests are partially enabled on MI300, others being fixed for warpsize 64" |
1033 | | - ) |
1034 | 1025 | @pytest.mark.parametrize("device", get_available_devices()) |
1035 | 1026 | @pytest.mark.parametrize("double_quant", TRUE_FALSE, ids=lambda double_quant: f"DQ_{double_quant}") |
1036 | 1027 | @pytest.mark.parametrize("storage_type", ["nf4", "fp4"]) |
@@ -1185,7 +1176,6 @@ def test_gemv_4bit(self, device, dim, dtype, storage_type, double_quant, kind): |
1185 | 1176 | @pytest.mark.parametrize("device", get_available_devices()) |
1186 | 1177 | @pytest.mark.parametrize("storage_type", ["nf4", "fp4"], ids=["nf4", "fp4"]) |
1187 | 1178 | @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=describe_dtype) |
1188 | | - @pytest.mark.skipif(ROCM_WARP_SIZE_64, reason="this test is not supported on ROCm yet") |
1189 | 1179 | def test_gemv_eye_4bit(self, device, storage_type, dtype): |
1190 | 1180 | if device == "hpu" and not is_supported_on_hpu(storage_type, dtype): |
1191 | 1181 | pytest.skip("This configuration is not supported on HPU.") |
|
0 commit comments