Skip to content

Commit 518b109

Browse files
SharmaRithikmeh
authored and committed
add fast matmul iquants (ggml-org#22504)
1 parent 1468f64 commit 518b109

3 files changed

Lines changed: 443 additions & 1 deletion

File tree

ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1806,6 +1806,25 @@ class ggml_webgpu_shader_lib {
18061806
defines.push_back("U32_DEQUANT_HELPERS");
18071807
defines.push_back("SRC0_INNER_TYPE=u32");
18081808

1809+
switch (context.src0->type) {
1810+
case GGML_TYPE_IQ1_S:
1811+
case GGML_TYPE_IQ1_M:
1812+
case GGML_TYPE_IQ4_NL:
1813+
case GGML_TYPE_IQ4_XS:
1814+
defines.push_back(type_upper + "_GRID");
1815+
break;
1816+
case GGML_TYPE_IQ2_XXS:
1817+
case GGML_TYPE_IQ2_XS:
1818+
case GGML_TYPE_IQ2_S:
1819+
case GGML_TYPE_IQ3_XXS:
1820+
case GGML_TYPE_IQ3_S:
1821+
defines.push_back(type_upper + "_GRID");
1822+
defines.push_back(type_upper + "_TABLES");
1823+
break;
1824+
default:
1825+
break;
1826+
}
1827+
18091828
variant += std::string("_") + src0_name;
18101829
break;
18111830
}

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1422,7 +1422,7 @@ static webgpu_encoded_op ggml_webgpu_mul_mat(webgpu_context & ctx,
14221422
case GGML_TYPE_IQ3_S:
14231423
case GGML_TYPE_IQ4_NL:
14241424
case GGML_TYPE_IQ4_XS:
1425-
use_fast = is_vec;
1425+
use_fast = true;
14261426
break;
14271427
default:
14281428
break;

0 commit comments

Comments
 (0)