Skip to content

Commit 665abc6

Browse files
authored
add fast mat-vec kernels for i-quants (#22344)
1 parent 4414c04 commit 665abc6

3 files changed

Lines changed: 543 additions & 0 deletions

File tree

ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1615,6 +1615,24 @@ class ggml_webgpu_shader_lib {
1615 1615
defines.push_back("MUL_ACC_" + type_upper);
1616 1616
defines.push_back("U32_DEQUANT_HELPERS");
1617 1617
defines.push_back("SRC0_INNER_TYPE=u32");
1618+
switch (context.src0->type) {
1619+
case GGML_TYPE_IQ1_S:
1620+
case GGML_TYPE_IQ1_M:
1621+
case GGML_TYPE_IQ2_S:
1622+
case GGML_TYPE_IQ3_S:
1623+
case GGML_TYPE_IQ4_NL:
1624+
case GGML_TYPE_IQ4_XS:
1625+
defines.push_back(type_upper + "_GRID");
1626+
break;
1627+
case GGML_TYPE_IQ2_XXS:
1628+
case GGML_TYPE_IQ2_XS:
1629+
case GGML_TYPE_IQ3_XXS:
1630+
defines.push_back(type_upper + "_GRID");
1631+
defines.push_back(type_upper + "_TABLES");
1632+
break;
1633+
default:
1634+
break;
1635+
}
1618 1636
break;
1619 1637
}
1620 1638
}

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1391,6 +1391,17 @@ static webgpu_encoded_op ggml_webgpu_mul_mat(webgpu_context & ctx,
1391 1391
case GGML_TYPE_Q2_K:
1392 1392
use_fast = true;
1393 1393
break;
1394+
case GGML_TYPE_IQ1_S:
1395+
case GGML_TYPE_IQ1_M:
1396+
case GGML_TYPE_IQ2_XXS:
1397+
case GGML_TYPE_IQ2_XS:
1398+
case GGML_TYPE_IQ2_S:
1399+
case GGML_TYPE_IQ3_XXS:
1400+
case GGML_TYPE_IQ3_S:
1401+
case GGML_TYPE_IQ4_NL:
1402+
case GGML_TYPE_IQ4_XS:
1403+
use_fast = is_vec;
1404+
break;
1394 1405
default:
1395 1406
break;
1396 1407
}

0 commit comments

Comments
 (0)