Skip to content

Commit ba96122

Browse files
committed
Port k-quants (Q2_K, Q3_K, Q4_K, Q5_K, Q6_K) to the new mat-vec path
1 parent f839c10 commit ba96122

3 files changed

Lines changed: 470 additions & 9 deletions

File tree

ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1338,7 +1338,9 @@ class ggml_webgpu_shader_lib {
13381338
const bool use_row_tiled =
13391339
context.src0->type == GGML_TYPE_F32 || context.src0->type == GGML_TYPE_F16 || context.src0->type == GGML_TYPE_Q4_0 ||
13401340
context.src0->type == GGML_TYPE_Q4_1 || context.src0->type == GGML_TYPE_Q5_0 || context.src0->type == GGML_TYPE_Q5_1 ||
1341-
context.src0->type == GGML_TYPE_Q8_0 || context.src0->type == GGML_TYPE_Q8_1;
1341+
context.src0->type == GGML_TYPE_Q8_0 || context.src0->type == GGML_TYPE_Q8_1 || context.src0->type == GGML_TYPE_Q6_K ||
1342+
context.src0->type == GGML_TYPE_Q4_K || context.src0->type == GGML_TYPE_Q5_K || context.src0->type == GGML_TYPE_Q3_K ||
1343+
context.src0->type == GGML_TYPE_Q2_K;
13421344
ggml_webgpu_mul_mat_vec_pipeline_key key = {
13431345
.src0_type = context.src0->type,
13441346
.src1_type = context.src1->type,

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,17 +1329,12 @@ static webgpu_encoded_op ggml_webgpu_mul_mat(webgpu_context & ctx,
13291329
case GGML_TYPE_Q5_1:
13301330
case GGML_TYPE_Q8_0:
13311331
case GGML_TYPE_Q8_1:
1332-
use_fast = true;
1333-
break;
13341332
case GGML_TYPE_Q6_K:
1335-
use_fast = !is_vec || ctx->global_ctx->capabilities.supports_subgroups;
1336-
break;
1337-
case GGML_TYPE_Q2_K:
1338-
case GGML_TYPE_Q3_K:
13391333
case GGML_TYPE_Q4_K:
13401334
case GGML_TYPE_Q5_K:
1341-
// we don't have fast mat-vec for these types, but we do have (semi) fast mat-mat
1342-
use_fast = !is_vec;
1335+
case GGML_TYPE_Q3_K:
1336+
case GGML_TYPE_Q2_K:
1337+
use_fast = true;
13431338
break;
13441339
default:
13451340
break;

0 commit comments

Comments
 (0)