@@ -247,6 +247,33 @@ public object DequantOps {
247247 }
248248 }
249249
/**
 * Looks up the storage layout of a single block for the given quantization [type].
 *
 * The result is `(bytesPerBlock, elemsPerBlock)`: the first component is the size of
 * one encoded block in bytes, the second is the number of elements that block holds.
 * Handy for callers that dequantize in chunks, e.g. on single-threaded platforms (WASM).
 *
 * @param type quantization type to look up.
 * @return `(bytesPerBlock, elemsPerBlock)` for [type].
 * @throws IllegalStateException if [type] has no entry in this table.
 */
public fun blockInfoFor(type: GGMLQuantizationType): Pair<Int, Int> {
    return when (type) {
        // Entries with one element per block.
        GGMLQuantizationType.F16,
        GGMLQuantizationType.BF16 -> 2 to 1
        GGMLQuantizationType.F32 -> 4 to 1

        // Entries with 32 elements per block.
        GGMLQuantizationType.Q4_0,
        GGMLQuantizationType.IQ4_NL -> 18 to 32
        GGMLQuantizationType.Q4_1 -> 20 to 32
        GGMLQuantizationType.Q5_0 -> 22 to 32
        GGMLQuantizationType.Q5_1 -> 24 to 32
        GGMLQuantizationType.Q8_0 -> 34 to 32
        // NOTE(review): 40 matches a layout of two 4-byte fields + 32 quants; some ggml
        // versions describe Q8_1 with two 2-byte fields (36 bytes) - confirm which one
        // the Q8_1 code path in this project expects.
        GGMLQuantizationType.Q8_1 -> 40 to 32

        // Entries with QK_K elements per block.
        GGMLQuantizationType.IQ4_XS -> (4 + QK_K / 2 + QK_K / 64) to QK_K
        GGMLQuantizationType.Q2_K -> (4 + QK_K / 16 + QK_K / 4) to QK_K
        GGMLQuantizationType.Q3_K -> (2 + QK_K / 4 + QK_K / 8 + 12) to QK_K
        GGMLQuantizationType.Q4_K -> 144 to QK_K
        GGMLQuantizationType.Q5_K -> 176 to QK_K
        GGMLQuantizationType.Q6_K -> 210 to QK_K
        GGMLQuantizationType.Q8_K -> 292 to QK_K

        // Element count is spelled as the literal 256 here rather than QK_K
        // (presumably the same value - TODO confirm).
        GGMLQuantizationType.TQ1_0 -> 54 to 256
        GGMLQuantizationType.TQ2_0 -> 66 to 256

        // Any type without an entry above is rejected explicitly.
        else -> error(" Block info for $type not available")
    }
}
276+
250277 // ========== ByteArray-based quantization implementations ==========
251278
252279 @Suppress(" UNUSED_PARAMETER" )
0 commit comments