Skip to content

Commit a5c3b00

Browse files
committed
Merge remote-tracking branch 'prism/HEAD' into prism-ml
2 parents b1fd4c8 + d104cf1 commit a5c3b00

35 files changed

Lines changed: 1508 additions & 22 deletions

.github/workflows/release-prism.yml

Lines changed: 742 additions & 0 deletions
Large diffs are not rendered by default.

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,8 @@ def prepare_tensors(self):
880880
data_qtype = gguf.GGMLQuantizationType.TQ1_0
881881
elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ2_0:
882882
data_qtype = gguf.GGMLQuantizationType.TQ2_0
883+
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q2_0:
884+
data_qtype = gguf.GGMLQuantizationType.Q2_0
883885
else:
884886
raise ValueError(f"Unknown file type: {self.ftype.name}")
885887

@@ -14038,6 +14040,7 @@ def main() -> None:
1403814040
"q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
1403914041
"tq1_0": gguf.LlamaFileType.MOSTLY_TQ1_0,
1404014042
"tq2_0": gguf.LlamaFileType.MOSTLY_TQ2_0,
14043+
"q2_0": gguf.LlamaFileType.MOSTLY_Q2_0,
1404114044
"auto": gguf.LlamaFileType.GUESSED,
1404214045
}
1404314046

ggml/include/ggml.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,8 @@ extern "C" {
434434
GGML_TYPE_TURBO4_0 = 44, // TurboQuant 4-bit KV cache: WHT + 4-bit PolarQuant
435435
GGML_TYPE_TQ3_1S = 45, // TurboQuant 3-bit weight: WHT-rotated 8-level Lloyd-Max, block_size=32
436436
GGML_TYPE_TQ4_1S = 46, // TurboQuant 4-bit weight: WHT-rotated 16-level Lloyd-Max, block_size=32
437-
GGML_TYPE_COUNT = 47,
437+
GGML_TYPE_Q2_0 = 47,
438+
GGML_TYPE_COUNT = 48,
438439
};
439440

440441
// precision
@@ -478,6 +479,7 @@ extern "C" {
478479
GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
479480
GGML_FTYPE_MOSTLY_NVFP4 = 26, // except 1d tensors
480481
GGML_FTYPE_MOSTLY_Q1_0 = 27, // except 1d tensors
482+
GGML_FTYPE_MOSTLY_Q2_0 = 28, // except 1d tensors
481483
};
482484

483485
// available tensor operations:

ggml/src/ggml-common.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ typedef sycl::half2 ggml_half2;
9696
#define QI1_0 (QK1_0 / 32)
9797
#define QR1_0 1
9898

99+
#define QI2_0 (QK2_0 / 32)
100+
#define QR2_0 1
101+
99102

100103
#define QI4_0 (QK4_0 / (4 * QR4_0))
101104
#define QR4_0 2
@@ -181,6 +184,13 @@ typedef struct {
181184
} block_q1_0;
182185
static_assert(sizeof(block_q1_0) == sizeof(ggml_half) + QK1_0 / 8, "wrong q1_0 block size/padding");
183186

187+
// Q2_0 block format: QK2_0 elements share one half-precision scale and are
// stored as 2-bit codes, four codes per byte.
#define QK2_0 128
188+
typedef struct {
189+
ggml_half d; // delta (scale) applied to every element of the block
190+
uint8_t qs[QK2_0 / 4]; // packed codes: 2 bits per element, 4 elements per byte
191+
} block_q2_0;
192+
// Guards against padding: the struct must be exactly the scale plus the packed codes.
static_assert(sizeof(block_q2_0) == sizeof(ggml_half) + QK2_0 / 4, "wrong q2_0 block size/padding");
193+
184194
#define QK4_0 32
185195
typedef struct {
186196
ggml_half d; // delta

ggml/src/ggml-cpu/arch-fallback.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
1818
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
1919
#define ggml_vec_dot_q1_0_q8_0_generic ggml_vec_dot_q1_0_q8_0
20+
#define ggml_vec_dot_q2_0_q8_0_generic ggml_vec_dot_q2_0_q8_0
2021
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
2122
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
2223
#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
@@ -83,6 +84,7 @@
8384
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
8485
// quants.c
8586
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
87+
#define ggml_vec_dot_q2_0_q8_0_generic ggml_vec_dot_q2_0_q8_0
8688
// repack.cpp
8789
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
8890
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
@@ -114,6 +116,7 @@
114116
#define quantize_row_q8_K_generic quantize_row_q8_K
115117
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
116118
#define ggml_vec_dot_q1_0_q8_0_generic ggml_vec_dot_q1_0_q8_0
119+
#define ggml_vec_dot_q2_0_q8_0_generic ggml_vec_dot_q2_0_q8_0
117120
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
118121
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
119122
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
@@ -163,6 +166,7 @@
163166
#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
164167
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
165168
#define ggml_vec_dot_q1_0_q8_0_generic ggml_vec_dot_q1_0_q8_0
169+
#define ggml_vec_dot_q2_0_q8_0_generic ggml_vec_dot_q2_0_q8_0
166170
// repack.cpp
167171
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
168172
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
@@ -203,6 +207,8 @@
203207
#elif defined(__riscv)
204208
// quants.c
205209
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
210+
#define ggml_vec_dot_q1_0_q8_0_generic ggml_vec_dot_q1_0_q8_0
211+
#define ggml_vec_dot_q2_0_q8_0_generic ggml_vec_dot_q2_0_q8_0
206212
// repack.cpp
207213
#define ggml_quantize_mat_q8_0_4x1_generic ggml_quantize_mat_q8_0_4x1
208214
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
@@ -244,6 +250,7 @@
244250
#define quantize_row_q8_K_generic quantize_row_q8_K
245251
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
246252
#define ggml_vec_dot_q1_0_q8_0_generic ggml_vec_dot_q1_0_q8_0
253+
#define ggml_vec_dot_q2_0_q8_0_generic ggml_vec_dot_q2_0_q8_0
247254
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
248255
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
249256
#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
@@ -308,6 +315,7 @@
308315
#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
309316
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
310317
#define ggml_vec_dot_q1_0_q8_0_generic ggml_vec_dot_q1_0_q8_0
318+
#define ggml_vec_dot_q2_0_q8_0_generic ggml_vec_dot_q2_0_q8_0
311319
// repack.cpp
312320
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
313321
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8

ggml/src/ggml-cpu/arch/arm/quants.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,80 @@ void ggml_vec_dot_q1_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
219219
#endif
220220
}
221221

222+
// Dot product of a Q2_0-quantized row (vx) with a Q8_0-quantized row (vy).
// The scalar result is written to *s. n is the element count and must be a
// multiple of QK2_0; nrc must be 1. bs, bx and by are not used by this kernel.
// When __ARM_NEON is not defined the call is forwarded to the generic version.
void ggml_vec_dot_q2_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
223+
const int qk = QK2_0;
224+
// Number of Q2_0 blocks in the row.
const int nb = n / qk;
225+
226+
assert(n % qk == 0);
227+
assert(nrc == 1);
228+
UNUSED(nrc);
229+
UNUSED(bx);
230+
UNUSED(by);
231+
UNUSED(bs);
232+
233+
const block_q2_0 * GGML_RESTRICT x = vx;
234+
const block_q8_0 * GGML_RESTRICT y = vy;
235+
236+
float sumf = 0.0f;
237+
238+
#if defined(__ARM_NEON)
239+
// Replicate pattern: each byte repeated 4 times
// (one copy per 2-bit field packed in that byte).
240+
static const uint8_t tbl_idx_lo[16] = {0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3};
241+
static const uint8_t tbl_idx_hi[16] = {4,4,4,4, 5,5,5,5, 6,6,6,6, 7,7,7,7};
242+
// Right-shift amounts: 0,2,4,6 repeated for each group of 4
// Stored as negative counts because they are fed to vshlq_u8 below.
243+
static const int8_t shift_vals[16] = {0,-2,-4,-6, 0,-2,-4,-6, 0,-2,-4,-6, 0,-2,-4,-6};
244+
245+
const uint8x16_t idx_lo = vld1q_u8(tbl_idx_lo);
246+
const uint8x16_t idx_hi = vld1q_u8(tbl_idx_hi);
247+
const int8x16_t shifts = vld1q_s8(shift_vals);
248+
// Keeps only the low 2 bits of each shifted byte.
const uint8x16_t mask2 = vdupq_n_u8(0x03);
249+
// Bias subtracted from every code: {0,1,2,3} becomes {-1,0,1,2}.
const int8x16_t one = vdupq_n_s8(1);
250+
251+
// Per-lane float accumulator; reduced to a scalar after the block loop.
float32x4_t sumv = vdupq_n_f32(0.0f);
252+
253+
for (int i = 0; i < nb; i++) {
254+
const float d0 = GGML_CPU_FP16_TO_FP32(x[i].d);
255+
256+
// Each Q2_0 block is consumed in 4 steps, one Q8_0 block (y[i * 4 + k])
// and 8 packed bytes of x[i].qs per step.
for (int k = 0; k < 4; k++) {
257+
const block_q8_0 * GGML_RESTRICT yb = &y[i * 4 + k];
258+
const float d1 = GGML_CPU_FP16_TO_FP32(yb->d);
259+
260+
// Load 8 bytes of packed 2-bit values
261+
const uint8x8_t raw = vld1_u8(&x[i].qs[k * 8]);
262+
// Duplicate into a 16-byte register so it can serve as the lookup table.
const uint8x16_t raw16 = vcombine_u8(raw, raw);
263+
264+
// First 16 elements: replicate bytes 0-3, shift, mask, subtract 1
265+
uint8x16_t bytes0 = vqtbl1q_u8(raw16, idx_lo);
266+
int8x16_t qv0 = vsubq_s8(
267+
vreinterpretq_s8_u8(vandq_u8(vshlq_u8(bytes0, shifts), mask2)),
268+
one);
269+
270+
// Second 16 elements: replicate bytes 4-7, shift, mask, subtract 1
271+
uint8x16_t bytes1 = vqtbl1q_u8(raw16, idx_hi);
272+
int8x16_t qv1 = vsubq_s8(
273+
vreinterpretq_s8_u8(vandq_u8(vshlq_u8(bytes1, shifts), mask2)),
274+
one);
275+
276+
// Load Q8_0 values and dot product
// NOTE(review): ggml_vdotq_s32 is defined elsewhere; presumably it adds
// int8 dot products into the int32 accumulator passed first - confirm.
277+
const int8x16_t y0 = vld1q_s8(yb->qs);
278+
const int8x16_t y1 = vld1q_s8(yb->qs + 16);
279+
280+
int32x4_t p0 = ggml_vdotq_s32(vdupq_n_s32(0), qv0, y0);
281+
int32x4_t p1 = ggml_vdotq_s32(p0, qv1, y1);
282+
283+
// Scale the integer partial sums by both block scales and accumulate.
sumv = vmlaq_n_f32(sumv, vcvtq_f32_s32(p1), d0 * d1);
284+
}
285+
}
286+
287+
// Horizontal add of the four accumulator lanes.
sumf = vaddvq_f32(sumv);
288+
#else
289+
// No NEON: the generic implementation writes *s itself, so return directly.
ggml_vec_dot_q2_0_q8_0_generic(n, s, bs, vx, bx, vy, by, nrc);
290+
return;
291+
#endif
292+
293+
*s = sumf;
294+
}
295+
222296

223297
void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
224298
const int qk = QK8_0;

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,12 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
241241
.vec_dot_type = GGML_TYPE_Q8_0,
242242
.nrows = 1,
243243
},
244+
[GGML_TYPE_Q2_0] = {
245+
.from_float = quantize_row_q2_0,
246+
.vec_dot = ggml_vec_dot_q2_0_q8_0,
247+
.vec_dot_type = GGML_TYPE_Q8_0,
248+
.nrows = 1,
249+
},
244250
[GGML_TYPE_Q4_0] = {
245251
.from_float = quantize_row_q4_0,
246252
.vec_dot = ggml_vec_dot_q4_0_q8_0,

ggml/src/ggml-cpu/ops.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ void ggml_compute_forward_add(
669669
ggml_compute_forward_add_non_quantized(params, dst);
670670
} break;
671671
case GGML_TYPE_Q1_0:
672+
case GGML_TYPE_Q2_0:
672673
case GGML_TYPE_Q4_0:
673674
case GGML_TYPE_Q4_1:
674675
case GGML_TYPE_Q5_0:
@@ -1121,6 +1122,7 @@ void ggml_compute_forward_add1(
11211122
}
11221123
} break;
11231124
case GGML_TYPE_Q1_0:
1125+
case GGML_TYPE_Q2_0:
11241126
case GGML_TYPE_Q4_0:
11251127
case GGML_TYPE_Q4_1:
11261128
case GGML_TYPE_Q5_0:
@@ -1253,6 +1255,7 @@ void ggml_compute_forward_acc(
12531255
case GGML_TYPE_F16:
12541256
case GGML_TYPE_BF16:
12551257
case GGML_TYPE_Q1_0:
1258+
case GGML_TYPE_Q2_0:
12561259
case GGML_TYPE_Q4_0:
12571260
case GGML_TYPE_Q4_1:
12581261
case GGML_TYPE_Q5_0:
@@ -4391,6 +4394,7 @@ void ggml_compute_forward_out_prod(
43914394

43924395
switch (src0->type) {
43934396
case GGML_TYPE_Q1_0:
4397+
case GGML_TYPE_Q2_0:
43944398
case GGML_TYPE_Q4_0:
43954399
case GGML_TYPE_Q4_1:
43964400
case GGML_TYPE_Q5_0:
@@ -4669,6 +4673,7 @@ void ggml_compute_forward_set(
46694673
case GGML_TYPE_F16:
46704674
case GGML_TYPE_BF16:
46714675
case GGML_TYPE_Q1_0:
4676+
case GGML_TYPE_Q2_0:
46724677
case GGML_TYPE_Q4_0:
46734678
case GGML_TYPE_Q4_1:
46744679
case GGML_TYPE_Q5_0:
@@ -4895,6 +4900,7 @@ void ggml_compute_forward_get_rows(
48954900

48964901
switch (src0->type) {
48974902
case GGML_TYPE_Q1_0:
4903+
case GGML_TYPE_Q2_0:
48984904
case GGML_TYPE_Q4_0:
48994905
case GGML_TYPE_Q4_1:
49004906
case GGML_TYPE_Q5_0:
@@ -5630,6 +5636,7 @@ void ggml_compute_forward_clamp(
56305636
} break;
56315637
case GGML_TYPE_BF16:
56325638
case GGML_TYPE_Q1_0:
5639+
case GGML_TYPE_Q2_0:
56335640
case GGML_TYPE_Q4_0:
56345641
case GGML_TYPE_Q4_1:
56355642
case GGML_TYPE_Q5_0:

ggml/src/ggml-cpu/quants.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ void quantize_row_q1_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
2626
quantize_row_q1_0_ref(x, y, k);
2727
}
2828

29+
// Quantize k floats from x into y as Q2_0.
// This entry point simply forwards to quantize_row_q2_0_ref.
void quantize_row_q2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
30+
quantize_row_q2_0_ref(x, y, k);
31+
}
32+
2933
void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
3034
quantize_row_q4_0_ref(x, y, k);
3135
}
@@ -170,6 +174,52 @@ void ggml_vec_dot_q1_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
170174
*s = sumf;
171175
}
172176

177+
// Scalar dot product of a Q2_0-quantized row (vx) with a Q8_0-quantized row (vy).
//
// n   - number of elements; must be a multiple of QK2_0
// s   - receives the resulting scalar
// nrc - must be 1
// bs, bx, by are accepted for signature compatibility and ignored.
void ggml_vec_dot_q2_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
    const int block_len = QK2_0;
    const int n_blocks  = n / block_len;

    assert(n % block_len == 0);
    assert(nrc == 1);
    UNUSED(nrc);
    UNUSED(bx);
    UNUSED(by);
    UNUSED(bs);

    const block_q2_0 * GGML_RESTRICT xblocks = vx;
    const block_q8_0 * GGML_RESTRICT yblocks = vy;

    float total = 0.0f;

    for (int ib = 0; ib < n_blocks; ib++) {
        const float x_scale = GGML_CPU_FP16_TO_FP32(xblocks[ib].d);

        // Sum over the four Q8_0 blocks covered by this Q2_0 block, each
        // already weighted by its own Q8_0 scale.
        float scaled_acc = 0.0f;

        for (int sub = 0; sub < 4; sub++) {
            const block_q8_0 * GGML_RESTRICT ysub = &yblocks[ib * 4 + sub];
            const float y_scale = GGML_CPU_FP16_TO_FP32(ysub->d);

            const uint8_t * GGML_RESTRICT packed = &xblocks[ib].qs[sub * 8];
            const int8_t  * GGML_RESTRICT q8     = ysub->qs;

            int isum = 0;

            for (int b = 0; b < 8; ++b) {
                const uint8_t bits = packed[b];
                // Four 2-bit codes per byte, lowest bits first;
                // codes {0,1,2,3} stand for the values {-1,0,1,2}.
                for (int lane = 0; lane < 4; ++lane) {
                    const int code = (int)((bits >> (2 * lane)) & 3);
                    isum += (code - 1) * q8[b*4 + lane];
                }
            }

            scaled_acc += y_scale * isum;
        }

        total += x_scale * scaled_acc;
    }

    *s = total;
}
173223

174224
void ggml_vec_dot_q4_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
175225
const int qk = QK8_0;

ggml/src/ggml-cpu/quants.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ extern "C" {
1313

1414
// Quantization
1515
void quantize_row_q1_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
16+
void quantize_row_q2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
1617
void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
1718
void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
1819
void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
@@ -38,6 +39,7 @@ void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y,
3839

3940
// Dot product
4041
void ggml_vec_dot_q1_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
42+
void ggml_vec_dot_q2_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4143
void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4244
void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4345
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
@@ -71,6 +73,7 @@ void quantize_row_q8_0_generic(const float * GGML_RESTRICT x, void * GGML_RESTRI
7173
void quantize_row_q8_1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
7274
void quantize_row_q8_K_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
7375
void ggml_vec_dot_q1_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
76+
void ggml_vec_dot_q2_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
7477
void ggml_vec_dot_q4_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
7578
void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
7679
void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);

0 commit comments

Comments
 (0)