Skip to content

Commit 0c4fb41

Browse files
committed
Fix inconsistent fp16 conversion in the generic q1_0 dot product and deduplicate the generic fallback in the x86 backend
1 parent 8587b5c commit 0c4fb41

File tree

2 files changed

+6
-35
lines changed

2 files changed

+6
-35
lines changed

ggml/src/ggml-cpu/arch/x86/quants.c

Lines changed: 4 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -700,39 +700,10 @@ void ggml_vec_dot_q1_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
700700

701701
*s = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
702702
#else
703-
float sumf = 0.0f;
704-
705-
for (int ib = 0; ib < nb; ++ib) {
706-
const float d0 = GGML_CPU_FP16_TO_FP32(x[ib].d);
707-
float sumi = 0.0f;
708-
709-
for (int k = 0; k < 4; k++) {
710-
const block_q8_0 * GGML_RESTRICT yb = &y[ib * 4 + k];
711-
const float d1 = GGML_CPU_FP16_TO_FP32(yb->d);
712-
int sumi_block = 0;
713-
714-
const uint8_t * GGML_RESTRICT bits = &x[ib].qs[k * 4];
715-
const int8_t * GGML_RESTRICT qy = yb->qs;
716-
717-
for (int b = 0; b < 4; ++b, qy += 8) {
718-
const unsigned mask = bits[b];
719-
sumi_block += ((mask & 0x01) ? qy[0] : -qy[0])
720-
+ ((mask & 0x02) ? qy[1] : -qy[1])
721-
+ ((mask & 0x04) ? qy[2] : -qy[2])
722-
+ ((mask & 0x08) ? qy[3] : -qy[3])
723-
+ ((mask & 0x10) ? qy[4] : -qy[4])
724-
+ ((mask & 0x20) ? qy[5] : -qy[5])
725-
+ ((mask & 0x40) ? qy[6] : -qy[6])
726-
+ ((mask & 0x80) ? qy[7] : -qy[7]);
727-
}
728-
729-
sumi += d1 * sumi_block;
730-
}
731-
732-
sumf += d0 * sumi;
733-
}
734-
735-
*s = sumf;
703+
UNUSED(nb);
704+
UNUSED(x);
705+
UNUSED(y);
706+
ggml_vec_dot_q1_0_q8_0_generic(n, s, bs, vx, bx, vy, by, nrc);
736707
#endif
737708
}
738709

ggml/src/ggml-cpu/quants.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,13 +137,13 @@ void ggml_vec_dot_q1_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
137137
float sumf = 0.0;
138138

139139
for (int i = 0; i < nb; i++) {
140-
const float d0 = GGML_FP16_TO_FP32(x[i].d);
140+
const float d0 = GGML_CPU_FP16_TO_FP32(x[i].d);
141141

142142
float sumi = 0.0f;
143143

144144
for (int k = 0; k < 4; k++) {
145145
const block_q8_0 * GGML_RESTRICT yb = &y[i * 4 + k];
146-
const float d1 = GGML_FP16_TO_FP32(yb->d);
146+
const float d1 = GGML_CPU_FP16_TO_FP32(yb->d);
147147
int sumi_block = 0;
148148

149149
const uint8_t * GGML_RESTRICT bits = &x[i].qs[k * 4];

0 commit comments

Comments
 (0)