Skip to content

Commit 7f82cf0

Browse files
committed
Style cleanup around AVX q1_0 dot
1 parent: 0c4fb41 · commit: 7f82cf0

File tree

1 file changed

+16
-16
lines changed

1 file changed

+16
-16
lines changed

ggml/src/ggml-cpu/arch/x86/quants.c

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -637,22 +637,22 @@ void ggml_vec_dot_q1_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
637 637
const __m256 q = _mm256_cvtepi32_ps(MM256_SET_M128I(sum32_1, sum32_0)); \
638 638
acc_block = _mm256_add_ps(acc_block, _mm256_mul_ps(_mm256_set1_ps(GGML_CPU_FP16_TO_FP32(y_ptr[(K)].d)), q)); \
639 639
}
640-
{ \
641-
const __m256i bit_mask = bytes_from_bits_32(&x[ib].qs[0]); \
642-
const __m128i bit_mask_0 = _mm256_castsi256_si128(bit_mask); \
643-
const __m128i bit_mask_1 = _mm256_extractf128_si256(bit_mask, 1); \
644-
const __m128i qy_0 = _mm_loadu_si128((const __m128i *) &y_ptr[0].qs[0]); \
645-
const __m128i qy_1 = _mm_loadu_si128((const __m128i *) &y_ptr[0].qs[16]); \
646-
const __m128i sign_mask_0 = _mm_cmpeq_epi8(bit_mask_0, zero); \
647-
const __m128i sign_mask_1 = _mm_cmpeq_epi8(bit_mask_1, zero); \
648-
const __m128i sy_0 = _mm_sub_epi8(_mm_xor_si128(qy_0, sign_mask_0), sign_mask_0); \
649-
const __m128i sy_1 = _mm_sub_epi8(_mm_xor_si128(qy_1, sign_mask_1), sign_mask_1); \
650-
const __m128i sum16_0 = _mm_maddubs_epi16(ones_8, sy_0); \
651-
const __m128i sum16_1 = _mm_maddubs_epi16(ones_8, sy_1); \
652-
const __m128i sum32_0 = _mm_madd_epi16(sum16_0, ones_16); \
653-
const __m128i sum32_1 = _mm_madd_epi16(sum16_1, ones_16); \
654-
const __m256 q = _mm256_cvtepi32_ps(MM256_SET_M128I(sum32_1, sum32_0)); \
655-
acc_block = _mm256_mul_ps(_mm256_set1_ps(GGML_CPU_FP16_TO_FP32(y_ptr[0].d)), q); \
640+
{
641+
const __m256i bit_mask = bytes_from_bits_32(&x[ib].qs[0]);
642+
const __m128i bit_mask_0 = _mm256_castsi256_si128(bit_mask);
643+
const __m128i bit_mask_1 = _mm256_extractf128_si256(bit_mask, 1);
644+
const __m128i qy_0 = _mm_loadu_si128((const __m128i *) &y_ptr[0].qs[0]);
645+
const __m128i qy_1 = _mm_loadu_si128((const __m128i *) &y_ptr[0].qs[16]);
646+
const __m128i sign_mask_0 = _mm_cmpeq_epi8(bit_mask_0, zero);
647+
const __m128i sign_mask_1 = _mm_cmpeq_epi8(bit_mask_1, zero);
648+
const __m128i sy_0 = _mm_sub_epi8(_mm_xor_si128(qy_0, sign_mask_0), sign_mask_0);
649+
const __m128i sy_1 = _mm_sub_epi8(_mm_xor_si128(qy_1, sign_mask_1), sign_mask_1);
650+
const __m128i sum16_0 = _mm_maddubs_epi16(ones_8, sy_0);
651+
const __m128i sum16_1 = _mm_maddubs_epi16(ones_8, sy_1);
652+
const __m128i sum32_0 = _mm_madd_epi16(sum16_0, ones_16);
653+
const __m128i sum32_1 = _mm_madd_epi16(sum16_1, ones_16);
654+
const __m256 q = _mm256_cvtepi32_ps(MM256_SET_M128I(sum32_1, sum32_0));
655+
acc_block = _mm256_mul_ps(_mm256_set1_ps(GGML_CPU_FP16_TO_FP32(y_ptr[0].d)), q);
656 656
}
657 657
Q1_AVX_BLOCK(1)
658 658
Q1_AVX_BLOCK(2)

0 commit comments

Comments
 (0)