Skip to content

Commit c018fa5

Browse files
authored
Merge pull request #280 from wegank/32-bit-fix-0
Avoid _mm_cvtsi128_si64 on 32-bit builds
2 parents 4ccf1ea + a7b6913 commit c018fa5

1 file changed

Lines changed: 6 additions & 0 deletions

File tree

src/fglm/linalg-fglm.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -211,7 +211,13 @@ uint64_t _mm256_hsum(__m256i a)
211211
__m256i sum_lo = _mm256_add_epi64(a, a_hi);
212212
__m128i sum_hi = _mm256_extracti128_si256(sum_lo, 1);
213213
__m128i sum = _mm_add_epi64(_mm256_castsi256_si128(sum_lo), sum_hi);
214+
#if defined(__x86_64__) || defined(_M_X64)
214215
return (uint64_t) _mm_cvtsi128_si64(sum);
216+
#else
217+
uint64_t lo = (uint32_t) _mm_cvtsi128_si32(sum);
218+
uint64_t hi = (uint32_t) _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
219+
return lo | (hi << 32);
220+
#endif
215221
}
216222

217223
uint32_t _nmod32_vec_dot_split_avx2(const uint32_t * vec1, const uint32_t * vec2, int64_t len,

0 commit comments

Comments
 (0)