Skip to content

Commit 3a637d3

Browse files
committed
fix some bugs in simd Jaro/JaroWinkler implementation
1 parent f121d28 commit 3a637d3

13 files changed

Lines changed: 100 additions & 92 deletions

File tree

extras/rapidfuzz_amalgamated.hpp

Lines changed: 47 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
22
// SPDX-License-Identifier: MIT
33
// RapidFuzz v1.0.2
4-
// Generated: 2023-10-08 12:45:00.456286
4+
// Generated: 2023-10-08 18:06:53.104764
55
// ----------------------------------------------------------
66
// This file is an amalgamation of multiple different files.
77
// You probably shouldn't edit it directly.
@@ -1375,7 +1375,7 @@ T bit_mask_lsb(int n)
13751375
{
13761376
T mask = static_cast<T>(-1);
13771377
if (n < static_cast<int>(sizeof(T) * 8)) {
1378-
mask += static_cast<T>(1) << n;
1378+
mask += static_cast<T>(static_cast<T>(1) << n);
13791379
}
13801380
return mask;
13811381
}
@@ -2345,7 +2345,7 @@ static inline native_simd<uint16_t> operator>(const native_simd<uint16_t>& a,
23452345
static inline native_simd<uint32_t> operator>(const native_simd<uint32_t>& a,
23462346
const native_simd<uint32_t>& b) noexcept
23472347
{
2348-
__m256i signbit = _mm256_set1_epi32(0x80000000);
2348+
__m256i signbit = _mm256_set1_epi32(static_cast<int32_t>(0x80000000));
23492349
__m256i a1 = _mm256_xor_si256(a, signbit);
23502350
__m256i b1 = _mm256_xor_si256(b, signbit);
23512351
return _mm256_cmpgt_epi32(a1, b1); // signed compare
@@ -2934,7 +2934,7 @@ static inline native_simd<uint16_t> operator>(const native_simd<uint16_t>& a,
29342934
static inline native_simd<uint32_t> operator>(const native_simd<uint32_t>& a,
29352935
const native_simd<uint32_t>& b) noexcept
29362936
{
2937-
__m128i signbit = _mm_set1_epi32(0x80000000);
2937+
__m128i signbit = _mm_set1_epi32(static_cast<int32_t>(0x80000000));
29382938
__m128i a1 = _mm_xor_si128(a, signbit);
29392939
__m128i b1 = _mm_xor_si128(b, signbit);
29402940
return _mm_cmpgt_epi32(a1, b1); // signed compare
@@ -2943,7 +2943,7 @@ static inline native_simd<uint32_t> operator>(const native_simd<uint32_t>& a,
29432943
static inline native_simd<uint64_t> operator>(const native_simd<uint64_t>& a,
29442944
const native_simd<uint64_t>& b) noexcept
29452945
{
2946-
__m128i sign32 = _mm_set1_epi32(0x80000000); // sign bit of each dword
2946+
__m128i sign32 = _mm_set1_epi32(static_cast<int32_t>(0x80000000)); // sign bit of each dword
29472947
__m128i aflip = _mm_xor_si128(a, sign32); // a with sign bits flipped to use signed compare
29482948
__m128i bflip = _mm_xor_si128(b, sign32); // b with sign bits flipped to use signed compare
29492949
__m128i equal = _mm_cmpeq_epi32(a, b); // a == b, dwords
@@ -3322,7 +3322,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase<T> {
33223322
friend T;
33233323
};
33243324

3325-
template <typename T>
3325+
template <typename T, typename ResType>
33263326
struct MultiNormalizedMetricBase {
33273327
template <typename InputIt2>
33283328
void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
@@ -3362,23 +3362,23 @@ struct MultiNormalizedMetricBase {
33623362
throw std::invalid_argument("scores has to have >= result_count() elements");
33633363

33643364
// reinterpretation only works when the types have the same size
3365-
int64_t* scores_i64 = nullptr;
3366-
if constexpr (sizeof(double) == sizeof(int64_t))
3367-
scores_i64 = reinterpret_cast<int64_t*>(scores);
3365+
ResType* scores_orig = nullptr;
3366+
if constexpr (sizeof(double) == sizeof(ResType))
3367+
scores_orig = reinterpret_cast<ResType*>(scores);
33683368
else
3369-
scores_i64 = new int64_t[derived.result_count()];
3369+
scores_orig = new ResType[derived.result_count()];
33703370

33713371
Range s2_(s2);
3372-
derived.distance(scores_i64, derived.result_count(), s2_);
3372+
derived.distance(scores_orig, derived.result_count(), s2_);
33733373

33743374
for (size_t i = 0; i < derived.get_input_count(); ++i) {
33753375
auto maximum = derived.maximum(i, s2);
33763376
double norm_dist =
3377-
(maximum != 0) ? static_cast<double>(scores_i64[i]) / static_cast<double>(maximum) : 0.0;
3377+
(maximum != 0) ? static_cast<double>(scores_orig[i]) / static_cast<double>(maximum) : 0.0;
33783378
scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0;
33793379
}
33803380

3381-
if constexpr (sizeof(double) != sizeof(int64_t)) delete[] scores_i64;
3381+
if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig;
33823382
}
33833383

33843384
template <typename InputIt2>
@@ -3400,7 +3400,7 @@ struct MultiNormalizedMetricBase {
34003400
};
34013401

34023402
template <typename T, typename ResType, int64_t WorstSimilarity, int64_t WorstDistance>
3403-
struct MultiDistanceBase : public MultiNormalizedMetricBase<T> {
3403+
struct MultiDistanceBase : public MultiNormalizedMetricBase<T, ResType> {
34043404
template <typename InputIt2>
34053405
void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
34063406
ResType score_cutoff = static_cast<ResType>(WorstDistance)) const
@@ -3451,7 +3451,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase<T> {
34513451
};
34523452

34533453
template <typename T, typename ResType, int64_t WorstSimilarity, int64_t WorstDistance>
3454-
struct MultiSimilarityBase : public MultiNormalizedMetricBase<T> {
3454+
struct MultiSimilarityBase : public MultiNormalizedMetricBase<T, ResType> {
34553455
template <typename InputIt2>
34563456
void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
34573457
ResType score_cutoff = static_cast<ResType>(WorstDistance)) const
@@ -4726,7 +4726,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase<MultiLCSseq<MaxLen>, int
47264726
std::numeric_limits<int64_t>::max()> {
47274727
private:
47284728
friend detail::MultiSimilarityBase<MultiLCSseq<MaxLen>, int64_t, 0, std::numeric_limits<int64_t>::max()>;
4729-
friend detail::MultiNormalizedMetricBase<MultiLCSseq<MaxLen>>;
4729+
friend detail::MultiNormalizedMetricBase<MultiLCSseq<MaxLen>, int64_t>;
47304730

47314731
constexpr static size_t get_vec_size()
47324732
{
@@ -5012,7 +5012,7 @@ struct MultiIndel
50125012
: public detail::MultiDistanceBase<MultiIndel<MaxLen>, int64_t, 0, std::numeric_limits<int64_t>::max()> {
50135013
private:
50145014
friend detail::MultiDistanceBase<MultiIndel<MaxLen>, int64_t, 0, std::numeric_limits<int64_t>::max()>;
5015-
friend detail::MultiNormalizedMetricBase<MultiIndel<MaxLen>>;
5015+
friend detail::MultiNormalizedMetricBase<MultiIndel<MaxLen>, int64_t>;
50165016

50175017
public:
50185018
MultiIndel(size_t count) : scorer(count)
@@ -5570,7 +5570,7 @@ double jaro_similarity(const BlockPatternMatchVector& PM, Range<InputIt1> P, Ran
55705570
#ifdef RAPIDFUZZ_SIMD
55715571
template <typename VecType, typename InputIt, int _lto_hack = RAPIDFUZZ_LTO_HACK>
55725572
void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatchVector& block,
5573-
const std::vector<size_t>& s1_lengths, Range<InputIt> s2,
5573+
const std::vector<int64_t>& s1_lengths, Range<InputIt> s2,
55745574
double score_cutoff) noexcept
55755575
{
55765576
# ifdef RAPIDFUZZ_AVX2
@@ -5588,15 +5588,15 @@ void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatch
55885588
size_t result_index = 0;
55895589

55905590
if (score_cutoff > 1.0) {
5591-
for (int64_t i = 0; i < s1_lengths.size(); i++)
5591+
for (int64_t i = 0; i < static_cast<int64_t>(s1_lengths.size()); i++)
55925592
scores[i] = 0.0;
55935593

55945594
return;
55955595
}
55965596

55975597
if (s2.empty()) {
5598-
for (int64_t i = 0; i < s1_lengths.size(); i++)
5599-
scores[i] = s1_lengths[i] ? 0.0 : 1.0;
5598+
for (size_t i = 0; i < s1_lengths.size(); i++)
5599+
scores[static_cast<int64_t>(i)] = s1_lengths[i] ? 0.0 : 1.0;
56005600

56015601
return;
56025602
}
@@ -5617,8 +5617,8 @@ void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatch
56175617

56185618
if (Bound > maxBound) maxBound = Bound;
56195619

5620-
boundMaskSize_[i] = bit_mask_lsb<VecType>(2 * Bound);
5621-
boundMask_[i] = bit_mask_lsb<VecType>(Bound + 1);
5620+
boundMaskSize_[i] = bit_mask_lsb<VecType>(static_cast<int>(2 * Bound));
5621+
boundMask_[i] = bit_mask_lsb<VecType>(static_cast<int>(Bound + 1));
56225622
});
56235623

56245624
if (s2_cur.size() > lastRelevantChar) s2_cur.remove_suffix(s2_cur.size() - lastRelevantChar);
@@ -5649,7 +5649,7 @@ void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatch
56495649
P_flag.store(P_flags.data());
56505650
alignas(32) std::array<VecType, vec_width> T_flags;
56515651
T_flag.store(T_flags.data());
5652-
for (int64_t i = 0; i < vec_width; ++i) {
5652+
for (size_t i = 0; i < vec_width; ++i) {
56535653
VecType CommonChars = counts[i];
56545654
if (!jaro_common_char_filter(s1_lengths[result_index], s2.size(), CommonChars, score_cutoff)) {
56555655
scores[static_cast<int64_t>(result_index)] = 0.0;
@@ -5661,20 +5661,22 @@ void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatch
56615661
VecType T_flag_cur = T_flags[i];
56625662
size_t Transpositions = 0;
56635663

5664-
int64_t cur_block = i / 4;
5665-
int64_t offset = 8 * (i % 4);
5664+
static constexpr size_t vecs_per_word = vec_width / vecs;
5665+
size_t cur_block = i / vecs_per_word;
5666+
int64_t offset = static_cast<int64_t>(sizeof(VecType) * 8 * (i % vecs_per_word));
56665667
while (T_flag_cur) {
5667-
uint64_t PatternFlagMask = blsi(P_flag_cur);
5668+
VecType PatternFlagMask = blsi(P_flag_cur);
56685669

5669-
Transpositions +=
5670-
!(block.get(cur_block, s2[countr_zero(T_flag_cur)]) & (PatternFlagMask << offset));
5670+
uint64_t PM_j = block.get(cur_block, s2[countr_zero(T_flag_cur)]);
5671+
Transpositions += !(PM_j & (static_cast<uint64_t>(PatternFlagMask) << offset));
56715672

56725673
T_flag_cur = blsr(T_flag_cur);
56735674
P_flag_cur ^= PatternFlagMask;
56745675
}
56755676

56765677
double Sim =
56775678
jaro_calculate_similarity(s1_lengths[result_index], s2.size(), CommonChars, Transpositions);
5679+
56785680
scores[static_cast<int64_t>(result_index)] = (Sim >= score_cutoff) ? Sim : 0;
56795681
result_index++;
56805682
}
@@ -5763,7 +5765,7 @@ struct MultiJaro : public detail::MultiSimilarityBase<MultiJaro<MaxLen>, double,
57635765

57645766
private:
57655767
friend detail::MultiSimilarityBase<MultiJaro<MaxLen>, double, 0, 1>;
5766-
friend detail::MultiNormalizedMetricBase<MultiJaro<MaxLen>>;
5768+
friend detail::MultiNormalizedMetricBase<MultiJaro<MaxLen>, double>;
57675769

57685770
constexpr static size_t get_vec_size()
57695771
{
@@ -5829,7 +5831,7 @@ struct MultiJaro : public detail::MultiSimilarityBase<MultiJaro<MaxLen>, double,
58295831

58305832
if (pos >= input_count) throw std::invalid_argument("out of bounds insert");
58315833

5832-
str_lens[pos] = static_cast<size_t>(len);
5834+
str_lens[pos] = len;
58335835
for (; first1 != last1; ++first1) {
58345836
PM.insert(block, *first1, block_pos);
58355837
block_pos++;
@@ -5857,7 +5859,7 @@ struct MultiJaro : public detail::MultiSimilarityBase<MultiJaro<MaxLen>, double,
58575859
}
58585860

58595861
template <typename InputIt2>
5860-
double maximum(size_t s1_idx, detail::Range<InputIt2>) const
5862+
double maximum([[maybe_unused]] size_t s1_idx, detail::Range<InputIt2>) const
58615863
{
58625864
return 1.0;
58635865
}
@@ -5870,7 +5872,7 @@ struct MultiJaro : public detail::MultiSimilarityBase<MultiJaro<MaxLen>, double,
58705872
size_t input_count;
58715873
size_t pos = 0;
58725874
detail::BlockPatternMatchVector PM;
5873-
std::vector<size_t> str_lens;
5875+
std::vector<int64_t> str_lens;
58745876
};
58755877

58765878
} /* namespace experimental */
@@ -6070,7 +6072,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
60706072

60716073
private:
60726074
friend detail::MultiSimilarityBase<MultiJaroWinkler<MaxLen>, double, 0, 1>;
6073-
friend detail::MultiNormalizedMetricBase<MultiJaroWinkler<MaxLen>>;
6075+
friend detail::MultiNormalizedMetricBase<MultiJaroWinkler<MaxLen>, double>;
60746076

60756077
public:
60766078
MultiJaroWinkler(size_t count, double prefix_weight_) : scorer(count), prefix_weight(prefix_weight_)
@@ -6102,8 +6104,8 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
61026104
scorer.insert(first1, last1);
61036105
size_t len = static_cast<size_t>(std::distance(first1, last1));
61046106
std::array<uint64_t, 4> prefix;
6105-
for (size_t i = 0; i < std::min<int64_t>(len, 4); ++i)
6106-
prefix[i] = (uint64_t)first1[i];
6107+
for (size_t i = 0; i < std::min(len, size_t(4)); ++i)
6108+
prefix[i] = static_cast<uint64_t>(first1[static_cast<ptrdiff_t>(i)]);
61076109

61086110
str_lens.push_back(len);
61096111
prefixes.push_back(prefix);
@@ -6117,15 +6119,16 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
61176119
if (score_count < result_count())
61186120
throw std::invalid_argument("scores has to have >= result_count() elements");
61196121

6120-
scorer.similarity(scores, score_count, s2, score_cutoff);
6122+
scorer.similarity(scores, score_count, s2, std::min(0.7, score_cutoff));
61216123

61226124
for (size_t i = 0; i < get_input_count(); ++i) {
61236125
if (scores[i] > 0.7) {
6124-
int64_t min_len = std::min<int64_t>(s2.size(), str_lens[i]);
6125-
int64_t max_prefix = std::min<int64_t>(min_len, 4);
6126-
int64_t prefix = 0;
6126+
size_t min_len = std::min(static_cast<size_t>(s2.size()), str_lens[i]);
6127+
size_t max_prefix = std::min(min_len, size_t(4));
6128+
size_t prefix = 0;
61276129
for (; prefix < max_prefix; ++prefix)
6128-
if (s2[prefix] != prefixes[i][prefix]) break;
6130+
if (static_cast<uint64_t>(s2[static_cast<ptrdiff_t>(prefix)]) != prefixes[i][prefix])
6131+
break;
61296132

61306133
scores[i] += static_cast<double>(prefix) * prefix_weight * (1.0 - scores[i]);
61316134
}
@@ -6135,7 +6138,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
61356138
}
61366139

61376140
template <typename InputIt2>
6138-
double maximum(size_t s1_idx, detail::Range<InputIt2>) const
6141+
double maximum([[maybe_unused]] size_t s1_idx, detail::Range<InputIt2>) const
61396142
{
61406143
return 1.0;
61416144
}
@@ -7674,7 +7677,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase<MultiLevenshtein<MaxL
76747677
private:
76757678
friend detail::MultiDistanceBase<MultiLevenshtein<MaxLen>, int64_t, 0,
76767679
std::numeric_limits<int64_t>::max()>;
7677-
friend detail::MultiNormalizedMetricBase<MultiLevenshtein<MaxLen>>;
7680+
friend detail::MultiNormalizedMetricBase<MultiLevenshtein<MaxLen>, int64_t>;
76787681

76797682
constexpr static size_t get_vec_size()
76807683
{
@@ -8240,7 +8243,7 @@ struct MultiOSA
82408243
: public detail::MultiDistanceBase<MultiOSA<MaxLen>, int64_t, 0, std::numeric_limits<int64_t>::max()> {
82418244
private:
82428245
friend detail::MultiDistanceBase<MultiOSA<MaxLen>, int64_t, 0, std::numeric_limits<int64_t>::max()>;
8243-
friend detail::MultiNormalizedMetricBase<MultiOSA<MaxLen>>;
8246+
friend detail::MultiNormalizedMetricBase<MultiOSA<MaxLen>, int64_t>;
82448247

82458248
constexpr static size_t get_vec_size()
82468249
{

rapidfuzz/details/distance.hpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -362,7 +362,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase<T> {
362362
friend T;
363363
};
364364

365-
template <typename T>
365+
template <typename T, typename ResType>
366366
struct MultiNormalizedMetricBase {
367367
template <typename InputIt2>
368368
void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
@@ -402,23 +402,23 @@ struct MultiNormalizedMetricBase {
402402
throw std::invalid_argument("scores has to have >= result_count() elements");
403403

404404
// reinterpretation only works when the types have the same size
405-
int64_t* scores_i64 = nullptr;
406-
if constexpr (sizeof(double) == sizeof(int64_t))
407-
scores_i64 = reinterpret_cast<int64_t*>(scores);
405+
ResType* scores_orig = nullptr;
406+
if constexpr (sizeof(double) == sizeof(ResType))
407+
scores_orig = reinterpret_cast<ResType*>(scores);
408408
else
409-
scores_i64 = new int64_t[derived.result_count()];
409+
scores_orig = new ResType[derived.result_count()];
410410

411411
Range s2_(s2);
412-
derived.distance(scores_i64, derived.result_count(), s2_);
412+
derived.distance(scores_orig, derived.result_count(), s2_);
413413

414414
for (size_t i = 0; i < derived.get_input_count(); ++i) {
415415
auto maximum = derived.maximum(i, s2);
416416
double norm_dist =
417-
(maximum != 0) ? static_cast<double>(scores_i64[i]) / static_cast<double>(maximum) : 0.0;
417+
(maximum != 0) ? static_cast<double>(scores_orig[i]) / static_cast<double>(maximum) : 0.0;
418418
scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0;
419419
}
420420

421-
if constexpr (sizeof(double) != sizeof(int64_t)) delete[] scores_i64;
421+
if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig;
422422
}
423423

424424
template <typename InputIt2>
@@ -440,7 +440,7 @@ struct MultiNormalizedMetricBase {
440440
};
441441

442442
template <typename T, typename ResType, int64_t WorstSimilarity, int64_t WorstDistance>
443-
struct MultiDistanceBase : public MultiNormalizedMetricBase<T> {
443+
struct MultiDistanceBase : public MultiNormalizedMetricBase<T, ResType> {
444444
template <typename InputIt2>
445445
void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
446446
ResType score_cutoff = static_cast<ResType>(WorstDistance)) const
@@ -491,7 +491,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase<T> {
491491
};
492492

493493
template <typename T, typename ResType, int64_t WorstSimilarity, int64_t WorstDistance>
494-
struct MultiSimilarityBase : public MultiNormalizedMetricBase<T> {
494+
struct MultiSimilarityBase : public MultiNormalizedMetricBase<T, ResType> {
495495
template <typename InputIt2>
496496
void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
497497
ResType score_cutoff = static_cast<ResType>(WorstDistance)) const

rapidfuzz/details/intrinsics.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ T bit_mask_lsb(int n)
2121
{
2222
T mask = static_cast<T>(-1);
2323
if (n < static_cast<int>(sizeof(T) * 8)) {
24-
mask += static_cast<T>(1) << n;
24+
mask += static_cast<T>(static_cast<T>(1) << n);
2525
}
2626
return mask;
2727
}

rapidfuzz/details/simd_avx2.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -533,7 +533,7 @@ static inline native_simd<uint16_t> operator>(const native_simd<uint16_t>& a,
533533
static inline native_simd<uint32_t> operator>(const native_simd<uint32_t>& a,
534534
const native_simd<uint32_t>& b) noexcept
535535
{
536-
__m256i signbit = _mm256_set1_epi32(0x80000000);
536+
__m256i signbit = _mm256_set1_epi32(static_cast<int32_t>(0x80000000));
537537
__m256i a1 = _mm256_xor_si256(a, signbit);
538538
__m256i b1 = _mm256_xor_si256(b, signbit);
539539
return _mm256_cmpgt_epi32(a1, b1); // signed compare

rapidfuzz/details/simd_sse2.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -562,7 +562,7 @@ static inline native_simd<uint16_t> operator>(const native_simd<uint16_t>& a,
562562
static inline native_simd<uint32_t> operator>(const native_simd<uint32_t>& a,
563563
const native_simd<uint32_t>& b) noexcept
564564
{
565-
__m128i signbit = _mm_set1_epi32(0x80000000);
565+
__m128i signbit = _mm_set1_epi32(static_cast<int32_t>(0x80000000));
566566
__m128i a1 = _mm_xor_si128(a, signbit);
567567
__m128i b1 = _mm_xor_si128(b, signbit);
568568
return _mm_cmpgt_epi32(a1, b1); // signed compare
@@ -571,7 +571,7 @@ static inline native_simd<uint32_t> operator>(const native_simd<uint32_t>& a,
571571
static inline native_simd<uint64_t> operator>(const native_simd<uint64_t>& a,
572572
const native_simd<uint64_t>& b) noexcept
573573
{
574-
__m128i sign32 = _mm_set1_epi32(0x80000000); // sign bit of each dword
574+
__m128i sign32 = _mm_set1_epi32(static_cast<int32_t>(0x80000000)); // sign bit of each dword
575575
__m128i aflip = _mm_xor_si128(a, sign32); // a with sign bits flipped to use signed compare
576576
__m128i bflip = _mm_xor_si128(b, sign32); // b with sign bits flipped to use signed compare
577577
__m128i equal = _mm_cmpeq_epi32(a, b); // a == b, dwords

0 commit comments

Comments
 (0)