11// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
22// SPDX-License-Identifier: MIT
33// RapidFuzz v1.0.2
4- // Generated: 2023-10-08 12:45:00.456286
4+ // Generated: 2023-10-08 18:06:53.104764
55// ----------------------------------------------------------
66// This file is an amalgamation of multiple different files.
77// You probably shouldn't edit it directly.
@@ -1375,7 +1375,7 @@ T bit_mask_lsb(int n)
13751375{
13761376 T mask = static_cast <T>(-1 );
13771377 if (n < static_cast <int >(sizeof (T) * 8 )) {
1378- mask += static_cast <T>(1 ) << n;
1378+ mask += static_cast <T>(static_cast <T>( 1 ) << n) ;
13791379 }
13801380 return mask;
13811381}
@@ -2345,7 +2345,7 @@ static inline native_simd<uint16_t> operator>(const native_simd<uint16_t>& a,
23452345static inline native_simd<uint32_t > operator >(const native_simd<uint32_t >& a,
23462346 const native_simd<uint32_t >& b) noexcept
23472347{
2348- __m256i signbit = _mm256_set1_epi32 (0x80000000 );
2348+ __m256i signbit = _mm256_set1_epi32 (static_cast < int32_t >( 0x80000000 ) );
23492349 __m256i a1 = _mm256_xor_si256 (a, signbit);
23502350 __m256i b1 = _mm256_xor_si256 (b, signbit);
23512351 return _mm256_cmpgt_epi32 (a1, b1); // signed compare
@@ -2934,7 +2934,7 @@ static inline native_simd<uint16_t> operator>(const native_simd<uint16_t>& a,
29342934static inline native_simd<uint32_t > operator >(const native_simd<uint32_t >& a,
29352935 const native_simd<uint32_t >& b) noexcept
29362936{
2937- __m128i signbit = _mm_set1_epi32 (0x80000000 );
2937+ __m128i signbit = _mm_set1_epi32 (static_cast < int32_t >( 0x80000000 ) );
29382938 __m128i a1 = _mm_xor_si128 (a, signbit);
29392939 __m128i b1 = _mm_xor_si128 (b, signbit);
29402940 return _mm_cmpgt_epi32 (a1, b1); // signed compare
@@ -2943,7 +2943,7 @@ static inline native_simd<uint32_t> operator>(const native_simd<uint32_t>& a,
29432943static inline native_simd<uint64_t > operator >(const native_simd<uint64_t >& a,
29442944 const native_simd<uint64_t >& b) noexcept
29452945{
2946- __m128i sign32 = _mm_set1_epi32 (0x80000000 ); // sign bit of each dword
2946+ __m128i sign32 = _mm_set1_epi32 (static_cast < int32_t >( 0x80000000 )); // sign bit of each dword
29472947 __m128i aflip = _mm_xor_si128 (a, sign32); // a with sign bits flipped to use signed compare
29482948 __m128i bflip = _mm_xor_si128 (b, sign32); // b with sign bits flipped to use signed compare
29492949 __m128i equal = _mm_cmpeq_epi32 (a, b); // a == b, dwords
@@ -3322,7 +3322,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase<T> {
33223322 friend T;
33233323};
33243324
3325- template <typename T>
3325+ template <typename T, typename ResType >
33263326struct MultiNormalizedMetricBase {
33273327 template <typename InputIt2>
33283328 void normalized_distance (double * scores, size_t score_count, InputIt2 first2, InputIt2 last2,
@@ -3362,23 +3362,23 @@ struct MultiNormalizedMetricBase {
33623362 throw std::invalid_argument (" scores has to have >= result_count() elements" );
33633363
33643364 // reinterpretation only works when the types have the same size
3365- int64_t * scores_i64 = nullptr ;
3366- if constexpr (sizeof (double ) == sizeof (int64_t ))
3367- scores_i64 = reinterpret_cast <int64_t *>(scores);
3365+ ResType* scores_orig = nullptr ;
3366+ if constexpr (sizeof (double ) == sizeof (ResType ))
3367+ scores_orig = reinterpret_cast <ResType *>(scores);
33683368 else
3369- scores_i64 = new int64_t [derived.result_count ()];
3369+ scores_orig = new ResType [derived.result_count ()];
33703370
33713371 Range s2_ (s2);
3372- derived.distance (scores_i64 , derived.result_count (), s2_);
3372+ derived.distance (scores_orig , derived.result_count (), s2_);
33733373
33743374 for (size_t i = 0 ; i < derived.get_input_count (); ++i) {
33753375 auto maximum = derived.maximum (i, s2);
33763376 double norm_dist =
3377- (maximum != 0 ) ? static_cast <double >(scores_i64 [i]) / static_cast <double >(maximum) : 0.0 ;
3377+ (maximum != 0 ) ? static_cast <double >(scores_orig [i]) / static_cast <double >(maximum) : 0.0 ;
33783378 scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0 ;
33793379 }
33803380
3381- if constexpr (sizeof (double ) != sizeof (int64_t )) delete[] scores_i64 ;
3381+ if constexpr (sizeof (double ) != sizeof (ResType )) delete[] scores_orig ;
33823382 }
33833383
33843384 template <typename InputIt2>
@@ -3400,7 +3400,7 @@ struct MultiNormalizedMetricBase {
34003400};
34013401
34023402template <typename T, typename ResType, int64_t WorstSimilarity, int64_t WorstDistance>
3403- struct MultiDistanceBase : public MultiNormalizedMetricBase <T> {
3403+ struct MultiDistanceBase : public MultiNormalizedMetricBase <T, ResType > {
34043404 template <typename InputIt2>
34053405 void distance (ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
34063406 ResType score_cutoff = static_cast <ResType>(WorstDistance)) const
@@ -3451,7 +3451,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase<T> {
34513451};
34523452
34533453template <typename T, typename ResType, int64_t WorstSimilarity, int64_t WorstDistance>
3454- struct MultiSimilarityBase : public MultiNormalizedMetricBase <T> {
3454+ struct MultiSimilarityBase : public MultiNormalizedMetricBase <T, ResType > {
34553455 template <typename InputIt2>
34563456 void distance (ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
34573457 ResType score_cutoff = static_cast <ResType>(WorstDistance)) const
@@ -4726,7 +4726,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase<MultiLCSseq<MaxLen>, int
47264726 std::numeric_limits<int64_t >::max()> {
47274727private:
47284728 friend detail::MultiSimilarityBase<MultiLCSseq<MaxLen>, int64_t , 0 , std::numeric_limits<int64_t >::max()>;
4729- friend detail::MultiNormalizedMetricBase<MultiLCSseq<MaxLen>>;
4729+ friend detail::MultiNormalizedMetricBase<MultiLCSseq<MaxLen>, int64_t >;
47304730
47314731 constexpr static size_t get_vec_size ()
47324732 {
@@ -5012,7 +5012,7 @@ struct MultiIndel
50125012 : public detail::MultiDistanceBase<MultiIndel<MaxLen>, int64_t , 0 , std::numeric_limits<int64_t >::max()> {
50135013private:
50145014 friend detail::MultiDistanceBase<MultiIndel<MaxLen>, int64_t , 0 , std::numeric_limits<int64_t >::max()>;
5015- friend detail::MultiNormalizedMetricBase<MultiIndel<MaxLen>>;
5015+ friend detail::MultiNormalizedMetricBase<MultiIndel<MaxLen>, int64_t >;
50165016
50175017public:
50185018 MultiIndel (size_t count) : scorer(count)
@@ -5570,7 +5570,7 @@ double jaro_similarity(const BlockPatternMatchVector& PM, Range<InputIt1> P, Ran
55705570#ifdef RAPIDFUZZ_SIMD
55715571template <typename VecType, typename InputIt, int _lto_hack = RAPIDFUZZ_LTO_HACK>
55725572void jaro_similarity_simd (Range<double *> scores, const detail::BlockPatternMatchVector& block,
5573- const std::vector<size_t >& s1_lengths, Range<InputIt> s2,
5573+ const std::vector<int64_t >& s1_lengths, Range<InputIt> s2,
55745574 double score_cutoff) noexcept
55755575{
55765576# ifdef RAPIDFUZZ_AVX2
@@ -5588,15 +5588,15 @@ void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatch
55885588 size_t result_index = 0 ;
55895589
55905590 if (score_cutoff > 1.0 ) {
5591- for (int64_t i = 0 ; i < s1_lengths.size (); i++)
5591+ for (int64_t i = 0 ; i < static_cast < int64_t >( s1_lengths.size () ); i++)
55925592 scores[i] = 0.0 ;
55935593
55945594 return ;
55955595 }
55965596
55975597 if (s2.empty ()) {
5598- for (int64_t i = 0 ; i < s1_lengths.size (); i++)
5599- scores[i ] = s1_lengths[i] ? 0.0 : 1.0 ;
5598+ for (size_t i = 0 ; i < s1_lengths.size (); i++)
5599+ scores[static_cast < int64_t >(i) ] = s1_lengths[i] ? 0.0 : 1.0 ;
56005600
56015601 return ;
56025602 }
@@ -5617,8 +5617,8 @@ void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatch
56175617
56185618 if (Bound > maxBound) maxBound = Bound;
56195619
5620- boundMaskSize_[i] = bit_mask_lsb<VecType>(2 * Bound);
5621- boundMask_[i] = bit_mask_lsb<VecType>(Bound + 1 );
5620+ boundMaskSize_[i] = bit_mask_lsb<VecType>(static_cast < int >( 2 * Bound) );
5621+ boundMask_[i] = bit_mask_lsb<VecType>(static_cast < int >( Bound + 1 ) );
56225622 });
56235623
56245624 if (s2_cur.size () > lastRelevantChar) s2_cur.remove_suffix (s2_cur.size () - lastRelevantChar);
@@ -5649,7 +5649,7 @@ void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatch
56495649 P_flag.store (P_flags.data ());
56505650 alignas (32 ) std::array<VecType, vec_width> T_flags;
56515651 T_flag.store (T_flags.data ());
5652- for (int64_t i = 0 ; i < vec_width; ++i) {
5652+ for (size_t i = 0 ; i < vec_width; ++i) {
56535653 VecType CommonChars = counts[i];
56545654 if (!jaro_common_char_filter (s1_lengths[result_index], s2.size (), CommonChars, score_cutoff)) {
56555655 scores[static_cast <int64_t >(result_index)] = 0.0 ;
@@ -5661,20 +5661,22 @@ void jaro_similarity_simd(Range<double*> scores, const detail::BlockPatternMatch
56615661 VecType T_flag_cur = T_flags[i];
56625662 size_t Transpositions = 0 ;
56635663
5664- int64_t cur_block = i / 4 ;
5665- int64_t offset = 8 * (i % 4 );
5664+ static constexpr size_t vecs_per_word = vec_width / vecs;
5665+ size_t cur_block = i / vecs_per_word;
5666+ int64_t offset = static_cast <int64_t >(sizeof (VecType) * 8 * (i % vecs_per_word));
56665667 while (T_flag_cur) {
5667- uint64_t PatternFlagMask = blsi (P_flag_cur);
5668+ VecType PatternFlagMask = blsi (P_flag_cur);
56685669
5669- Transpositions +=
5670- !(block. get (cur_block, s2[ countr_zero (T_flag_cur)]) & (PatternFlagMask << offset));
5670+ uint64_t PM_j = block. get (cur_block, s2[ countr_zero (T_flag_cur)]);
5671+ Transpositions += !(PM_j & (static_cast < uint64_t >( PatternFlagMask) << offset));
56715672
56725673 T_flag_cur = blsr (T_flag_cur);
56735674 P_flag_cur ^= PatternFlagMask;
56745675 }
56755676
56765677 double Sim =
56775678 jaro_calculate_similarity (s1_lengths[result_index], s2.size (), CommonChars, Transpositions);
5679+
56785680 scores[static_cast <int64_t >(result_index)] = (Sim >= score_cutoff) ? Sim : 0 ;
56795681 result_index++;
56805682 }
@@ -5763,7 +5765,7 @@ struct MultiJaro : public detail::MultiSimilarityBase<MultiJaro<MaxLen>, double,
57635765
57645766private:
57655767 friend detail::MultiSimilarityBase<MultiJaro<MaxLen>, double , 0 , 1 >;
5766- friend detail::MultiNormalizedMetricBase<MultiJaro<MaxLen>>;
5768+ friend detail::MultiNormalizedMetricBase<MultiJaro<MaxLen>, double >;
57675769
57685770 constexpr static size_t get_vec_size ()
57695771 {
@@ -5829,7 +5831,7 @@ struct MultiJaro : public detail::MultiSimilarityBase<MultiJaro<MaxLen>, double,
58295831
58305832 if (pos >= input_count) throw std::invalid_argument (" out of bounds insert" );
58315833
5832- str_lens[pos] = static_cast < size_t >( len) ;
5834+ str_lens[pos] = len;
58335835 for (; first1 != last1; ++first1) {
58345836 PM.insert (block, *first1, block_pos);
58355837 block_pos++;
@@ -5857,7 +5859,7 @@ struct MultiJaro : public detail::MultiSimilarityBase<MultiJaro<MaxLen>, double,
58575859 }
58585860
58595861 template <typename InputIt2>
5860- double maximum (size_t s1_idx, detail::Range<InputIt2>) const
5862+ double maximum ([[maybe_unused]] size_t s1_idx, detail::Range<InputIt2>) const
58615863 {
58625864 return 1.0 ;
58635865 }
@@ -5870,7 +5872,7 @@ struct MultiJaro : public detail::MultiSimilarityBase<MultiJaro<MaxLen>, double,
58705872 size_t input_count;
58715873 size_t pos = 0 ;
58725874 detail::BlockPatternMatchVector PM;
5873- std::vector<size_t > str_lens;
5875+ std::vector<int64_t > str_lens;
58745876};
58755877
58765878} /* namespace experimental */
@@ -6070,7 +6072,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
60706072
60716073private:
60726074 friend detail::MultiSimilarityBase<MultiJaroWinkler<MaxLen>, double , 0 , 1 >;
6073- friend detail::MultiNormalizedMetricBase<MultiJaroWinkler<MaxLen>>;
6075+ friend detail::MultiNormalizedMetricBase<MultiJaroWinkler<MaxLen>, double >;
60746076
60756077public:
60766078 MultiJaroWinkler (size_t count, double prefix_weight_) : scorer(count), prefix_weight(prefix_weight_)
@@ -6102,8 +6104,8 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
61026104 scorer.insert (first1, last1);
61036105 size_t len = static_cast <size_t >(std::distance (first1, last1));
61046106 std::array<uint64_t , 4 > prefix;
6105- for (size_t i = 0 ; i < std::min< int64_t > (len, 4 ); ++i)
6106- prefix[i] = ( uint64_t ) first1[i] ;
6107+ for (size_t i = 0 ; i < std::min (len, size_t ( 4 ) ); ++i)
6108+ prefix[i] = static_cast < uint64_t >( first1[static_cast < ptrdiff_t >(i)]) ;
61076109
61086110 str_lens.push_back (len);
61096111 prefixes.push_back (prefix);
@@ -6117,15 +6119,16 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
61176119 if (score_count < result_count ())
61186120 throw std::invalid_argument (" scores has to have >= result_count() elements" );
61196121
6120- scorer.similarity (scores, score_count, s2, score_cutoff);
6122+ scorer.similarity (scores, score_count, s2, std::min ( 0.7 , score_cutoff) );
61216123
61226124 for (size_t i = 0 ; i < get_input_count (); ++i) {
61236125 if (scores[i] > 0.7 ) {
6124- int64_t min_len = std::min< int64_t >(s2.size (), str_lens[i]);
6125- int64_t max_prefix = std::min< int64_t > (min_len, 4 );
6126- int64_t prefix = 0 ;
6126+ size_t min_len = std::min ( static_cast < size_t >(s2.size () ), str_lens[i]);
6127+ size_t max_prefix = std::min (min_len, size_t ( 4 ) );
6128+ size_t prefix = 0 ;
61276129 for (; prefix < max_prefix; ++prefix)
6128- if (s2[prefix] != prefixes[i][prefix]) break ;
6130+ if (static_cast <uint64_t >(s2[static_cast <ptrdiff_t >(prefix)]) != prefixes[i][prefix])
6131+ break ;
61296132
61306133 scores[i] += static_cast <double >(prefix) * prefix_weight * (1.0 - scores[i]);
61316134 }
@@ -6135,7 +6138,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase<MultiJaroWinkler<Ma
61356138 }
61366139
61376140 template <typename InputIt2>
6138- double maximum (size_t s1_idx, detail::Range<InputIt2>) const
6141+ double maximum ([[maybe_unused]] size_t s1_idx, detail::Range<InputIt2>) const
61396142 {
61406143 return 1.0 ;
61416144 }
@@ -7674,7 +7677,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase<MultiLevenshtein<MaxL
76747677private:
76757678 friend detail::MultiDistanceBase<MultiLevenshtein<MaxLen>, int64_t , 0 ,
76767679 std::numeric_limits<int64_t >::max()>;
7677- friend detail::MultiNormalizedMetricBase<MultiLevenshtein<MaxLen>>;
7680+ friend detail::MultiNormalizedMetricBase<MultiLevenshtein<MaxLen>, int64_t >;
76787681
76797682 constexpr static size_t get_vec_size ()
76807683 {
@@ -8240,7 +8243,7 @@ struct MultiOSA
82408243 : public detail::MultiDistanceBase<MultiOSA<MaxLen>, int64_t , 0 , std::numeric_limits<int64_t >::max()> {
82418244private:
82428245 friend detail::MultiDistanceBase<MultiOSA<MaxLen>, int64_t , 0 , std::numeric_limits<int64_t >::max()>;
8243- friend detail::MultiNormalizedMetricBase<MultiOSA<MaxLen>>;
8246+ friend detail::MultiNormalizedMetricBase<MultiOSA<MaxLen>, int64_t >;
82448247
82458248 constexpr static size_t get_vec_size ()
82468249 {
0 commit comments