11// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
22// SPDX-License-Identifier: MIT
33// RapidFuzz v1.0.2
4- // Generated: 2023-10-31 11:09:46.332642
4+ // Generated: 2023-10-31 11:32:43.133377
55// ----------------------------------------------------------
66// This file is an amalgamation of multiple different files.
77// You probably shouldn't edit it directly.
@@ -5822,29 +5822,43 @@ jaro_similarity_simd_long_s2(Range<double*> scores, const detail::BlockPatternMa
58225822 assert (block.size () % vecs == 0 );
58235823 assert (static_cast <size_t >(s2.size ()) > sizeof (VecType) * 8 );
58245824
5825+ struct AlignedAlloc {
5826+ AlignedAlloc (size_t size)
5827+ {
5828+ // work around compilation failure in msvc
5829+ memory = operator new [](size, std::align_val_t (native_simd<VecType>::alignment));
5830+ }
5831+
5832+ ~AlignedAlloc ()
5833+ {
5834+ ::operator delete[] (memory, std::align_val_t (native_simd<VecType>::alignment));
5835+ }
5836+
5837+ void * memory = nullptr ;
5838+ };
5839+
58255840 native_simd<VecType> zero (VecType (0 ));
58265841 native_simd<VecType> one (1 );
58275842 size_t result_index = 0 ;
58285843
58295844 size_t s2_block_count = static_cast <size_t >(detail::ceil_div (s2.size (), sizeof (VecType) * 8 ));
5830- std::vector<native_simd<VecType>> T_flag;
5831- T_flag.resize (s2_block_count);
5832-
5833- std::vector<native_simd<VecType>> counter;
5834- counter.resize (s2_block_count);
5845+ AlignedAlloc memory (2 * s2_block_count * sizeof (native_simd<VecType>));
58355846
5836- std::vector<std::array<VecType, vec_width>> T_flags;
5837- T_flags.resize (s2_block_count);
5847+ native_simd<VecType>* T_flag = static_cast <native_simd<VecType>*>(memory.memory );
5848+ // reuse the same memory since counter is only required in the first half of the algorithm while
5849+ // T_flags is required in the second half
5850+ native_simd<VecType>* counter = static_cast <native_simd<VecType>*>(memory.memory ) + s2_block_count;
5851+ VecType* T_flags = static_cast <VecType*>(memory.memory ) + s2_block_count * vec_width;
58385852
58395853 for (size_t cur_vec = 0 ; cur_vec < block.size (); cur_vec += vecs) {
58405854 auto s2_cur = s2;
58415855 auto bounds = jaro_similarity_prepare_bound_long_s2 (s1_lengths + result_index, s2_cur);
58425856
58435857 native_simd<VecType> P_flag (VecType (0 ));
58445858
5845- std::fill (T_flag. begin () , T_flag. begin () + detail::ceil_div (s2_cur.size (), sizeof (VecType) * 8 ),
5859+ std::fill (T_flag, T_flag + detail::ceil_div (s2_cur.size (), sizeof (VecType) * 8 ),
58465860 native_simd<VecType>(VecType (0 )));
5847- std::fill (counter. begin () , counter. begin () + detail::ceil_div (s2_cur.size (), sizeof (VecType) * 8 ),
5861+ std::fill (counter, counter + detail::ceil_div (s2_cur.size (), sizeof (VecType) * 8 ),
58485862 native_simd<VecType>(VecType (1 )));
58495863
58505864 // In case s2 is longer than all of the elements in s1_lengths boundMaskSize
@@ -5886,11 +5900,7 @@ jaro_similarity_simd_long_s2(Range<double*> scores, const detail::BlockPatternMa
58865900 P_flag.store (P_flags.data ());
58875901
58885902 for (size_t i = 0 ; i < static_cast <size_t >(detail::ceil_div (s2_cur.size (), sizeof (VecType) * 8 )); ++i)
5889- {
5890- alignas (alignment) std::array<VecType, vec_width> T_flags_;
5891- T_flag[i].store (T_flags_.data ());
5892- T_flags[i] = T_flags_;
5893- }
5903+ T_flag[i].store (T_flags + i * vec_width);
58945904
58955905 for (size_t i = 0 ; i < vec_width; ++i) {
58965906 VecType CommonChars = counts[i];
@@ -5911,11 +5921,11 @@ jaro_similarity_simd_long_s2(Range<double*> scores, const detail::BlockPatternMa
59115921
59125922 {
59135923 size_t T_word_index = 0 ;
5914- VecType T_flag_cur = T_flags[T_word_index][ i];
5924+ VecType T_flag_cur = T_flags[T_word_index * vec_width + i];
59155925 while (P_flag_cur) {
59165926 while (!T_flag_cur) {
59175927 ++T_word_index;
5918- T_flag_cur = T_flags[T_word_index][ i];
5928+ T_flag_cur = T_flags[T_word_index * vec_width + i];
59195929 }
59205930
59215931 VecType PatternFlagMask = blsi (P_flag_cur);
0 commit comments