@@ -457,7 +457,9 @@ namespace CompressedCuckoo{
457457 // Currently set to false because clear_swath hasn't been rigorously tested
458458 constexpr bool _only_clear_ota_and_fca = false ;
459459 if (!_only_clear_ota_and_fca){ // Competitive with the code in the loop below
460- memset (storage, 0x0 , allocation_size);
460+ for (hash_t i = 0 ; i < total_blocks; i++){
461+ storage[i] = block_t {};
462+ }
461463 return storage;
462464 } // ELSE
463465 for (hash_t i = 0 ; i < total_blocks; i++){
@@ -496,6 +498,9 @@ namespace CompressedCuckoo{
496498
497499 size_t allocation_size = sizeof (*_summed_counters) *
498500 (_buckets_per_block + 1 );
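501+ // Round allocation size up to a multiple of g_cache_line_size_bytes, since aligned_alloc requires size to be a multiple of the alignment (the mask trick assumes a power of two).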
502+ allocation_size = (allocation_size + g_cache_line_size_bytes - 1 ) &
503+ ~(g_cache_line_size_bytes - 1 );
499504 _summed_counters = static_cast <decltype (_summed_counters)>(aligned_alloc (g_cache_line_size_bytes, allocation_size));
500505 // Memset not required, initialized by full_exclusive_scan
501506
@@ -665,8 +670,8 @@ namespace CompressedCuckoo{
665670 constexpr uint_fast8_t num_passes = util::log2ceil (_buckets_per_block);
666671 static_assert (_max_fingerprints_per_block > 0 , " The number of fingerprints "
667672 " per block must be one or more." );
668- constexpr uint_fast8_t masked_count = static_cast <uint_fast8_t >(ceil ( log2 (
669- _max_fingerprints_per_block) / _fullness_counter_width)) - 1 ;
673+ constexpr uint_fast8_t masked_count = static_cast <uint_fast8_t >(util::log2ceil (
674+ _max_fingerprints_per_block) / _fullness_counter_width) - 1 ;
670675 for (int8_t i = 0 ; i < masked_count; i++){ // Masked to avoid overflows
671676 sum = (sum & _reduction_masks[i]) + ((sum >> (_fullness_counter_width << i)) & _reduction_masks[i]);
672677 }
@@ -730,8 +735,9 @@ namespace CompressedCuckoo{
730735 INLINE uint16_t exclusive_reduce_with_popcount128 (const block_t & b,
731736 uint8_t counter_index) const {
732737 constexpr __uint128_t one = 1 ;
733- const __uint128_t mask = (one << (_fullness_counter_width * counter_index))
734- - one;
738+ // Thanks to @asl for the bugfix https://github.com/AMDComputeLibraries/morton_filter/issues/2#issuecomment-568480311
739+ const uint64_t shift = _fullness_counter_width * counter_index;
740+ const __uint128_t mask = shift == 128 ? __uint128_t (-1 ) : (one << shift) - one;
735741 uint8_t sum = 0u ;
736742 __uint128_t counters;
737743 memcpy (&counters, &b, sizeof (__uint128_t ));
@@ -1192,12 +1198,12 @@ namespace CompressedCuckoo{
11921198
11931199 // Reports the C parameter from the VLDB'18 paper. This is the ratio of physical slots in the FSA
11941200 // to logical slots per block.
1195- constexpr double report_compression_ratio (){
1201+ constexpr double report_compression_ratio () const {
11961202 return static_cast <double >(_max_fingerprints_per_block) / (_buckets_per_block * _slots_per_bucket);
11971203 }
11981204
11991205 // This is the $\alpha_C$ term in the paper.
1200- inline double report_block_occupancy (){
1206+ inline double report_block_occupancy () const {
12011207 uint64_t full_slots_count = 0 ;
12021208 for (uint64_t block_id = 0 ; block_id < _total_blocks; block_id++){
12031209 full_slots_count += get_bucket_start_index (block_id, _buckets_per_block);
@@ -1208,7 +1214,7 @@ namespace CompressedCuckoo{
12081214 // Methods specific to Morton filters
12091215
12101216 // Reports what fraction of the bits of the Overflow Tracking Array are set
1211- double report_ota_occupancy (){
1217+ double report_ota_occupancy () const {
12121218 uint64_t set_bit_count = 0 ;
12131219 if (_morton_filter_functionality_enabled){
12141220 for (uint64_t i = 0 ; i < _total_blocks; i++){
0 commit comments