Skip to content

Commit cb7b788

Browse files
authored
Merge pull request #3 from abreslow/master
Fixes bugs and warnings. Specific fixes include https://github.com/A…
2 parents 5492090 + 6c57563 commit cb7b788

File tree

5 files changed

+27
-13
lines changed

5 files changed

+27
-13
lines changed

benchmarking/Makefile

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,20 @@
3434

3535

3636
ifndef CXX
37-
CXX=g++ # Code uses GCC builtins. I do not recommend changing the compiler.
37+
CXX=g++ # Code uses GCC builtins. I recommend using either g++ or clang++.
38+
# GCC's g++ is probably preferred since it appears to yield better
39+
# throughput numbers. However, the results are roughly comparable.
3840
endif
3941

42+
# Comment the line below back in if you want to instrument the code with the
43+
# sanitizer functionality
44+
#SANITIZE:=-fsanitize=address,undefined,leak
45+
4046
OPT=-Ofast -march=native -mpopcnt
4147

42-
FLAGS:=-Wall -Winline -g -std=c++11 $(OPT)
48+
FLAGS:=-Wall -Winline -g -std=c++11 $(OPT) $(SANITIZE)
49+
50+
# Need to routinely check for bugs with -fsanitize=address -fsanitize=undefined
4351

4452
TARGETS=benchmark \
4553
benchmark_cf benchmark_mf \

bf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ namespace BlockedBF{
6060
inline bool contains_and_update(const hash_t item){
6161
constexpr slot_type one = 1;
6262
constexpr slot_type slot_width_bits = sizeof(slot_type) * 8;
63-
constexpr slot_type log2_slot_width = log2(slot_width_bits);
63+
constexpr slot_type log2_slot_width = util::log2ceil(slot_width_bits);
6464
constexpr slot_type shift0 = 0;
6565
constexpr slot_type shift1 = log2_slot_width;
6666
constexpr slot_type shift2 = log2_slot_width * 2;

block.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,6 @@ struct Block{ // Assuming block size is a multiple of atom_t's size in bytes
413413
memcpy(source_address, &item, field_width_bytes);
414414
}
415415

416-
__attribute__((optimize("unroll-loops")))
417416
INLINE void add_cross_left_displace(uint64_t raw_offset_bits,
418417
uint64_t field_width_bits, uint64_t index, atom_t item){
419418
uint64_t global_index = index * field_width_bits + raw_offset_bits;

compressed_cuckoo_filter.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,9 @@ namespace CompressedCuckoo{
457457
// Currently set to false because clear_swath hasn't been rigorously tested
458458
constexpr bool _only_clear_ota_and_fca = false;
459459
if(!_only_clear_ota_and_fca){ // Competitive with the code in the loop below
460-
memset(storage, 0x0, allocation_size);
460+
for(hash_t i = 0; i < total_blocks; i++){
461+
storage[i] = block_t{};
462+
}
461463
return storage;
462464
} // ELSE
463465
for(hash_t i = 0; i < total_blocks; i++){
@@ -496,6 +498,9 @@ namespace CompressedCuckoo{
496498

497499
size_t allocation_size = sizeof(*_summed_counters) *
498500
(_buckets_per_block + 1);
501+
// Round allocation size up to a multiple of 64.
502+
allocation_size = (allocation_size + g_cache_line_size_bytes - 1) &
503+
~(g_cache_line_size_bytes - 1);
499504
_summed_counters = static_cast<decltype(_summed_counters)>(aligned_alloc(g_cache_line_size_bytes, allocation_size));
500505
// Memset not required, initialized by full_exclusive_scan
501506

@@ -665,8 +670,8 @@ namespace CompressedCuckoo{
665670
constexpr uint_fast8_t num_passes = util::log2ceil(_buckets_per_block);
666671
static_assert(_max_fingerprints_per_block > 0, "The number of fingerprints "
667672
"per block must be one or more.");
668-
constexpr uint_fast8_t masked_count = static_cast<uint_fast8_t>(ceil(log2(
669-
_max_fingerprints_per_block) / _fullness_counter_width)) - 1;
673+
constexpr uint_fast8_t masked_count = static_cast<uint_fast8_t>(util::log2ceil(
674+
_max_fingerprints_per_block) / _fullness_counter_width) - 1;
670675
for(int8_t i = 0; i < masked_count; i++){ // Masked to avoid overflows
671676
sum = (sum & _reduction_masks[i]) + ((sum >> (_fullness_counter_width << i)) & _reduction_masks[i]);
672677
}
@@ -730,8 +735,9 @@ namespace CompressedCuckoo{
730735
INLINE uint16_t exclusive_reduce_with_popcount128(const block_t& b,
731736
uint8_t counter_index) const{
732737
constexpr __uint128_t one = 1;
733-
const __uint128_t mask = (one << (_fullness_counter_width * counter_index))
734-
- one;
738+
// Thanks to @asl for the bugfix https://github.com/AMDComputeLibraries/morton_filter/issues/2#issuecomment-568480311
739+
const uint64_t shift = _fullness_counter_width * counter_index;
740+
const __uint128_t mask = shift == 128 ? __uint128_t(-1) : (one << shift) - one;
735741
uint8_t sum = 0u;
736742
__uint128_t counters;
737743
memcpy(&counters, &b, sizeof(__uint128_t));
@@ -1192,12 +1198,12 @@ namespace CompressedCuckoo{
11921198

11931199
// Reports the C parameter from the VLDB'18 paper. This is the ratio of physical slots in the FSA
11941200
// to logical slots per block.
1195-
constexpr double report_compression_ratio(){
1201+
constexpr double report_compression_ratio() const{
11961202
return static_cast<double>(_max_fingerprints_per_block) / (_buckets_per_block * _slots_per_bucket);
11971203
}
11981204

11991205
// This is the $\alpha_C$ term in the paper.
1200-
inline double report_block_occupancy(){
1206+
inline double report_block_occupancy() const{
12011207
uint64_t full_slots_count = 0;
12021208
for(uint64_t block_id = 0; block_id < _total_blocks; block_id++){
12031209
full_slots_count += get_bucket_start_index(block_id, _buckets_per_block);
@@ -1208,7 +1214,7 @@ namespace CompressedCuckoo{
12081214
// Methods specific to Morton filters
12091215

12101216
// Reports what fraction of the bits of the Overflow Tracking Array are set
1211-
double report_ota_occupancy(){
1217+
double report_ota_occupancy() const{
12121218
uint64_t set_bit_count = 0;
12131219
if(_morton_filter_functionality_enabled){
12141220
for(uint64_t i = 0; i < _total_blocks; i++){

util.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ namespace util{
7474
// This could be implemented using fancy binary arithmatic or builtins,
7575
// but this probably suffices if the integer is known at compile time.
7676
constexpr inline uint32_t log2ceil(uint32_t integer){
77-
//return ceil(log2(integer));
77+
// I'm doing it this way because log2 is not a constexpr function in the
78+
// Clang toolchain whereas __builtin_clz is.
7879
return 32u - __builtin_clz(integer - 1u);
7980
}
8081

0 commit comments

Comments
 (0)