Skip to content

Commit 17a5ebd

Browse files
author
Julian LALU
committed
Start adding SSE2 group for hashset
1 parent e97350f commit 17a5ebd

5 files changed

Lines changed: 287 additions & 123 deletions

File tree

interface/core/compiler_defines.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,19 @@ __arm__. If defined, you can further check:
4444
__powerpc64__
4545
__aarch64__
4646
47+
---- To check SIMD
48+
MSVC :
49+
AVX-512 __AVX512F__
50+
AVX2 __AVX2__
51+
AVX __AVX__
52+
SSE2 _M_IX86_FP == 2
53+
SSE _M_IX86_FP == 1
54+
Clang :
55+
AVX-512 __AVX512F__
56+
AVX2 __AVX2__
57+
AVX __AVX__
58+
SSE2 __SSE2__
58+
SSE __SSE__
4760
*/
4861

4962
/** Detect target OS */
@@ -180,4 +193,38 @@ static_assert(sizeof(void *) == 8, "HD_TARGET_64_BITS is defined but size of poi
180193
#error Target should be 32 bits or 64 bits
181194
#endif
182195

196+
/** Detect the SIMD instruction sets enabled at compile time.
 *
 * Predefined compiler macros used for the detection:
 * - MSVC :
 *     AVX-512  __AVX512F__
 *     AVX2     __AVX2__
 *     AVX      __AVX__
 *     SSE2     _M_IX86_FP >= 2 when targeting x86 32 bits, always available when targeting x64
 *     SSE      _M_IX86_FP >= 1 when targeting x86 32 bits, always available when targeting x64
 *   (_M_IX86_FP is only defined when targeting x86 32 bits, it must be tested with defined() first)
 * - Clang, GCC:
 *     AVX-512  __AVX512F__
 *     AVX2     __AVX2__
 *     AVX      __AVX__
 *     SSE2     __SSE2__
 *     SSE      __SSE__
 *
 * For each enabled instruction set the matching HD_* macro is defined without value.
 */
#if defined(__AVX512F__)
    #define HD_AVX512
#endif

#if defined(__AVX2__)
    #define HD_AVX2
#endif

#if defined(__AVX__)
    #define HD_AVX
#endif

// SSE2 must be keyed on __SSE2__ (not __SSE__), otherwise SSE2 code is enabled on SSE-only targets.
#if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
    #define HD_SSE2
#endif

// A target that supports SSE2 also supports SSE.
#if defined(__SSE__) || defined(HD_SSE2) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))
    #define HD_SSE
#endif
225+
226+
// #if defined(__SSSE3__)
227+
// #define HD_SSSE3
228+
// #endif
229+
183230
#endif // HD_INC_CORE_COMPILER_DEFINES_H

interface/core/containers/hashset.h

Lines changed: 150 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#include "../traits/conditional.h"
1818
#include "compressed_tuple.h"
1919

20+
#if defined(HD_SSSE3)
21+
#include <tmmintrin.h>
22+
#endif
2023
// TODO:
2124
// Move common to a common class that contains max_slot_count_, count_, control_ptr_, slot_ptr_ and free_slot_before_grow_
2225
// Hashset
@@ -362,7 +365,154 @@ namespace hud
362365
static constexpr control_type empty_byte = 0b10000000; // The slot is empty (0x80)
363366
static constexpr control_type deleted_byte = 0b11111110; // The slot is deleted (0xFE)
364367
static constexpr control_type sentinel_byte = 0b11111111; // The slot is a sentinel. A sentinel is a special character that marks the end of the control for iteration (0xFF)
368+
#if defined(HD_SSE2)
369+
struct sse2_group
370+
{
371+
static constexpr usize SLOT_PER_GROUP = 16;
372+
373+
struct mask
374+
{
375+
constexpr mask(u16 mask_value) noexcept
376+
: mask_value_ {mask_value}
377+
{
378+
}
379+
380+
[[nodiscard]]
381+
friend constexpr bool operator==(const mask &a, const mask &b) noexcept
382+
{
383+
return a.mask_value_ == b.mask_value_;
384+
}
385+
386+
[[nodiscard]]
387+
friend constexpr bool operator!=(const mask &a, const mask &b) noexcept
388+
{
389+
return !(a == b);
390+
}
391+
392+
/** Retrieves the index of the first non null byte set. 0 otherwise. */
393+
[[nodiscard]] constexpr u32 first_non_null_index() const noexcept
394+
{
395+
// Get number of trailing zero to get the insert offset of the byte
396+
return hud::bits::trailing_zeros(mask_value_);
397+
}
398+
399+
[[nodiscard]]
400+
constexpr operator u16() const noexcept
401+
{
402+
return mask_value_;
403+
}
404+
405+
protected:
406+
u16 mask_value_;
407+
};
408+
409+
struct empty_mask
410+
: mask
411+
{
412+
using mask::mask;
413+
414+
[[nodiscard]] constexpr bool has_empty_slot() const noexcept
415+
{
416+
return *this;
417+
}
418+
419+
[[nodiscard]] constexpr u32 first_empty_index() const noexcept
420+
{
421+
return first_non_null_index();
422+
}
423+
424+
[[nodiscard]] constexpr u32 trailing_zeros() const noexcept
425+
{
426+
return hud::bits::trailing_zeros(mask_value_);
427+
}
428+
429+
[[nodiscard]] constexpr u32 leading_zeros() const noexcept
430+
{
431+
return hud::bits::leading_zeros(mask_value_);
432+
}
433+
};
365434

435+
struct empty_or_deleted_mask
436+
: mask
437+
{
438+
using mask::mask;
439+
440+
[[nodiscard]] constexpr bool has_empty_or_deleted_slot() const noexcept
441+
{
442+
return *this;
443+
}
444+
445+
[[nodiscard]] constexpr u32 first_empty_or_deleted_index() const noexcept
446+
{
447+
return first_non_null_index();
448+
}
449+
};
450+
451+
struct full_mask
452+
: mask
453+
{
454+
using mask::mask;
455+
456+
[[nodiscard]] constexpr bool has_full_slot() const noexcept
457+
{
458+
return *this;
459+
}
460+
461+
[[nodiscard]] constexpr u32 first_full_index() const noexcept
462+
{
463+
return first_non_null_index();
464+
}
465+
};
466+
467+
/** Load a 16 bytes control into the group. */
468+
constexpr sse2_group(const control_type *control) noexcept
469+
: value_(hud::memory::unaligned_load128(control))
470+
{
471+
}
472+
473+
/**Retrieve a mask where H2 matching control byte have value 0x80 and non matching have value 0x00. */
474+
mask match(u8 h2_hash) const noexcept
475+
{
476+
__m128i match = _mm_set1_epi8(static_cast<char>(h2_hash));
477+
return mask(_mm_movemask_epi8(_mm_cmpeq_epi8(match, value_)));
478+
}
479+
480+
/** Retrieve a mask where empty control bytes have value 0x80 and others have value 0x00. */
481+
empty_mask mask_of_empty_slot() const noexcept
482+
{
483+
#if defined(HD_SSSE3)
484+
return _mm_movemask_epi8(_mm_sign_epi8(value_, value_));
485+
#else
486+
__m128i match = _mm_set1_epi8(static_cast<char>(empty_byte));
487+
return _mm_movemask_epi8(_mm_cmpeq_epi8(match, value_));
488+
#endif
489+
};
490+
491+
/** Retrieve a mask where empty and deleted control bytes have value 0x80 and others have value 0x00. */
492+
empty_or_deleted_mask mask_of_empty_or_deleted_slot() const noexcept
493+
{
494+
__m128i special = _mm_set1_epi8(static_cast<char>(sentinel_byte));
495+
return _mm_movemask_epi8(_mm_cmpgt_epi8(special, value_));
496+
}
497+
498+
/** Retrieve a mask where full control bytes have value 0x80 and others have value 0x00. */
499+
full_mask mask_of_full_slot() const noexcept
500+
{
501+
return _mm_movemask_epi8(value_) ^ 0xffff;
502+
}
503+
504+
u32 count_leading_empty_or_deleted() const noexcept
505+
{
506+
auto special = _mm_set1_epi8(static_cast<char>(sentinel_byte));
507+
u32 mask = _mm_movemask_epi8(_mm_cmpgt_epi8(special, value_)) + 1;
508+
return hud::bits::trailing_zeros(mask);
509+
}
510+
511+
private:
512+
/** The 16 bytes value of the group. */
513+
__m128i value_;
514+
};
515+
#endif
366516
/** Portable group used to analyze a group. */
367517
struct portable_group
368518
{
@@ -594,8 +744,6 @@ namespace hud
594744
// kDeleted. We lower all other bits and count number of trailing zeros.
595745
constexpr uint64_t bits {0x0101010101010101ULL};
596746
return static_cast<u32>(hud::bits::trailing_zeros((value_ | ~(value_ >> 7)) & bits) >> 3);
597-
// return static_cast<u32>(countr_zero((ctrl | ~(ctrl >> 7)) & bits) >> 3);
598-
// return 0;
599747
}
600748

601749
private:

interface/core/memory/memory.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
#include "../templates/move.h"
3838
#include "../templates/forward.h"
3939

40+
#if defined(HD_SSE2)
41+
#include <emmintrin.h>
42+
#endif
43+
4044
namespace hud
4145
{
4246
struct memory
@@ -1104,6 +1108,22 @@ namespace hud
11041108
return hud::bit_cast<u64>(result);
11051109
}
11061110

1111+
#if defined(HD_SSE2)
1112+
/** Load 128 bits value and return it. */
1113+
[[nodiscard]] static constexpr __m128i unaligned_load128(const i8 *buffer) noexcept
1114+
{
1115+
if (hud::is_constant_evaluated())
1116+
{
1117+
i8 result[sizeof(__m128i)];
1118+
copy_memory(result, buffer, sizeof(__m128i));
1119+
return hud::bit_cast<__m128i>(result);
1120+
}
1121+
else
1122+
{
1123+
return _mm_loadu_si128(reinterpret_cast<const __m128i *>(buffer));
1124+
}
1125+
}
1126+
#endif
11071127
/**
11081128
* Call constructor of type.
11091129
* @tparam type_t Type to construct

test/hashmap/hashmap_misc.cpp

Lines changed: 0 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -10,80 +10,6 @@ GTEST_TEST(hashmap, hashmap_value_type_is_correct)
1010
hud_assert_true((hud::is_same_v<const i32, hud::hashmap<i64, const i32>::value_type>));
1111
}
1212

13-
GTEST_TEST(hashmap, metadata)
14-
{
15-
// Testing metadata byte filtering
16-
using control_type = hud::details::hashset::control_type;
17-
hud_assert_true(hud::details::hashset::control::is_byte_empty(hud::details::hashset::empty_byte));
18-
hud_assert_false(hud::details::hashset::control::is_byte_empty(hud::details::hashset::deleted_byte));
19-
hud_assert_false(hud::details::hashset::control::is_byte_empty(hud::details::hashset::sentinel_byte));
20-
hud_assert_false(hud::details::hashset::control::is_byte_empty(0x7F));
21-
22-
hud_assert_false(hud::details::hashset::control::is_byte_deleted(hud::details::hashset::empty_byte));
23-
hud_assert_true(hud::details::hashset::control::is_byte_deleted(hud::details::hashset::deleted_byte));
24-
hud_assert_false(hud::details::hashset::control::is_byte_deleted(hud::details::hashset::sentinel_byte));
25-
hud_assert_false(hud::details::hashset::control::is_byte_deleted(0x7F));
26-
27-
hud_assert_true(hud::details::hashset::control::is_byte_empty_or_deleted(hud::details::hashset::empty_byte));
28-
hud_assert_true(hud::details::hashset::control::is_byte_empty_or_deleted(hud::details::hashset::deleted_byte));
29-
hud_assert_false(hud::details::hashset::control::is_byte_empty_or_deleted(hud::details::hashset::sentinel_byte));
30-
hud_assert_false(hud::details::hashset::control::is_byte_empty_or_deleted(0x7F));
31-
32-
hud_assert_false(hud::details::hashset::control::is_byte_full(hud::details::hashset::empty_byte));
33-
hud_assert_false(hud::details::hashset::control::is_byte_full(hud::details::hashset::deleted_byte));
34-
hud_assert_false(hud::details::hashset::control::is_byte_full(hud::details::hashset::sentinel_byte));
35-
hud_assert_true(hud::details::hashset::control::is_byte_full(0x7F));
36-
37-
// Testing metadata group
38-
using group_type = hud::details::hashset::group_type;
39-
using mask_type = group_type::mask;
40-
using mask_empty_type = group_type::empty_mask;
41-
using mask_empty_or_deleted_type = group_type::empty_or_deleted_mask;
42-
using mask_full_type = group_type::full_mask;
43-
44-
u64 group_value = 0x80FEFF7F80FEFF7F;
45-
group_type g {reinterpret_cast<control_type *>(&group_value)};
46-
hud_assert_eq(g.match(0x7F), mask_type {0x0000008000000080});
47-
hud_assert_eq(g.mask_of_empty_or_deleted_slot(), mask_empty_or_deleted_type {0x8080000080800000});
48-
hud_assert_eq(g.mask_of_empty_slot(), mask_empty_type {0x8000000080000000});
49-
hud_assert_eq(g.mask_of_full_slot(), mask_full_type {0x0000008000000080});
50-
51-
// Test group at index
52-
// empty (0x80), deleted (0xFE), sentinel (0xFF)
53-
u64 two_group[2] = {0x7F00806DFE002A6D, 0x807B00800000FEFF};
54-
control_type *metadata_ptr(reinterpret_cast<control_type *>(&two_group));
55-
group_type g0 {metadata_ptr};
56-
// Read first group
57-
hud_assert_eq(g0.match(0x7F), mask_type {0x8000000000000000});
58-
hud_assert_eq(g0.match(0x2A), mask_type {0x0000000000008000});
59-
hud_assert_eq(g0.match(0x6D), mask_type {0x0000008000000080});
60-
hud_assert_eq(g0.mask_of_empty_or_deleted_slot(), mask_empty_or_deleted_type {0x0000800080000000});
61-
hud_assert_eq(g0.mask_of_empty_slot(), mask_empty_type {0x0000800000000000});
62-
hud_assert_eq(g0.mask_of_full_slot(), mask_full_type {0x8080008000808080});
63-
64-
group_type g1 {metadata_ptr + group_type::SLOT_PER_GROUP * 1};
65-
// Read second group
66-
hud_assert_eq(g1.match(0x7B), mask_type {0x0080000000000000});
67-
hud_assert_eq(g1.mask_of_empty_or_deleted_slot(), mask_empty_or_deleted_type {0x8000008000008000});
68-
hud_assert_eq(g1.mask_of_empty_slot(), mask_empty_type {0x8000008000000000});
69-
hud_assert_eq(g1.mask_of_full_slot(), mask_full_type {0x0080800080800000});
70-
71-
// Test find with group and iteration
72-
// Find the 2 indices of 0x6D in the group and iterate over it
73-
// Expect to have index 0 and 4 in the group 0x7F00806DFE002A6D
74-
group_type::mask h2_match_mask = g0.match(0x6D);
75-
u32 indices[2] = {hud::u32_max, hud::u32_max};
76-
u32 current_index = 0;
77-
for (u32 value : h2_match_mask)
78-
{
79-
hud_assert_ne(current_index, 2);
80-
indices[current_index] = value;
81-
++current_index;
82-
}
83-
hud_assert_eq(indices[0], 0);
84-
hud_assert_eq(indices[1], 4);
85-
}
86-
8713
GTEST_TEST(hashmap, count_return_count_of_element)
8814
{
8915
using type = usize;

0 commit comments

Comments
 (0)