Skip to content

Commit e64a0fa

Browse files
author
Julian LALU
committed
Natvis
1 parent 7d443ff commit e64a0fa

14 files changed

Lines changed: 208 additions & 113 deletions

File tree

.vscode/launch.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@
2929
"stopAtEntry": false,
3030
"cwd": "${workspaceFolder}",
3131
"externalConsole": false,
32+
"visualizerFile": "${workspaceFolder}/core.natvis",
33+
"logging": {
34+
"trace": true,
35+
"moduleLoad": false,
36+
"traceResponse": true,
37+
"engineLogging": true
38+
}
3239
},
3340
],
3441
}

core.vscode.code-workspace

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
"testMate.cpp.test.executables": "target/**/*{test}*",
2626
"cmake.options.statusBarVisibility": "compact",
2727
"cmake.outputLogEncoding": "utf8",
28-
"debug.showVariableTypes": true,
28+
"debug.showVariableTypes": false,
2929
"rust-analyzer.checkOnSave": true,
3030
"dotnetAcquisitionExtension.enableTelemetry": false,
3131
},

interface/core/containers/compressed_pair.h

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,12 +1285,10 @@ namespace hud
12851285
[[nodiscard]] constexpr hud::tuple_element_t<element_index, compressed_pair<first_type, second_type>> &get(compressed_pair<first_type, second_type> &compressed_pair) noexcept
12861286
{
12871287
static_assert(element_index < 2, "compressed_pair index out of bounds");
1288-
if constexpr (element_index == 0)
1289-
{
1288+
if constexpr (element_index == 0) {
12901289
return compressed_pair.first();
12911290
}
1292-
else if constexpr (element_index == 1)
1293-
{
1291+
else if constexpr (element_index == 1) {
12941292
return compressed_pair.second();
12951293
}
12961294
}
@@ -1307,12 +1305,10 @@ namespace hud
13071305
[[nodiscard]] constexpr const hud::tuple_element_t<element_index, const compressed_pair<first_type, second_type>> &get(const compressed_pair<first_type, second_type> &compressed_pair) noexcept
13081306
{
13091307
static_assert(element_index < 2, "compressed_pair index out of bounds");
1310-
if constexpr (element_index == 0)
1311-
{
1308+
if constexpr (element_index == 0) {
13121309
return compressed_pair.first();
13131310
}
1314-
else if constexpr (element_index == 1)
1315-
{
1311+
else if constexpr (element_index == 1) {
13161312
return compressed_pair.second();
13171313
}
13181314
}
@@ -1329,12 +1325,10 @@ namespace hud
13291325
[[nodiscard]] constexpr hud::tuple_element_t<element_index, compressed_pair<first_type, second_type>> &&get(compressed_pair<first_type, second_type> &&compressed_pair) noexcept
13301326
{
13311327
static_assert(element_index < 2, "compressed_pair index out of bounds");
1332-
if constexpr (element_index == 0)
1333-
{
1328+
if constexpr (element_index == 0) {
13341329
return hud::forward<first_type &&>(compressed_pair.first());
13351330
}
1336-
else if constexpr (element_index == 1)
1337-
{
1331+
else if constexpr (element_index == 1) {
13381332
return hud::forward<second_type &&>(compressed_pair.second());
13391333
}
13401334
}
@@ -1351,12 +1345,10 @@ namespace hud
13511345
[[nodiscard]] constexpr const hud::tuple_element_t<element_index, compressed_pair<first_type, second_type>> &&get(const compressed_pair<first_type, second_type> &&compressed_pair) noexcept
13521346
{
13531347
static_assert(element_index < 2, "compressed_pair index out of bounds");
1354-
if constexpr (element_index == 0)
1355-
{
1348+
if constexpr (element_index == 0) {
13561349
return hud::forward<const first_type &&>(compressed_pair.first());
13571350
}
1358-
else if constexpr (element_index == 1)
1359-
{
1351+
else if constexpr (element_index == 1) {
13601352
return hud::forward<const second_type &&>(compressed_pair.second());
13611353
}
13621354
}

interface/core/memory/memory.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
#include "../templates/forward.h"
3939

4040
#if defined(HD_SSE2)
41-
#include <emmintrin.h>
41+
#include <emmintrin.h>
4242
#endif
4343

4444
namespace hud
@@ -880,13 +880,15 @@ namespace hud
880880
* - 0 if bytes in buffer1 and buffer2 are equal
881881
* - Positive value if the first differing byte in buffer1 is greater than the corresponding byte in buffer2
882882
*/
883-
[[nodiscard]] static HD_FORCEINLINE constexpr i32 compare_memory(const u8 *buffer1, const u8 *buffer2, const usize size) noexcept
883+
template<typename byte_t>
884+
requires(sizeof(byte_t) == 1)
885+
[[nodiscard]] static HD_FORCEINLINE constexpr i32 compare_memory(const byte_t *buffer1, const byte_t *buffer2, const usize size) noexcept
884886
{
885887
if consteval
886888
// LCOV_EXCL_START
887889
{
888-
const u8 *lhs = buffer1;
889-
const u8 *rhs = buffer2;
890+
const byte_t *lhs = buffer1;
891+
const byte_t *rhs = buffer2;
890892
for (usize position = 0; position < size; position++) {
891893
i32 diff = *lhs - *rhs;
892894
if (diff) {
@@ -939,7 +941,9 @@ namespace hud
939941
* @param size Number of bytes to compare
940942
* @return true if both buffers are equal, false otherwise
941943
*/
942-
[[nodiscard]] static HD_FORCEINLINE constexpr bool is_memory_compare_equal(const u8 *buffer1, const u8 *buffer2, const usize size) noexcept
944+
template<typename byte_t>
945+
requires(sizeof(byte_t) == 1)
946+
[[nodiscard]] static HD_FORCEINLINE constexpr bool is_memory_compare_equal(const byte_t *buffer1, const byte_t *buffer2, const usize size) noexcept
943947
{
944948
return compare_memory(buffer1, buffer2, size) == 0;
945949
}

interface/core/string/cstring_view.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#define HD_INC_CORE_STRING_CSTRING_VIEW_H
33
#include "cstring.h"
44
#include "../slice.h"
5+
#include "unicode/utf8.h"
6+
57
namespace hud
68
{
79
/**
@@ -258,9 +260,23 @@ namespace hud
258260
return ptr_[i];
259261
}
260262

263+
/**
264+
* Validates whether the cstring_view is well-formed UTF-8 according to the Unicode specification.
265+
*
266+
* This function checks each sequence of bytes (ASCII, 2-, 3-, or 4-byte sequences) and ensures
267+
* the following rules are respected:
268+
* - ASCII bytes (< 0x80) are accepted directly.
269+
* - Multi-byte sequences must follow the correct pattern (10xxxxxx after a valid leading byte).
270+
* - Overlong encodings are rejected.
271+
* - Disallowed values (such as surrogates [U+D800, U+DFFF]) are rejected.
272+
* - Code points above U+10FFFF are rejected.
273+
*
274+
* @return true if the input is valid UTF-8, false otherwise.
275+
*/
261276
[[nodiscard]]
262277
constexpr bool is_valid_utf8() const noexcept
263278
{
279+
return hud::unicode::is_valid_utf8(as_slice());
264280
}
265281

266282
private:

interface/core/string/string.h

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
#ifndef HD_INC_CORE_STRING_STRING_H
22
#define HD_INC_CORE_STRING_STRING_H
33
#include "../containers/vector.h"
4+
#include "../containers/optional.h"
5+
#include "unicode/utf8.h"
6+
#include "cstring_view.h"
7+
8+
constexpr const char8 *const check_utf8 = "é";
9+
static_assert(check_utf8[0] == char8(0xC3) && check_utf8[1] == char8(0xA9), "Compiler did not interpret source as UTF-8!");
410

511
namespace hud
612
{
@@ -12,22 +18,52 @@ namespace hud
1218
constexpr string(string &&) noexcept = default;
1319
constexpr string &operator=(const string &) noexcept = default;
1420
constexpr string &operator=(string &&) noexcept = default;
15-
[[nodiscard]] constexpr usize count() const noexcept
21+
constexpr ~string() noexcept = default;
22+
23+
[[nodiscard]] constexpr usize byte_count() const noexcept
1624
{
17-
return data_.count();
25+
return data_.byte_count();
1826
}
19-
[[nodiscard]] constexpr usize max_count() const noexcept
27+
[[nodiscard]] constexpr usize max_byte_count() const noexcept
2028
{
2129
return data_.max_count();
2230
}
23-
[[nodiscard]] constexpr const char8 *data() const noexcept
31+
[[nodiscard]] constexpr const char8 *bytes() const noexcept
2432
{
2533
return data_.data();
2634
}
2735

36+
private:
37+
template<typename char_t>
38+
requires(hud::is_same_v<hud::remove_cv_t<char_t>, char8>)
39+
constexpr string(const hud::slice<char_t> slice) noexcept
40+
: data_ {slice.data(), slice.count()}
41+
{
42+
}
43+
44+
template<typename char_t>
45+
requires(hud::is_same_v<hud::remove_cv_t<char_t>, char8>)
46+
friend constexpr hud::optional<hud::string> make_string(const hud::slice<char_t> slice) noexcept;
47+
2848
private:
2949
hud::vector<char8> data_;
3050
};
51+
52+
template<typename char_t>
53+
requires(hud::is_same_v<hud::remove_cv_t<char_t>, char8>)
54+
constexpr hud::optional<hud::string> make_string(const hud::slice<char_t> slice) noexcept
55+
{
56+
if (hud::unicode::is_valid_utf8(slice))
57+
return hud::string {slice};
58+
return hud::nullopt;
59+
}
60+
61+
template<typename char_t>
62+
requires(hud::is_same_v<hud::remove_cv_t<char_t>, char8>)
63+
[[nodiscard]] constexpr hud::optional<hud::string> make_string(const hud::cstring_view<char_t> view) noexcept
64+
{
65+
return make_string(view.as_slice());
66+
}
3167
} // namespace hud
3268

3369
#endif // HD_INC_CORE_STRING_STRING_H

interface/core/string/string_view.h

Lines changed: 0 additions & 21 deletions
This file was deleted.

interface/core/string/unicode/utf8.h

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,45 @@
11
#ifndef HD_INC_CORE_STRING_UNICODE_UTF8_H
22
#define HD_INC_CORE_STRING_UNICODE_UTF8_H
3+
#include "../../slice.h"
4+
#include "../../traits/is_same.h"
5+
#include "../../traits/remove_cv.h"
36

47
namespace hud::unicode
58
{
6-
[[nodiscard]] static constexpr bool is_valid_utf8_portable(const char8 *string, usize byte_count) noexcept
9+
/**
10+
* Validates whether a given byte sequence is well-formed UTF-8 according to the Unicode specification.
11+
*
12+
* This function checks each sequence of bytes (ASCII, 2-, 3-, or 4-byte sequences) and ensures
13+
* the following rules are respected:
14+
* - ASCII bytes (< 0x80) are accepted directly.
15+
* - Multi-byte sequences must follow the correct pattern (10xxxxxx after a valid leading byte).
16+
* - Overlong encodings are rejected.
17+
* - Disallowed values (such as surrogates [U+D800, U+DFFF]) are rejected.
18+
* - Code points above U+10FFFF are rejected.
19+
*
20+
* An optimization is applied to quickly skip blocks of 16 consecutive ASCII bytes in a single operation.
21+
*
22+
* @tparam char_t Character type of the slice elements (must be `char8`, optionally cv-qualified).
23+
* @param string UTF-8 byte sequence to validate.
24+
* @return true if the input is valid UTF-8, false otherwise.
25+
*/
26+
template<typename char_t>
27+
requires(hud::is_same_v<hud::remove_cv_t<char_t>, char8>)
28+
[[nodiscard]] static constexpr bool is_valid_utf8_portable(const hud::slice<char_t> string) noexcept
729
{
830
usize pos = 0;
931
u32 code_point = 0;
32+
usize byte_count = string.byte_count();
33+
const char8 *str = string.data();
34+
1035
while (pos < byte_count) {
1136
// Optimization step:
1237
// If the next 16 bytes are guaranteed to be ASCII (all < 128),
1338
// we can skip them all at once instead of checking byte by byte.
1439
usize next_pos = pos + 16;
15-
if (next_pos <= byte_count) { // Make sure we don't read past the buffer
16-
u64 v1 = hud::memory::unaligned_load64(string + pos); // load first 8 bytes
17-
u64 v2 = hud::memory::unaligned_load64(string + pos + sizeof(u64)); // load next 8 bytes
40+
if (next_pos <= byte_count) { // Make sure we don't read past the buffer
41+
u64 v1 = hud::memory::unaligned_load64(str + pos); // load first 8 bytes
42+
u64 v2 = hud::memory::unaligned_load64(str + pos + sizeof(u64)); // load next 8 bytes
1843
// Bitwise OR combines both 8-byte blocks so we only need a single mask test below.
1944
// If any byte in v1 or v2 has its high bit set (>= 0x80, non-ASCII),
2045
// the result will also have that bit set. This lets us quickly check
@@ -27,15 +52,15 @@ namespace hud::unicode
2752
}
2853

2954
// Now process byte by byte
30-
unsigned char byte = string[pos];
55+
unsigned char byte = str[pos];
3156

3257
// Consume consecutive ASCII bytes.
3358
// This inner loop skips multiple ASCII chars in a row efficiently.
3459
while ((byte & 0x80) == 0) {
3560
if (++pos == byte_count) {
3661
return true;
3762
}
38-
byte = string[pos];
63+
byte = str[pos];
3964
}
4065

4166
// Case: 2-byte sequence -> 110xxxxx 10xxxxxx
@@ -49,11 +74,11 @@ namespace hud::unicode
4974
return false;
5075
}
5176
// Ensure 1st continuation byte is 10xxxxxx
52-
if ((string[pos + 1] & 0b11000000) != 0b10000000) {
77+
if ((str[pos + 1] & 0b11000000) != 0b10000000) {
5378
return false;
5479
}
5580
// Read the code point
56-
code_point = (byte & 0b00011111) << 6 | (string[pos + 1] & 0b00111111);
81+
code_point = (byte & 0b00011111) << 6 | (str[pos + 1] & 0b00111111);
5782
// Ensure code point is [0x80, 0x7FF] aka [U+0080, U+07FF]
5883
if ((code_point < 0x80) || (0x7ff < code_point)) {
5984
return false;
@@ -70,15 +95,15 @@ namespace hud::unicode
7095
return false;
7196
}
7297
// Ensure 1st continuation byte is 10xxxxxx
73-
if ((string[pos + 1] & 0b11000000) != 0b10000000) {
98+
if ((str[pos + 1] & 0b11000000) != 0b10000000) {
7499
return false;
75100
}
76101
// Ensure 2nd continuation byte is 10xxxxxx
77-
if ((string[pos + 2] & 0b11000000) != 0b10000000) {
102+
if ((str[pos + 2] & 0b11000000) != 0b10000000) {
78103
return false;
79104
}
80105
// Read the code point
81-
code_point = (byte & 0b00001111) << 12 | (string[pos + 1] & 0b00111111) << 6 | (string[pos + 2] & 0b00111111);
106+
code_point = (byte & 0b00001111) << 12 | (str[pos + 1] & 0b00111111) << 6 | (str[pos + 2] & 0b00111111);
82107
// Check code point valid value
83108
// - must not be overlong encoding (< 0x800 is invalid)
84109
// - must be [0x0800, 0xFFFF] aka [U+0800, U+FFFF]
@@ -127,6 +152,30 @@ namespace hud::unicode
127152
}
128153
return true;
129154
}
155+
156+
/**
157+
* Validates whether a given byte sequence is well-formed UTF-8 according to the Unicode specification.
158+
*
159+
* This function checks each sequence of bytes (ASCII, 2-, 3-, or 4-byte sequences) and ensures
160+
* the following rules are respected:
161+
* - ASCII bytes (< 0x80) are accepted directly.
162+
* - Multi-byte sequences must follow the correct pattern (10xxxxxx after a valid leading byte).
163+
* - Overlong encodings are rejected.
164+
* - Disallowed values (such as surrogates [U+D800, U+DFFF]) are rejected.
165+
* - Code points above U+10FFFF are rejected.
166+
*
167+
* An optimization is applied to quickly skip blocks of 16 consecutive ASCII bytes in a single operation.
168+
*
169+
* @tparam char_t Character type of the slice elements (must be `char8`, optionally cv-qualified).
170+
* @param string UTF-8 byte sequence to validate.
171+
* @return true if the input is valid UTF-8, false otherwise.
172+
*/
173+
template<typename char_t>
174+
requires(hud::is_same_v<hud::remove_cv_t<char_t>, char8>)
175+
[[nodiscard]] static constexpr bool is_valid_utf8(const hud::slice<char_t> string) noexcept
176+
{
177+
return is_valid_utf8_portable(string);
178+
}
130179
} // namespace hud::unicode
131180

132181
#endif // HD_INC_CORE_STRING_UNICODE_UTF8_H

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ endif()
3131
# Include Interface directory to inclusion path
3232
target_include_directories(${lib_name} PUBLIC ../interface)
3333

34+
set_property(TARGET ${lib_name} PROPERTY VS_DEBUGGER_VISUALIZER ${CMAKE_SOURCE_DIR}/core.natvis)
3435

3536
target_precompile_headers(${lib_name} PRIVATE precompiled.h)
3637

0 commit comments

Comments
 (0)