kernelwernel
diff --git a/‎src/txt.txt‎
Lines changed: 199 additions & 0 deletions b/‎src/txt.txt‎
Lines changed: 199 additions & 0 deletions
@@ -0,0 +1,199 @@
+        // Tokenize str and look every token up in thread_database by its running hash.
+        // The longest token with a matching hash wins: it updates best_len, expected_threads and found.
+        for (size_t i = 0; str[i] != '\0'; ) {
+            const char c = str[i];
+            // a token may only start on an ASCII letter or digit, anything else is skipped
+            if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) {
+                i++;
+                continue;
+            }
+
+            u32 current_hash = 0;
+            size_t current_len = 0;
+            size_t j = i;
+
+            while (true) {
+                const char k = str[j];
+                // inside a token a hyphen is accepted as well, i.e. i9-10900K
+                const bool is_valid = (k >= '0' && k <= '9') ||
+                    (k >= 'A' && k <= 'Z') ||
+                    (k >= 'a' && k <= 'z') ||
+                    (k == '-');
+                if (!is_valid) break;
+
+                // the token already holds MAX_XEON_MODEL_LEN characters: stop hashing it and resume after it
+                if (current_len >= MAX_XEON_MODEL_LEN) {
+                    while (str[j] != '\0' && str[j] != ' ') j++; // fast forward to space/null
+                    break;
+                }
+
+                /*
+                   models are usually 8 or more bytes long, i.e. i9-10900K
+                   so imagine we want to use u64, you hash the first 8 bytes i9-10900
+                   but then you are left with K. You have to handle the tail
+                   fetching 8 bytes would include the characters after the token, corrupting the hash
+                   so a byte-by-byte loop is the most optimal choice here
+                */
+
+                // hash_func folds one more byte into the running hash
+                // NOTE(review): the original note here says a missing CPU instruction cannot be caught with a
+                // C++ try-catch in a cross-platform way (VEH/SEH or a signal handler would be needed), so
+                // presumably hash_func is selected elsewhere to suit the CPU; confirm against its definition
+                current_hash = hash_func(current_hash, k);
+                current_len++;
+                j++;
+
+                // only verify match if the token has ended (next char is not alphanumeric)
+                // a hyphen counts as an end here, so a prefix such as "i9" of "i9-10900K" is looked up too
+                const char next = str[j];
+                const bool next_is_alnum = (next >= '0' && next <= '9') ||
+                    (next >= 'A' && next <= 'Z') ||
+                    (next >= 'a' && next <= 'z');
+
+                if (!next_is_alnum) {
+                    // linear scan over thread_database
+                    // (original note: a contiguous block of integers in .rodata/.rdata, so this is fast)
+                    for (const auto& entry : thread_database) {
+                        if (entry.hash == current_hash) {
+                            if (current_len > best_len) {
+                                best_len = current_len;
+                                expected_threads = entry.threads;
+                                found = true;
+                            }
+                            // current_len does not change during this scan, so once one entry has matched
+                            // no later entry can pass the length test above: stop here.
+                            // Longer tokens are still preferred, because the enclosing loops keep
+                            // extending the token and compare against best_len again.
+                            break;
+                        }
+                    }
+                }
+            }
+            // resume the outer scan at the first character the inner loop did not consume
+            i = j;
+        }
+
+
+
+
+
+
+
+
+
+
+
+
+
+        // Tokenize str and look every token up in thread_database by its running hash.
+        // The longest token with a matching hash wins: it updates best_len, expected_threads and found.
+        for (size_t i = 0; str[i] != '\0'; ) {
+            const char c = str[i];
+            // a token may only start on an ASCII letter or digit, anything else is skipped
+            if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) {
+                i++;
+                continue;
+            }
+
+            u32 current_hash = 0;
+            size_t current_len = 0;
+            size_t j = i;
+
+            while (true) {
+                const char k = str[j];
+                const bool is_valid = (k >= '0' && k <= '9') ||
+                    (k >= 'A' && k <= 'Z') ||
+                    (k >= 'a' && k <= 'z') ||
+                    (k == '-'); // models have hyphen
+                if (!is_valid) break;
+
+                // the token already holds MAX_INTEL_MODEL_LEN characters: stop hashing it and resume after it
+                if (current_len >= MAX_INTEL_MODEL_LEN) {
+                    while (str[j] != '\0' && str[j] != ' ') j++; // fast forward to space/null
+                    break;
+                }
+
+                /*
+                   models are usually 8 or more bytes long, i.e. i9-10900K
+                   so imagine we want to use u64, you hash the first 8 bytes i9-10900
+                   but then you are left with K. You have to handle the tail
+                   fetching 8 bytes would include the characters after the token, corrupting the hash
+                   so a byte-by-byte loop is the most optimal choice here
+                */
+
+                // hash_func folds one more byte into the running hash
+                // NOTE(review): the original note here says a missing CPU instruction cannot be caught with a
+                // C++ try-catch in a cross-platform way (VEH/SEH or a signal handler would be needed), so
+                // presumably hash_func is selected elsewhere to suit the CPU; confirm against its definition
+                current_hash = hash_func(current_hash, k);
+                current_len++;
+                j++;
+
+                // only verify match if the token has ended (next char is not alphanumeric)
+                // a hyphen counts as an end here, so a prefix such as "i9" of "i9-10900K" is looked up too
+                const char next = str[j];
+                const bool next_is_alnum = (next >= '0' && next <= '9') ||
+                    (next >= 'A' && next <= 'Z') ||
+                    (next >= 'a' && next <= 'z');
+
+                if (!next_is_alnum) {
+                    // linear scan over thread_database
+                    // (original note: a contiguous block of integers in .rodata/.rdata, so this is fast)
+                    for (const auto& entry : thread_database) {
+                        if (entry.hash == current_hash) {
+                            if (current_len > best_len) {
+                                best_len = current_len;
+                                expected_threads = entry.threads;
+                                found = true;
+                            }
+                            // current_len does not change during this scan, so once one entry has matched
+                            // no later entry can pass the length test above: stop here.
+                            // Longer tokens are still preferred, because the enclosing loops keep
+                            // extending the token and compare against best_len again.
+                            break;
+                        }
+                    }
+                }
+            }
+            // resume the outer scan at the first character the inner loop did not consume
+            i = j;
+        }
+
+
+
+
+
+
+
+        // Tokenize str, hash every token in lowercase and look it up in db_entries.
+        // The longest token with a matching hash wins: it updates best_len, expected_threads and found.
+        // A token hashing to 0x3D09D5B4 additionally sets z_series_threads.
+        for (size_t i = 0; str[i] != '\0'; ) {
+            char c = str[i];
+            // a token may only start on an ASCII letter or digit, anything else is skipped
+            if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) {
+                i++;
+                continue;
+            }
+
+            u32 current_hash = 0;
+            size_t current_len = 0;
+            size_t j = i;
+
+            while (true) {
+                char k = str[j];
+                // inside a token a hyphen is accepted as well
+                const bool is_valid = (k >= '0' && k <= '9') ||
+                    (k >= 'A' && k <= 'Z') ||
+                    (k >= 'a' && k <= 'z') ||
+                    (k == '-');
+                if (!is_valid) break;
+
+                // the token already holds MAX_AMD_TOKEN_LEN characters: stop hashing it and
+                // fast forward to the next space/null
+                if (current_len >= MAX_AMD_TOKEN_LEN) {
+                    while (str[j] != '\0' && str[j] != ' ') j++;
+                    break;
+                }
+
+                // convert to lowercase on-the-fly to match compile-time keys
+                // ('a' - 'A' is the ASCII distance between the two cases, i.e. 32)
+                if (k >= 'A' && k <= 'Z') k += ('a' - 'A');
+
+                current_hash = hash_func(current_hash, k);
+                current_len++;
+                j++;
+
+                // boundary check: a lookup only happens when the next char is not alphanumeric
+                // (a hyphen counts as a boundary, although the token continues through it)
+                const char next = str[j];
+                const bool next_is_alnum = (next >= '0' && next <= '9') ||
+                    (next >= 'A' && next <= 'Z') ||
+                    (next >= 'a' && next <= 'z');
+
+                if (!next_is_alnum) {
+                    // Check specific Z1 Extreme token, independent of the best_len bookkeeping below
+                    // NOTE(review): per the original note 0x3D09D5B4 is the CRC32-C of "extreme";
+                    // the value is not verified here, confirm it against hash_func
+                    if (current_hash == 0x3D09D5B4) { z_series_threads = 16; }
+
+                    for (const auto& entry : db_entries) {
+                        if (entry.hash == current_hash) {
+                            if (current_len > best_len) {
+                                best_len = current_len;
+                                expected_threads = entry.threads;
+                                found = true;
+                            }
+                            // current_len does not change during this scan, so once one entry has matched
+                            // no later entry can pass the length test above: stop here
+                            break;
+                        }
+                    }
+                }
+            }
+            // resume the outer scan at the first character the inner loop did not consume
+            i = j;
+        }