Skip to content

Commit 326d42c

Browse files
author
Requiem
committed
remove: code duplications
1 parent e8885dd commit 326d42c

1 file changed

Lines changed: 0 additions & 149 deletions

File tree

src/vmaware.hpp

Lines changed: 0 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -1394,155 +1394,6 @@ struct VM {
13941394
}
13951395

13961396

1397-
// we want to precompute hashes at compile time for C++11 and later, so we need to match the hardware _mm_crc32_u8
1398-
// it is based on CRC32-C (Castagnoli) polynomial
1399-
struct constexpr_hash {
1400-
static constexpr u32 crc32_bits(u32 crc, int bits) {
1401-
return (bits == 0) ? crc :
1402-
crc32_bits((crc >> 1) ^ ((crc & 1) ? 0x82F63B78u : 0), bits - 1);
1403-
}
1404-
static constexpr u32 crc32_str(const char* s, u32 crc) {
1405-
return (*s == '\0') ? crc :
1406-
crc32_str(s + 1, crc32_bits(crc ^ static_cast<u8>(*s), 8));
1407-
}
1408-
static constexpr u32 get(const char* s) {
1409-
return crc32_str(s, 0);
1410-
}
1411-
};
1412-
1413-
1414-
// this forces the compiler to calculate the hash when initializing the array while staying C++11 compatible
1415-
struct thread_entry {
1416-
u32 hash;
1417-
u32 threads;
1418-
constexpr thread_entry(const char* m, u32 t) : hash(constexpr_hash::get(m)), threads(t) {}
1419-
};
1420-
1421-
1422-
struct hasher {
1423-
static u32 crc32_sw(u32 crc, char data) {
1424-
crc ^= static_cast<u8>(data);
1425-
for (int i = 0; i < 8; ++i)
1426-
crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78u : 0);
1427-
return crc;
1428-
}
1429-
1430-
#if (CLANG || GCC)
1431-
__attribute__((__target__("crc32")))
1432-
#endif
1433-
static u32 crc32_hw(u32 crc, char data) {
1434-
return _mm_crc32_u8(crc, static_cast<u8>(data));
1435-
}
1436-
1437-
using hashfc = u32(*)(u32, char);
1438-
1439-
static hashfc get() {
1440-
// yes, vmaware runs on dinosaur cpus without sse4.2 pretty often
1441-
i32 regs[4];
1442-
cpu::cpuid(regs, 1);
1443-
const bool has_sse42 = (regs[2] & (1 << 20)) != 0;
1444-
1445-
return has_sse42 ? crc32_hw : crc32_sw;
1446-
}
1447-
};
1448-
1449-
1450-
static void lookup(
1451-
bool &found,
1452-
const char* str,
1453-
const thread_entry* thread_database,
1454-
const std::size_t& db_size,
1455-
std::size_t best_len,
1456-
const u8 max_token_length,
1457-
u32* z_series_threads,
1458-
u32& expected_threads,
1459-
const bool is_amd = false
1460-
) {
1461-
const hasher::hashfc hash_func = util::hasher::get();
1462-
1463-
for (size_t i = 0; str[i] != '\0'; ) {
1464-
char c = str[i];
1465-
if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) {
1466-
i++;
1467-
continue;
1468-
}
1469-
1470-
u32 current_hash = 0;
1471-
size_t current_len = 0;
1472-
size_t j = i;
1473-
1474-
while (true) {
1475-
char k = str[j];
1476-
const bool is_valid = (k >= '0' && k <= '9') ||
1477-
(k >= 'A' && k <= 'Z') ||
1478-
(k >= 'a' && k <= 'z') ||
1479-
(k == '-'); // models have hyphen
1480-
if (!is_valid) break;
1481-
1482-
if (current_len >= max_token_length) {
1483-
while (str[j] != '\0' && str[j] != ' ') j++; // fast forward to space/null
1484-
break;
1485-
}
1486-
1487-
if (is_amd) {
1488-
// convert to lowercase on-the-fly to match compile-time keys
1489-
if (k >= 'A' && k <= 'Z') k += 32;
1490-
}
1491-
1492-
/*
1493-
models are usually 8 or more bytes long, i.e. i9-10900K
1494-
so imagine we want to use u64, you hash the first 8 bytes i9-10900
1495-
but then you are left with K. You have to handle the tail
1496-
fetching 8 bytes would include the characters after the token, corrupting the hash
1497-
so a byte-by-byte loop is the most optimal choice here
1498-
*/
1499-
1500-
// since this technique is cross-platform, we cannot use a standard C++ try-catch block to catch a missing CPU instruction
1501-
// we could use preprocessor directives and add an exception handler (VEH/SEH or SIGHANDLER) but nah
1502-
current_hash = hash_func(current_hash, k);
1503-
current_len++;
1504-
j++;
1505-
1506-
// boundary check, only verify match if the token has ended (next char is not alphanumeric)
1507-
const char next = str[j];
1508-
const bool next_is_alnum = (next >= '0' && next <= '9') ||
1509-
(next >= 'A' && next <= 'Z') ||
1510-
(next >= 'a' && next <= 'z');
1511-
1512-
if (!next_is_alnum) {
1513-
// Check specific Z1 Extreme token
1514-
// Hash for "extreme" (CRC32-C) is 0x3D09D5B4
1515-
if (
1516-
is_amd &&
1517-
(z_series_threads != nullptr) &&
1518-
(current_hash == 0x3D09D5B4)
1519-
) {
1520-
*z_series_threads = 16;
1521-
}
1522-
1523-
// since it's a contiguous block of integers in .rodata/.rdata, this is extremely fast
1524-
for (std::size_t i = 0; i < db_size; ++i) {
1525-
if (
1526-
(thread_database[i].hash == current_hash) &&
1527-
(current_len > best_len)
1528-
) {
1529-
best_len = current_len;
1530-
expected_threads = thread_database[i].threads;
1531-
found = true;
1532-
1533-
// on intel, since hashing implies uniqueness in this dataset, you might say we could break
1534-
// here, but we continue to ensure we find the longest substring match if overlaps exist,
1535-
// so like it finds both "i9-11900" and "i9-11900K" i.e.
1536-
}
1537-
}
1538-
}
1539-
}
1540-
i = j;
1541-
}
1542-
}
1543-
1544-
1545-
15461397
// wrapper for std::make_unique because it's not available for C++11
15471398
template<typename T, typename... Args>
15481399
[[nodiscard]] static std::unique_ptr<T> make_unique(Args&&... args) {

0 commit comments

Comments
 (0)