@@ -3473,6 +3473,25 @@ struct VM {
34733473 return (base_str.find(keyword) != std::string::npos);
34743474 };
34753475
3476+ [[nodiscard]] static i32 popcount(u64 v) {
3477+ #if (GCC) || (CLANG)
3478+ return __builtin_popcountll(v);
3479+ #elif (MSVC)
3480+ #if (x86_32)
3481+ return static_cast<int>(
3482+ __popcnt(static_cast<unsigned int>(v)) +
3483+ __popcnt(static_cast<unsigned int>(v >> 32))
3484+ );
3485+ #else
3486+ return static_cast<int>(__popcnt64(static_cast<unsigned long long>(v)));
3487+ #endif
3488+ #else
3489+ int c = 0;
3490+ while (v) { c += static_cast<int>(v & 1ull); v >>= 1; }
3491+ return c;
3492+ #endif
3493+ };
3494+
34763495 static std::string narrow_wide(const wchar_t* wstr) {
34773496 if (!wstr) return std::string{};
34783497 std::wstring ws(wstr);
@@ -4005,10 +4024,13 @@ struct VM {
40054024
40064025 while (true) {
40074026 char k = str[j];
4008- const bool is_valid = (k >= '0' && k <= '9') ||
4027+ const bool is_valid = (
4028+ (k >= '0' && k <= '9') ||
40094029 (k >= 'A' && k <= 'Z') ||
40104030 (k >= 'a' && k <= 'z') ||
4011- (k == '-'); // models have hyphen
4031+ (k == '-') // models have hyphen
4032+ );
4033+
40124034 if (!is_valid) break;
40134035
40144036 if (current_len >= max_model_len) {
@@ -4046,16 +4068,18 @@ struct VM {
40464068
40474069 // since it's a contiguous block of integers in .rodata/.rdata, this is extremely fast
40484070 for (size_t idx = 0; idx < db_size; ++idx) {
4049- if (db[idx].hash == current_hash) {
4050- if (current_len > best_len) {
4051- best_len = current_len;
4052- expected_threads = db[idx].threads;
4053- found = true;
4054- }
4055- // since hashing implies uniqueness in this dataset, you might say we could break here,
4056- // but we continue to ensure we find the longest substring match if overlaps exist,
4057- // so like it finds both "i9-11900" and "i9-11900K" i.e.
4071+ if (db[idx].hash != current_hash) {
4072+ continue;
4073+ }
4074+
4075+ if (current_len > best_len) {
4076+ best_len = current_len;
4077+ expected_threads = db[idx].threads;
4078+ found = true;
40584079 }
4080+ // since hashing implies uniqueness in this dataset, one could argue for breaking here,
4081+ // but we keep scanning to make sure the longest substring match wins when overlaps exist,
4082+ // e.g. when both "i9-11900" and "i9-11900K" match
40594083 }
40604084 }
40614085 }
@@ -5019,105 +5043,145 @@ struct VM {
50195043 return false;
50205044 #else
50215045 auto is_smt_enabled = []() noexcept -> bool {
5022- auto popcount = [](uint64_t v) noexcept -> int {
5023- #if (GCC) || (CLANG)
5024- return __builtin_popcountll(v);
5025- #elif (MSVC)
5026- #if (x86_32)
5027- return static_cast<int>(
5028- __popcnt(static_cast<unsigned int>(v)) +
5029- __popcnt(static_cast<unsigned int>(v >> 32))
5030- );
5031- #else
5032- return static_cast<int>(__popcnt64(static_cast<unsigned long long>(v)));
5033- #endif
5034- #else
5035- int c = 0;
5036- while (v) { c += static_cast<int>(v & 1ull); v >>= 1; }
5037- return c;
5038- #endif
5039- };
50405046 #if (WINDOWS)
50415047 DWORD len = 0;
50425048 if (GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &len) ||
50435049 GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
50445050 return false;
50455051 }
5052+
50465053 std::vector<char> buf(static_cast<size_t>(len));
50475054 if (!GetLogicalProcessorInformationEx(RelationProcessorCore,
50485055 reinterpret_cast<SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*>(buf.data()), &len)) {
50495056 return false;
50505057 }
5058+
50515059 // look at the first RelationProcessorCore record: if more than one logical processor maps to that core, SMT is enabled from the OS point of view
50525060 size_t offset = 0;
50535061 while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) <= static_cast<size_t>(len)) {
50545062 auto rec = reinterpret_cast<SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*>(buf.data() + offset);
5063+
50555064 if (rec->Relationship == RelationProcessorCore) {
50565065 const PROCESSOR_RELATIONSHIP& pr = rec->Processor;
50575066 unsigned total = 0;
5067+
50585068 for (WORD i = 0; i < pr.GroupCount; ++i) {
5059- total += popcount(static_cast<uint64_t>(pr.GroupMask[i].Mask));
5069+ total += util:: popcount(static_cast<uint64_t>(pr.GroupMask[i].Mask));
50605070 }
5071+
50615072 return total > 1;
50625073 }
5074+
50635075 if (rec->Size == 0) break;
50645076 offset += rec->Size;
50655077 }
50665078 return false;
50675079 #elif (APPLE)
50685080 int logical = 0, physical = 0;
50695081 size_t sz = sizeof(logical);
5070- if (sysctlbyname("hw.logicalcpu", &logical, &sz, nullptr, 0) != 0) logical = 0;
5082+
5083+ if (sysctlbyname("hw.logicalcpu", &logical, &sz, nullptr, 0) != 0) {
5084+ logical = 0;
5085+ }
5086+
50715087 sz = sizeof(physical);
5072- if (sysctlbyname("hw.physicalcpu", &physical, &sz, nullptr, 0) != 0) physical = 0;
5073- if (logical > 0 && physical > 0) return logical > physical;
5088+
5089+ if (sysctlbyname("hw.physicalcpu", &physical, &sz, nullptr, 0) != 0) {
5090+ physical = 0;
5091+ }
5092+
5093+ if (logical > 0 && physical > 0) {
5094+ return logical > physical;
5095+ }
5096+
50745097 return false;
50755098 #else
50765099 // check cpu0 thread_siblings_list
5077- {
5078- std::ifstream f("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list");
5079- if (f) {
5080- std::string s;
5081- if (std::getline(f, s)) {
5082- // trim
5083- size_t a = 0; while (a < s.size() && std::isspace(static_cast<unsigned char>(s[a]))) ++a;
5084- size_t b = s.size(); while (b > a && std::isspace(static_cast<unsigned char>(s[b - 1]))) --b;
5085- if (b > a) {
5086- for (size_t k = a; k < b; ++k) {
5087- if (s[k] == ',' || s[k] == '-') return true;
5088- }
5089- return false;
5100+ std::ifstream f("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list");
5101+ if (f) {
5102+ std::string s;
5103+ if (std::getline(f, s)) {
5104+ // trim
5105+ size_t a = 0;
5106+
5107+ while (a < s.size() && std::isspace(static_cast<u8>(s[a]))) {
5108+ ++a;
5109+ }
5110+
5111+ size_t b = s.size();
5112+
5113+ while (b > a && std::isspace(static_cast<u8>(s[b - 1]))) {
5114+ --b;
5115+ }
5116+
5117+ if (b > a) {
5118+ for (size_t k = a; k < b; ++k) {
5119+ if (s[k] == ',' || s[k] == '-') return true;
50905120 }
5121+ return false;
50915122 }
50925123 }
50935124 }
5125+
50945126 // /proc/cpuinfo for unique (physical id, core id) pairs vs processors
50955127 std::ifstream cpuinfo("/proc/cpuinfo");
5096- if (!cpuinfo) return false;
5128+
5129+ if (!cpuinfo) {
5130+ return false;
5131+ }
5132+
50975133 std::string line;
50985134 int processors = 0;
50995135 bool in_section = false;
51005136 int cur_phys = -1, cur_core = -1;
51015137 std::vector<std::pair<int, int>> cores;
5138+
51025139 while (std::getline(cpuinfo, line)) {
51035140 if (line.empty()) {
5104- if (cur_phys != -1 && cur_core != -1) cores.emplace_back(cur_phys, cur_core);
5141+ if (cur_phys != -1 && cur_core != -1) {
5142+ cores.emplace_back(cur_phys, cur_core);
5143+ }
5144+
51055145 cur_phys = cur_core = -1;
51065146 in_section = false;
51075147 continue;
51085148 }
5149+
51095150 auto pos = line.find(':');
51105151 if (pos == std::string::npos) continue;
51115152 std::string key = line.substr(0, pos);
51125153 std::string val = line.substr(pos + 1);
5154+
51135155 // trim
5114- while (!key.empty() && std::isspace(static_cast<unsigned char>(key.back()))) key.pop_back();
5115- while (!val.empty() && std::isspace(static_cast<unsigned char>(val.front()))) val.erase(val.begin());
5116- if (key == "processor") ++processors;
5117- else if (key == "physical id") { try { cur_phys = std::stoi(val); } catch (...) { cur_phys = -1; } }
5118- else if (key == "core id") { try { cur_core = std::stoi(val); } catch (...) { cur_core = -1; } }
5156+ while (!key.empty() && std::isspace(static_cast<u8>(key.back()))) {
5157+ key.pop_back();
5158+ }
5159+
5160+ while (!val.empty() && std::isspace(static_cast<u8>(val.front()))) {
5161+ val.erase(val.begin());
5162+ }
5163+
5164+ if (key == "processor") {
5165+ processors++;
5166+ } else if (key == "physical id") {
5167+ try {
5168+ cur_phys = std::stoi(val);
5169+ } catch (...) {
5170+ cur_phys = -1;
5171+ }
5172+ } else if (key == "core id") {
5173+ try {
5174+ cur_core = std::stoi(val);
5175+ } catch (...) {
5176+ cur_core = -1;
5177+ }
5178+ }
51195179 }
5120- if (cur_phys != -1 && cur_core != -1) cores.emplace_back(cur_phys, cur_core);
5180+
5181+ if (cur_phys != -1 && cur_core != -1) {
5182+ cores.emplace_back(cur_phys, cur_core);
5183+ }
5184+
51215185 if (!cores.empty() && processors > 0) {
51225186 std::sort(cores.begin(), cores.end());
51235187 cores.erase(std::unique(cores.begin(), cores.end()), cores.end());
@@ -5134,14 +5198,15 @@ struct VM {
51345198 debug(info.debug_tag, ": CPU model = ", info.model_name);
51355199
51365200 const u32 actual = memo::threadcount::fetch();
5201+
51375202 if (actual != info.expected_threads) {
51385203 debug(info.debug_tag, ": Current threads -> ", actual);
51395204 const bool smt = is_smt_enabled();
5205+
51405206 if (smt) {
51415207 debug(info.debug_tag, ": Expected ", info.expected_threads, " threads");
51425208 return true;
5143- }
5144- else {
5209+ } else {
51455210 debug(info.debug_tag, ": Expected ", info.expected_threads, " threads, but found SMT disabled");
51465211 return false;
51475212 }
0 commit comments