You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
// we used a rng before running the traditional rdtsc-cpuid-rdtsc trick
4944
4944
4945
4945
// sometimes not intercepted in some hvs (like VirtualBox) under compat mode
4946
-
autocpuid = [&]() noexcept -> u64 {
4946
+
autocpuid_ex = [&](int leaf, int subleaf) noexcept -> u64 {
4947
4947
#if (MSVC)
4948
4948
// make regs volatile so writes cannot be optimized out, if this isn't added and the code is compiled in release mode, cycles would be around 40 even under Hyper-V
4949
4949
volatileint regs[4]{};
@@ -4956,7 +4956,7 @@ struct VM {
4956
4956
// prevent the compiler from moving the __cpuid call before the t1 read
4957
4957
COMPILER_BARRIER();
4958
4958
4959
-
__cpuid((int*)regs, 0); // not using cpu::cpuid to get a chance of inlining
4959
+
__cpuidex((int*)regs, leaf, subleaf);
4960
4960
4961
4961
COMPILER_BARRIER();
4962
4962
@@ -4984,7 +4984,7 @@ struct VM {
4984
4984
// because the compiler must honor the write to a volatile variable.
4985
4985
asmvolatile("cpuid"
4986
4986
: "=a"(a), "=b"(b), "=c"(c), "=d"(d)
4987
-
: "a"(0)
4987
+
: "a"(leaf), "c"(subleaf)
4988
4988
: "memory");
4989
4989
4990
4990
COMPILER_BARRIER();
@@ -5001,40 +5001,6 @@ struct VM {
5001
5001
5002
5002
constexpru16 iterations = 1000;
5003
5003
5004
-
// pre-allocate sample buffer and touch pages to avoid page faults by MMU during measurement
5005
-
std::vector<u64> samples;
5006
-
samples.resize(iterations);
5007
-
for (unsigned i = 0; i < iterations; ++i) samples[i] = 0; // or RtlSecureZeroMemory (memset)
5008
-
5009
-
/*
5010
-
* We want to move our thread from the Running state to the Waiting state
5011
-
* When the sleep expires (at the next timer tick), the OS moves VMAware's thread to the Ready state
5012
-
* When it picks us up again, it grants VMAware a fresh quantum, typically varying between 2 ticks (30ms) and 6 ticks (90ms) on Windows Client editions
5013
-
* The default resolution of the Windows clock we're using is 64Hz
5014
-
* Because we're calling NtDelayExecution with only 1ms, the kernel interprets this as "Sleep for at least 1ms"
5015
-
* Since the hardware interrupt (tick) only fires every 15.6ms and we're not using timeBeginPeriod, the kernel cannot wake us after exactly 1ms
5016
-
* So instead, it does what we want and wakes us up at the very next timer interrupt
5017
-
* That's the reason why it's only 1ms and we're not using CreateWaitableTimerEx / SetWaitableTimerEx
5018
-
* Sleep(0) would return instantly in some circumstances
5019
-
* This gives us more time for sampling before we're rescheduled again
5020
-
*/
5021
-
5022
-
#if (WINDOWS)
5023
-
// voluntary context switch to get a fresh quantum
5024
-
SleepEx(1, FALSE);
5025
-
#else
5026
-
// should work similarly in Unix-like operating systems
0 commit comments