Skip to content

Commit 6541e5a

Browse files
author
Requiem
committed
fix: VM::TRAP stack unwinding issues because rbx clobbering
1 parent fded49d commit 6541e5a

File tree

2 files changed

+89
-52
lines changed

2 files changed

+89
-52
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ endif()
2727

2828
# compiler flags
2929
if (MSVC)
30-
set(CMAKE_CXX_FLAGS "/Wall /W4 /EHsc")
30+
set(CMAKE_CXX_FLAGS "/W4 /EHsc")
3131
else()
3232
# Linux and Apple
3333
set(CMAKE_CXX_FLAGS "-Wextra -Wall -Wextra -Wconversion -Wdouble-promotion -Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion")

src/vmaware.hpp

Lines changed: 88 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -9198,17 +9198,18 @@ struct VM {
91989198

91999199
/**
92009200
* @brief Check if after raising two traps at the same RIP, a hypervisor interferes with the instruction pointer delivery
9201-
* @category Windows, x86
9201+
* @category Windows, x86_64
92029202
* @implements VM::TRAP
92039203
*/
92049204
[[nodiscard]] static bool trap() {
92059205
bool hypervisorCaught = false;
9206-
#if (x86)
9207-
// when a single-step (TF) and hardware breakpoint (DR0) collide, Intel CPUs set both DR6.BS and DR6.B0 to report both events, which help make this detection trick
9206+
#if (x86_64)
9207+
// when a single-step (TF) and hardware breakpoint (DR0) collide, Intel CPUs set both DR6.BS and DR6.B0 to report both events, which helps make this detection trick work
92089208
// AMD CPUs prioritize the breakpoint, setting only its corresponding bit in DR6 and clearing the single-step bit, which is why this technique is not compatible with AMD
92099209
if (!cpu::is_intel()) {
92109210
return false;
92119211
}
9212+
92129213
// mobile SKUs can "false flag" this check
92139214
const char* brand = cpu::get_brand();
92149215
for (const char* c = brand; *c; ++c) {
@@ -9221,15 +9222,19 @@ struct VM {
92219222
}
92229223
}
92239224

9224-
// push flags, set TF-bit, pop flags, execute a dummy instruction, then return
9225+
// We must preserve RBX because CPUID clobbers it, and RBX is a non-volatile
9226+
// register in x64. If we don't restore it, the calling function (VM::check) crashes
9227+
// we use MOV R8, RBX instead of PUSH RBX. Pushing to the stack without
9228+
// unwind metadata breaks SEH in x64 (OS cannot find the handler), causing a crash
92259229
constexpr u8 trampoline[] = {
9226-
0x9C, // pushfq
9227-
0x81, 0x04, 0x24, // OR DWORD PTR [RSP], 0x10100
9230+
0x49, 0x89, 0xD8, // mov r8, rbx (save rbx to volatile register r8)
9231+
0x9C, // pushfq
9232+
0x81, 0x04, 0x24, // OR DWORD PTR [RSP], 0x10100 (Set TF)
92289233
0x00, 0x01, 0x01, 0x00,
9229-
0x9D, // popfq
9230-
0x0F, 0xA2, // cpuid (or any other trappable instruction, but this one is ok since it has to be trapped in every x86 hv)
9231-
0x90, 0x90, 0x90, // NOPs to pad to breakpoint offset
9232-
0xC3 // ret
9234+
0x9D, // popfq
9235+
0x0F, 0xA2, // cpuid
9236+
0x4C, 0x89, 0xC3, // mov rbx, r8 (restore rbx from r8) - trap happens here
9237+
0xC3 // ret
92339238
};
92349239
SIZE_T trampSize = sizeof(trampoline);
92359240

@@ -9256,13 +9261,14 @@ struct VM {
92569261
using NtGetContextThread_t = NTSTATUS(__stdcall*)(HANDLE, PCONTEXT);
92579262
using NtSetContextThread_t = NTSTATUS(__stdcall*)(HANDLE, PCONTEXT);
92589263

9259-
const auto pNtAllocateVirtualMemory = reinterpret_cast<NtAllocateVirtualMemory_t>(funcs[0]);
9260-
const auto pNtProtectVirtualMemory = reinterpret_cast<NtProtectVirtualMemory_t>(funcs[1]);
9261-
const auto pNtFreeVirtualMemory = reinterpret_cast<NtFreeVirtualMemory_t>(funcs[2]);
9262-
const auto pNtFlushInstructionCache = reinterpret_cast<NtFlushInstructionCache_t>(funcs[3]);
9263-
const auto pNtClose = reinterpret_cast<NtClose_t>(funcs[4]);
9264-
const auto pNtGetContextThread = reinterpret_cast<NtGetContextThread_t>(funcs[5]);
9265-
const auto pNtSetContextThread = reinterpret_cast<NtSetContextThread_t>(funcs[6]);
9264+
// volatile ensures these are loaded from the stack after SEH unwind when compiled with aggressive optimizations
9265+
NtAllocateVirtualMemory_t volatile pNtAllocateVirtualMemory = reinterpret_cast<NtAllocateVirtualMemory_t>(funcs[0]);
9266+
NtProtectVirtualMemory_t volatile pNtProtectVirtualMemory = reinterpret_cast<NtProtectVirtualMemory_t>(funcs[1]);
9267+
NtFreeVirtualMemory_t volatile pNtFreeVirtualMemory = reinterpret_cast<NtFreeVirtualMemory_t>(funcs[2]);
9268+
NtFlushInstructionCache_t volatile pNtFlushInstructionCache = reinterpret_cast<NtFlushInstructionCache_t>(funcs[3]);
9269+
NtClose_t volatile pNtClose = reinterpret_cast<NtClose_t>(funcs[4]);
9270+
NtGetContextThread_t volatile pNtGetContextThread = reinterpret_cast<NtGetContextThread_t>(funcs[5]);
9271+
NtSetContextThread_t volatile pNtSetContextThread = reinterpret_cast<NtSetContextThread_t>(funcs[6]);
92669272

92679273
if (!pNtAllocateVirtualMemory || !pNtProtectVirtualMemory || !pNtFlushInstructionCache ||
92689274
!pNtFreeVirtualMemory || !pNtGetContextThread || !pNtSetContextThread || !pNtClose) {
@@ -9284,7 +9290,8 @@ struct VM {
92849290
ULONG oldProt = 0;
92859291
st = pNtProtectVirtualMemory(hCurrentProcess, &tmpBase, &tmpSz, PAGE_EXECUTE_READ, &oldProt);
92869292
if (!NT_SUCCESS(st)) {
9287-
PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
9293+
PVOID freeBase = execMem;
9294+
SIZE_T freeSize = trampSize;
92889295
pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
92899296
return false;
92909297
}
@@ -9299,66 +9306,96 @@ struct VM {
92999306
const HANDLE hCurrentThread = reinterpret_cast<HANDLE>(-2LL);
93009307

93019308
if (!NT_SUCCESS(pNtGetContextThread(hCurrentThread, &origCtx))) {
9302-
PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
9309+
PVOID freeBase = execMem;
9310+
SIZE_T freeSize = trampSize;
93039311
pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
93049312
return false;
93059313
}
93069314

9307-
// set Dr0 to trampoline+offset (step triggers here)
9315+
// Set DR0 to trampoline + 14 (Instruction: mov rbx, r8)
9316+
// Offset calculation: mov_r8_rbx(3) + pushfq(1) + or(7) + popfq(1) + cpuid(2) = 14
9317+
// This is where single step traps after CPUID, and where we want the collision
9318+
const uintptr_t expectedTrapAddr = reinterpret_cast<uintptr_t>(execMem) + 14;
9319+
9320+
// set Dr0 to trampoline+offset
93089321
CONTEXT dbgCtx = origCtx;
9309-
const uintptr_t baseAddr = reinterpret_cast<uintptr_t>(execMem);
9310-
dbgCtx.Dr0 = baseAddr + 11; // single step breakpoint address
9311-
dbgCtx.Dr7 = 1; // enable local breakpoint 0
9322+
dbgCtx.Dr0 = expectedTrapAddr; // single step breakpoint address
9323+
dbgCtx.Dr7 = 1; // enable Local Breakpoint 0
93129324

93139325
if (!NT_SUCCESS(pNtSetContextThread(hCurrentThread, &dbgCtx))) {
93149326
pNtSetContextThread(hCurrentThread, &origCtx);
9315-
PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
9327+
PVOID freeBase = execMem;
9328+
SIZE_T freeSize = trampSize;
93169329
pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
93179330
return false;
93189331
}
93199332

9320-
auto vetExceptions = [&](u32 code, EXCEPTION_POINTERS* info) noexcept -> u8 {
9321-
// if not single-step, hypervisor likely swatted our trap
9322-
if (code != static_cast<DWORD>(0x80000004L)) {
9323-
hypervisorCaught = true;
9324-
return EXCEPTION_CONTINUE_SEARCH;
9325-
}
9333+
// Context structure to pass data to the static SEH handler
9334+
struct TrapContext {
9335+
uintptr_t expectedTrapAddr;
9336+
u8* hitCount;
9337+
bool* hypervisorCaught;
9338+
};
93269339

9327-
// count breakpoint hits
9328-
hitCount++;
9340+
// Static class for SEH filtering to avoid Release mode Lambda corruption
9341+
struct SEH_Trap {
9342+
static LONG Vet(u32 code, EXCEPTION_POINTERS* info, TrapContext* ctx) noexcept {
9343+
// This static filter returns LONG so that it can return EXCEPTION_CONTINUE_EXECUTION
9344+
if (code != static_cast<DWORD>(0x80000004L)) {
9345+
return EXCEPTION_CONTINUE_SEARCH;
9346+
}
93299347

9330-
// validate exception address matches our breakpoint location
9331-
if (reinterpret_cast<uintptr_t>(info->ExceptionRecord->ExceptionAddress) != baseAddr + 11) {
9332-
hypervisorCaught = true;
9333-
return EXCEPTION_EXECUTE_HANDLER;
9334-
}
9348+
// Verify exception happened at our calculated offset
9349+
if (reinterpret_cast<uintptr_t>(info->ExceptionRecord->ExceptionAddress) != ctx->expectedTrapAddr) {
9350+
info->ContextRecord->EFlags &= ~0x100; // Clear TF
9351+
info->ContextRecord->Dr7 &= ~1; // Clear DR0 Enable
9352+
*ctx->hypervisorCaught = true;
9353+
return EXCEPTION_CONTINUE_EXECUTION;
9354+
}
93359355

9336-
// check if Trap Flag and DR0 contributed
9337-
constexpr u64 required_bits = (1ULL << 14) | 1ULL;
9338-
const u64 status = info->ContextRecord->Dr6;
9356+
(*ctx->hitCount)++;
93399357

9340-
if ((status & required_bits) != required_bits) {
9341-
if (util::hyper_x() != HYPERV_ARTIFACT_VM)
9342-
hypervisorCaught = true; // detects type 1 Hyper-V too, which we consider legitimate
9358+
// check if Trap Flag and DR0 contributed
9359+
constexpr u64 required_bits = (1ULL << 14) | 1ULL; // BS | B0
9360+
const u64 status = info->ContextRecord->Dr6;
9361+
9362+
if ((status & required_bits) != required_bits) {
9363+
if (util::hyper_x() != HYPERV_ARTIFACT_VM) // detects type 1 Hyper-V too, which we consider legitimate
9364+
*ctx->hypervisorCaught = true;
9365+
}
9366+
9367+
// Clear Trap Flag to stop single stepping
9368+
info->ContextRecord->EFlags &= ~0x100;
9369+
9370+
// Clear DR7 Local Enable 0 to disable the hardware breakpoint
9371+
// If we don't do this, the next instruction will trigger the breakpoint again immediately
9372+
info->ContextRecord->Dr7 &= ~1;
9373+
9374+
// executes mov rbx, r8 (restore), and returns
9375+
return EXCEPTION_CONTINUE_EXECUTION;
93439376
}
9344-
return EXCEPTION_EXECUTE_HANDLER;
93459377
};
93469378

9379+
TrapContext ctx = { expectedTrapAddr, &hitCount, &hypervisorCaught };
9380+
93479381
__try {
93489382
reinterpret_cast<void(*)()>(execMem)();
93499383
}
9350-
__except (vetExceptions(_exception_code(), reinterpret_cast<EXCEPTION_POINTERS*>(_exception_info()))) {
9351-
// if we didn't hit exactly once, assume hypervisor interference
9352-
if (hitCount != 1) {
9353-
hypervisorCaught = true;
9354-
}
9384+
__except (SEH_Trap::Vet(_exception_code(), reinterpret_cast<EXCEPTION_POINTERS*>(_exception_info()), &ctx)) {
9385+
// This block is effectively unreachable because SEH_Trap::Vet only returns EXCEPTION_CONTINUE_EXECUTION or EXCEPTION_CONTINUE_SEARCH
9386+
}
9387+
9388+
// If the hypervisor swallowed the exception entirely, hitCount will be 0
9389+
if (hitCount != 1) {
9390+
hypervisorCaught = true;
93559391
}
93569392

93579393
pNtSetContextThread(hCurrentThread, &origCtx);
93589394

9359-
PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
9395+
PVOID freeBase = execMem;
9396+
SIZE_T freeSize = trampSize;
93609397
pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
9361-
#endif
9398+
#endif
93629399
return hypervisorCaught;
93639400
}
93649401

0 commit comments

Comments
 (0)