From da5f6e3ec04adddbe0fc8330013178c1aa35321e Mon Sep 17 00:00:00 2001 From: Quang Trinh Date: Sun, 21 Jun 2026 18:03:24 +0700 Subject: [PATCH 1/3] feat: release-readiness hardening for v3.9.0 --- .github/workflows/pr-check.yml | 23 +- AGENTS.md | 9 +- CMakeLists.txt | 4 +- README.md | 4 +- docs/misc/hot-path-memory.md | 27 +- include/DetourModKit/async_logger.hpp | 5 +- include/DetourModKit/bootstrap.hpp | 5 +- include/DetourModKit/event_dispatcher.hpp | 37 +++ include/DetourModKit/hook_manager.hpp | 53 +++- include/DetourModKit/memory.hpp | 113 +++++++- include/DetourModKit/rtti.hpp | 29 +- include/DetourModKit/rtti_dissect.hpp | 58 +++- include/DetourModKit/version.hpp.in | 6 +- src/bootstrap.cpp | 106 ++++--- src/config.cpp | 76 ++++- src/config_watcher.cpp | 49 +++- src/input_intercept.cpp | 12 + src/memory.cpp | 326 +++++++++++++++++----- src/memory_internal.hpp | 24 +- src/rtti.cpp | 17 +- src/rtti_dissect.cpp | 11 +- src/scanner.cpp | 80 ++++-- src/scanner_cascade.cpp | 36 ++- src/scanner_internal.hpp | 13 + src/string_xref.cpp | 74 ++++- tests/test_config.cpp | 66 +++++ tests/test_memory.cpp | 100 +++++++ tests/test_memory_chain.cpp | 117 ++++++++ tests/test_platform.cpp | 8 +- tests/test_rtti_dissect.cpp | 54 ++++ tests/test_rtti_reverse.cpp | 39 +++ tests/test_scanner.cpp | 22 +- tests/test_string_xref.cpp | 10 +- tests/test_version.cpp | 4 +- 34 files changed, 1383 insertions(+), 234 deletions(-) diff --git a/.github/workflows/pr-check.yml b/.github/workflows/pr-check.yml index 12759ff..1da0c67 100644 --- a/.github/workflows/pr-check.yml +++ b/.github/workflows/pr-check.yml @@ -15,7 +15,9 @@ on: workflow_dispatch: env: - MINGW_BIN: C:\mingw64\bin + # Pin the exact GCC the release workflow ships with (Chocolatey MinGW 13.2.0) so the coverage/PR gate and the + # released artifact compile with one toolchain instead of the runner's unversioned C:\mingw64. + MINGW_BIN: C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin jobs: build-test: @@ -28,6 +30,18 @@ jobs: with: submodules: "recursive" + - name: Cache MinGW + id: cache-mingw + uses: actions/cache@v4 + with: + path: C:\ProgramData\chocolatey\lib\mingw + key: ${{ runner.os }}-mingw-13.2.0-v2 + + - name: Install MinGW (if not cached) + if: steps.cache-mingw.outputs.cache-hit != 'true' + run: choco install mingw --version=13.2.0 --yes --force --no-progress + shell: powershell + - name: Add MinGW to PATH run: echo "${{ env.MINGW_BIN }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append shell: powershell @@ -53,10 +67,17 @@ jobs: - name: Verify Tools run: | + echo "--- gcc ---" && gcc --version echo "--- g++ ---" && g++ --version echo "--- ninja ---" && ninja --version echo "--- cmake ---" && cmake --version echo "--- gcovr ---" && gcovr --version + # Surface the resolved toolchain in the job summary so the gate's GCC is visible without opening the log. + echo "### Toolchain" >> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" + gcc --version | head -1 >> "$GITHUB_STEP_SUMMARY" + g++ --version | head -1 >> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" shell: bash - name: Configure (Debug + Tests + Coverage) diff --git a/AGENTS.md b/AGENTS.md index dd81706..b77949d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,7 +4,7 @@ DetourModKit is a C++23 static library for Windows game modding. It provides AOB scanning, function hooking (via SafetyHook), async logging, INI configuration, input polling, and memory utilities. The library is consumed by mod DLLs injected into game processes. -**Stack:** C++23, CMake 3.25+, Ninja, GoogleTest. Targets MinGW (GCC 12+) and MSVC 2022+. +**Stack:** C++23, CMake 3.28+, Ninja, GoogleTest. Targets MinGW (GCC 12+) and MSVC 2022+. **Key dependencies (git submodules):** @@ -262,7 +262,7 @@ Markdown files (`*.md`) are **not** hard-wrapped at 80 columns. Write one logica - **Callbacks are host-critical:** Hook callbacks and input callbacks run on the game's threads. Do not perform unbounded allocation, blocking I/O, hook creation/removal, or config reload directly inside them; defer that work to a worker or queue. Logging from a callback must use the no-throw `Logger::log_noexcept` / `Logger::try_log` so a formatting or sink failure cannot escape into the host. - **API-discipline labels:** Public docblocks classify a function's call-site safety with one of three labels, applied as a `@note` and kept alongside (never replacing) any existing more-specific caveat. *Callback-safe* -- non-blocking, no unbounded allocation, no blocking I/O, no lock escalation; safe to call from a hook or input callback on a game thread (the hot-path reads and status queries). *Setup/control-plane only* -- may block, allocate, take exclusive locks, or do I/O; call from init/shutdown or a worker thread, never from a hook or input callback (create/remove/enable/disable, start/stop/shutdown, config load/reload, cache init). *Best-effort* -- on failure it fails closed (no-op / false / dropped) and never throws or terminates the host (logging, diagnostics counters, `emit_safe`, noexcept fail-closed paths). - **Error returns:** `std::expected` for memory operations, `std::optional` for scanner results. Reserve exceptions for construction failures and truly exceptional conditions. -- **Security hardening:** The build enables ASLR (`/DYNAMICBASE`), DEP (`/NXCOMPAT`), and Control Flow Guard (`/GUARD:CF`) on MSVC, and equivalent flags (`--dynamicbase`, `--nxcompat`) on MinGW. Because DetourModKit is a static archive (the consumer performs the final link of the mod DLL/EXE), these switches are also propagated to `find_package` / `add_subdirectory` consumers via `target_link_options(DetourModKit INTERFACE ...)`, selected from the linker frontend detected at configure time so the right spelling reaches MSVC/clang-cl and MinGW/Clang while preserving the CMake 3.25 minimum. Do not remove these. +- **Security hardening:** The build enables ASLR (`/DYNAMICBASE`), DEP (`/NXCOMPAT`), and Control Flow Guard (`/GUARD:CF`) on MSVC, and equivalent flags (`--dynamicbase`, `--nxcompat`) on MinGW. Because DetourModKit is a static archive (the consumer performs the final link of the mod DLL/EXE), these switches are also propagated to `find_package` / `add_subdirectory` consumers via `target_link_options(DetourModKit INTERFACE ...)`, selected from the linker frontend detected at configure time so the right spelling reaches MSVC/clang-cl and MinGW/Clang while preserving the CMake 3.28 minimum. Do not remove these. ### Lambda conventions @@ -308,7 +308,7 @@ dispatcher.emit_safe(PlayerStateChanged{.health = player->health}); ### Memory access in hook callbacks -Do not add `Memory::is_readable()` or `Memory::is_writable()` before every field read in hook callbacks. Use those predicates for setup validation and diagnostics. Use `seh_read_chain` for unstable live game pointers, and use `read_ptr_unchecked` only when the caller can prove the pointer chain is live for the current frame. The full pattern -- worked examples, the primitive selection table, and the anti-patterns to remove -- lives in [docs/misc/hot-path-memory.md](docs/misc/hot-path-memory.md). +Do not add `Memory::is_readable()` or `Memory::is_writable()` before every field read in hook callbacks. Use those predicates for setup validation and diagnostics. Use `seh_read_chain` for unstable live game pointers, and use `read_ptr_unchecked` only when the caller can prove the pointer chain is live for the current frame. For per-frame WRITES through a resolved address or chain, use the guarded `seh_write_chain` / `seh_write_chain_bytes` / `seh_write_bytes` family (the write counterpart of `seh_read_*`, with no protection change or i-cache flush); reserve `write_bytes` for one-shot code patches, since it flips page protection, flushes the instruction cache, and invalidates the cache range. The full pattern -- worked examples, the primitive selection table, and the anti-patterns to remove -- lives in [docs/misc/hot-path-memory.md](docs/misc/hot-path-memory.md). ### Scanning process memory @@ -395,8 +395,9 @@ These are called at 60+ fps from game hook callbacks. Never add allocations, exc - `Memory::is_readable_nonblocking()` -- try_lock_shared + cache lookup (returns Unknown on lock contention, a cache miss, or the init-publication window; falls back to a blocking VirtualQuery before `init_cache()`) - `Memory::read_ptr_unsafe()` -- SEH-protected raw dereference (MSVC), vectored-handler-guarded read on MinGW (no per-call VirtualQuery; the fault guard also closes the stale-cache dereference) - `Memory::read_ptr_unchecked()` -- inline pointer dereference with source and result user-mode range guards (a low-address floor plus a `USERSPACE_PTR_MAX` ceiling that rejects kernel-range and non-canonical values, which also subsumes pointer-arithmetic wraparound), no SEH (caller must guarantee structural pointer validity); debug builds add an `is_readable` assert that catches a stale or unmapped source pointer, compiled out in release so the hot path stays a single guarded memcpy -- `Memory::seh_read()` / `seh_read_bytes()` -- typed and raw SEH-guarded reads; single `__try` frame on MSVC, and on MinGW a single `rep movsb` copy under a process-wide vectored exception handler (installed lazily / by `init_cache`, removed by `shutdown_cache`) that recovers via a non-unwinding `__builtin_setjmp`/`__builtin_longjmp`, so the success path runs no syscall. Both toolchains swallow the same foreign-read fault set -- `EXCEPTION_ACCESS_VIOLATION`, `STATUS_GUARD_PAGE_VIOLATION`, and `EXCEPTION_IN_PAGE_ERROR` (a file-backed or image-mapped page failing to page in, e.g. during an RTTI / section walk) -- via the shared predicate `Memory::detail::is_guarded_read_fault`, and let any other fault continue the handler search; the MinGW handler additionally claims only faults whose address lies in the foreign range being read. A guarded read uses a single per-thread guard, so it must not nest on one thread (DMK reads are synchronous, so it does not); if `AddVectoredExceptionHandler` ever fails the reads fall back to VirtualQuery validation. Used by `Rtti` for chained RTTI walks +- `Memory::seh_read()` / `seh_read_bytes()` -- typed and raw SEH-guarded reads; single `__try` frame on MSVC, and on MinGW a single `rep movsb` copy under a process-wide vectored exception handler (installed lazily / by `init_cache`, removed by `shutdown_cache`) that recovers via a non-unwinding `__builtin_setjmp`/`__builtin_longjmp`, so the success path runs no syscall. Both toolchains swallow the same foreign-read fault set -- `EXCEPTION_ACCESS_VIOLATION`, `STATUS_GUARD_PAGE_VIOLATION`, and `EXCEPTION_IN_PAGE_ERROR` (a file-backed or image-mapped page failing to page in, e.g. during an RTTI / section walk) -- via the shared predicate `Memory::detail::is_guarded_read_fault`, and let any other fault continue the handler search; the MinGW handler additionally claims only faults whose address lies in the foreign range being read. A guarded read uses a single per-thread guard, so it must not nest on one thread (DMK reads are synchronous, so it does not); if `AddVectoredExceptionHandler` ever fails the byte-copy reads fall back to VirtualQuery plus ReadProcessMemory, while bulk region scans fail closed. Used by `Rtti` for chained RTTI walks - `Memory::seh_resolve_chain()` / `seh_read_chain()` -- resolves or reads a whole multi-level pointer chain under one fault guard: one out-of-line call instead of N separate `seh_read` calls, with each intermediate link kept in a register and pre-screened by `plausible_userspace_ptr` (a faulting or implausible link aborts the walk and returns nullopt/false). On MinGW each link read is guarded by the vectored handler +- `Memory::seh_write()` / `seh_write_bytes()` / `seh_write_chain()` / `seh_write_chain_bytes()` -- guarded per-frame WRITES to already-writable game memory (a camera transform, a player field), the write counterpart of the `seh_read_*` reads. MSVC uses one `__try` frame, and MinGW x64 uses the vectored-handler copy path with a fallback through VirtualQuery plus WriteProcessMemory, with no page-protection change, no instruction-cache flush, and no cache invalidation, so a stale chain fails closed (false) instead of faulting the host. `Memory::write_bytes()` is the setup/patch-only counterpart (it flips page protection, flushes the instruction cache, and invalidates the cache range); never put it on a per-frame path - `Memory::plausible_userspace_ptr(p)` -- `inline constexpr` user-mode pointer plausibility test; pure arithmetic with no syscall and no memory access (early-rejects stale/sentinel/torn pointers before an SEH-guarded read) - `Memory::contains(range, p)` -- constexpr point-in-range test for module bounds checks - `Memory::own_module_range()` / `host_module_range()` -- magic-static cached, single atomic load on the fast path diff --git a/CMakeLists.txt b/CMakeLists.txt index e28f221..94034ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.28) -project(DetourModKit VERSION 3.8.2 LANGUAGES CXX) +project(DetourModKit VERSION 3.9.0 LANGUAGES CXX) # GNUInstallDirs defines CMAKE_INSTALL_LIBDIR / BINDIR / INCLUDEDIR / DOCDIR. It must be included before any # install() rule reads those variables; otherwise they expand to empty and components land at the install-prefix @@ -350,7 +350,7 @@ if(WIN32) # stated explicitly, and /GUARD:CF activates the Control Flow Guard load config # so DetourModKit's /guard:cf-instrumented code is enforced in the consumer # binary. CMake's compiler-frontend generator expression is 3.30+, so keep the - # project compatible with its 3.25 minimum by detecting the active frontend at + # project compatible with its 3.28 minimum by detecting the active frontend at # configure time. Release packages are compiler-specific, and add_subdirectory # consumers share the same toolchain that configured this target. set(_dmk_uses_msvc_linker_frontend OFF) diff --git a/README.md b/README.md index 10afdd2..70935dc 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ DetourModKit is a full-featured C++23 toolkit designed to simplify common tasks | Configuration | INI-based settings with key combo support and hot-reload (file watcher + hotkey) | `config.hpp`, `config_watcher.hpp` | | Logger | Synchronous singleton logger with format strings | `logger.hpp` | | Async Logger | Lock-free bounded queue logger with batched writes | `async_logger.hpp` | -| Memory Utilities | Readability checks, region cache, safe pointer reads, typed SEH reads, PE module range queries | `memory.hpp` | +| Memory Utilities | Readability checks, region cache, safe pointer reads, typed SEH reads/writes, fault-guarded pointer-chain reads/writes, PE module range queries | `memory.hpp` | | MSVC RTTI Walker | Recover mangled type names from runtime vtables; pointer-table scan with caller-owned cache; reverse name-to-vtable resolver and cached identity handle | `rtti.hpp` | | RTTI Self-Heal | Reverse-identify the object behind a pointer slot (typed-error and ordered candidate-fallback forms); self-heal a field offset after a patch shifts the struct layout; rigid multi-field drift solver; drift-telemetry report with a durable, diffable manifest (open-failure distinguished from corrupt) | `rtti_dissect.hpp`, `drift_manifest.hpp` | | Anchor Registry | One declarative table over the self-healing backends (vtable-by-name, AOB/RIP cascade, in-code constant, string xref, pinned literal) plus two-signal quorum corroboration with sub-anchor independence checks, optional post-resolve validators and opt-in validator policies, a manifest quality diagnostic, an address-independent evidence fingerprint for manifest diffing, opt-in parallel table resolution, and a per-game scan profile (broad-mode default, candidate order, backend deny-list), resolved and reported in a single pass | `anchors.hpp`, `profile.hpp` | @@ -153,6 +153,8 @@ See the [Config Hot-Reload Guide](docs/config-hot-reload/README.md) for the thre - `read_ptr_unsafe()` - safe pointer reads in hot paths (SEH-protected on MSVC, guarded by a process-wide vectored exception handler on MinGW, so the success path issues no per-call VirtualQuery) - `read_ptr_unchecked()` - inline header-only variant with a configurable lower-bound guard plus a `USERSPACE_PTR_MAX` ceiling for pointer chain traversal without per-call SEH overhead (caller must guarantee structural pointer validity) - `seh_read()` / `seh_read_bytes()` - typed SEH-guarded reads for arbitrary trivially copyable T (and contiguous byte ranges), used to walk torn pointer chains and parse PE headers without per-site `__try` boilerplate. Returns `std::optional` / `bool` so callers can distinguish "read faulted" from "read returned zero" +- `seh_resolve_chain()` / `seh_read_chain()` / `seh_read_chain_bytes()` - resolve a multi-level pointer chain (Cheat-Engine semantics) and optionally read its terminal slot, all under one fault guard, so a torn or implausible link aborts the walk and returns `std::nullopt` / `false` instead of faulting the host +- `seh_write()` / `seh_write_bytes()` / `seh_write_chain()` / `seh_write_chain_bytes()` - the WRITE counterpart of the `seh_read_*` family: a typed value, raw bytes, or a value at the end of a resolved pointer chain, all written to already-writable game memory under one fault guard with no page-protection change, instruction-cache flush, or cache invalidation, so a stale chain fails closed. Use these for per-frame data writes (a camera transform, a player field); reserve `write_bytes()` for one-shot code patches, which flips page protection and flushes the instruction cache - `module_range_for(addr)` / `own_module_range()` / `host_module_range()` - PE image range queries (base + SizeOfImage) for sanity-checking that a resolved vtable or return address lives inside the game image vs the heap or an injected DLL. Per-HMODULE cache for `module_range_for`; magic-static cache for the own and host variants - `Memory::contains(range, p)` - constexpr point-in-range test diff --git a/docs/misc/hot-path-memory.md b/docs/misc/hot-path-memory.md index 596a2dc..298a922 100644 --- a/docs/misc/hot-path-memory.md +++ b/docs/misc/hot-path-memory.md @@ -100,6 +100,28 @@ if (value) } ``` +## Writing in hot paths + +Writes follow the same rule as reads. A pointer the hook was handed is live by definition, so write through it directly (the anti-patterns below show why gating that write is pointless). But a value written through a *resolved* address -- a scanned base plus a pointer chain that can go stale between frames -- needs the same fault guard a read does, because the terminal slot can be unmapped the instant the chain is wrong. + +Use the guarded write primitives for that. They write to memory the target already keeps writable (a camera transform, a player field) under one fault guard, changing no page protection: + +```cpp +namespace Mem = DetourModKit::Memory; + +// Write a camera transform every frame through a resolved chain. Fault-guarded: +// a stale chain fails closed (returns false) instead of faulting the host. +const Matrix4x4 next = compute_camera(...); +if (!Mem::seh_write_chain(camera_base, CAMERA_TRANSFORM_CHAIN, next)) +{ + // Chain went stale this frame -- skip the write, do not crash. +} +``` + +`seh_write_bytes` / `seh_write` are the single-address forms; `seh_write_chain_bytes` / `seh_write_chain` resolve a chain and write its terminal slot through the guarded write path. None of them change page protection, flush the instruction cache, or invalidate the readability cache -- they are the write counterpart of `seh_read_*`. + +`write_bytes` is a different tool: it flips page protection to writable, writes, restores protection, flushes the instruction cache, and invalidates the cache range. That is exactly what a one-shot CODE patch on a read-only / executable page needs, and exactly the overhead you do not want per frame. Use `write_bytes` for setup-time patches; use `seh_write_*` for per-frame data writes. + ## Primitive selection | You have | You want | Use | @@ -110,13 +132,16 @@ if (value) | A multi-level pointer chain | The final address only | `seh_resolve_chain(base, {offsets...})` | | A multi-level pointer chain | A typed value at the end | `seh_read_chain(base, {offsets...})` | | A pointer chain you can prove is structurally valid this frame | The fastest possible read, no syscall, no SEH | `read_ptr_unchecked(base, offset)` | +| A resolved address that may be stale | One typed / range write that fails closed instead of faulting | `seh_write(addr, value)` / `seh_write_bytes(addr, src, n)` | +| A multi-level pointer chain | A guarded write at its terminal slot | `seh_write_chain(base, {offsets...}, value)` / `seh_write_chain_bytes(...)` | +| To patch CODE on a read-only / executable page once | A protection-flipping, i-cache-flushing write | `write_bytes(target, src, n)` -- setup/patch-only, never per frame | | To screen a candidate pointer before any read | A pure arithmetic plausibility test | `plausible_userspace_ptr(p)` | | To confirm a pointer lives in a known module | A branch-only range test | `contains(own_module_range(), p)` (capture the range once) | | To validate an address once at setup | A readability or writability check | `is_readable` / `is_writable` | ## Toolchain note -The `seh_*` primitives use real `__try` / `__except` on MSVC, where the success path is table-driven and costs nothing extra. On MinGW (which has no frame-based SEH) a 64-bit build installs a process-wide vectored exception handler once and runs the read as a guarded copy with no `VirtualQuery` on the success path, recovering a fault as `std::nullopt` / `false` instead of crashing. This is best-effort: `veh_read_bytes` takes the VEH guarded-copy path only when handler installation succeeded (`s_veh_handle` is non-null); if `ensure_veh_installed()` failed, or on a 32-bit MinGW build (`!_WIN64`), it falls back to `virtualquery_validated_copy`, a `VirtualQuery`-validated copy that pays a syscall per region. So MinGW `seh_*` reads are fault-safe either way, using the VEH when available and the VirtualQuery fallback otherwise. `read_ptr_unchecked` is still the fastest choice when you can prove the pointer is live for the current frame; otherwise prefer `seh_read` / `seh_read_chain` for stale or unmapped pointers. Shipping mod builds target MSVC, so the zero-cost path is the normal case. +The `seh_*` primitives use real `__try` / `__except` on MSVC, where the success path is table-driven and costs nothing extra. On MinGW (which has no frame-based SEH) a 64-bit build installs a process-wide vectored exception handler once and runs reads or writes through a guarded access path with no `VirtualQuery` on the success path, recovering a fault as `std::nullopt` / `false` instead of crashing. This is best-effort: the VEH path is used only when handler installation succeeded (`s_veh_handle` is non-null); if `ensure_veh_installed()` failed, or on a 32-bit MinGW build (`!_WIN64`), byte reads fall back to `VirtualQuery` plus `ReadProcessMemory` and byte writes fall back to `VirtualQuery` plus `WriteProcessMemory` after confirming the destination is currently writable. Bulk in-place region scans do not have a kernel-mediated byte-copy fallback, so they fail closed if the MinGW x64 handler is unavailable. `read_ptr_unchecked` is still the fastest choice when you can prove the pointer is live for the current frame; otherwise prefer `seh_read` / `seh_read_chain` for stale or unmapped pointers. Shipping mod builds target MSVC, so the zero-cost path is the normal case. ## Anti-patterns to remove diff --git a/include/DetourModKit/async_logger.hpp b/include/DetourModKit/async_logger.hpp index 2d071f0..7945e0c 100644 --- a/include/DetourModKit/async_logger.hpp +++ b/include/DetourModKit/async_logger.hpp @@ -139,7 +139,10 @@ namespace DetourModKit static constexpr size_t MAX_INLINE_SIZE = LOG_INLINE_MESSAGE_SIZE; static constexpr size_t MAX_VALID_LENGTH = MAX_MESSAGE_SIZE; - std::array buffer{}; + // Left uninitialized (raw storage): only [0, length) is ever read, and every constructor/move writes exactly + // length bytes before any read. Zero-filling the whole inline buffer would memset MAX_INLINE_SIZE bytes on each + // construction/enqueue for no observable effect. + std::array buffer; size_t length{0}; // Owned: allocated by StringPool, freed by reset(). diff --git a/include/DetourModKit/bootstrap.hpp b/include/DetourModKit/bootstrap.hpp index e412cc3..0f63111 100644 --- a/include/DetourModKit/bootstrap.hpp +++ b/include/DetourModKit/bootstrap.hpp @@ -82,9 +82,12 @@ namespace DetourModKit * @param shutdown_fn Called exactly once before DMK_Shutdown(). * @return bool32_t TRUE if the worker was started, FALSE if the process gate, instance mutex, or event creation * failed. + * @note noexcept by contract: this runs under the loader lock, so any internal throw (e.g. std::bad_alloc while + * Logger::configure builds its prefix / path strings) is caught, the partial attach is rolled back, and + * FALSE is returned rather than letting an exception cross the loader lock (undefined behavior). */ [[nodiscard]] bool32_t on_dll_attach(module_handle_t hMod, const ModInfo &info, std::function init_fn, - std::function shutdown_fn); + std::function shutdown_fn) noexcept; /** * @brief Handles DLL_PROCESS_DETACH. diff --git a/include/DetourModKit/event_dispatcher.hpp b/include/DetourModKit/event_dispatcher.hpp index 601366c..e802ce3 100644 --- a/include/DetourModKit/event_dispatcher.hpp +++ b/include/DetourModKit/event_dispatcher.hpp @@ -50,6 +50,8 @@ * @endcode */ +#include "DetourModKit/logger.hpp" + #include #include #include @@ -220,6 +222,11 @@ namespace DetourModKit { if (emitting_depth() > 0) { + // The reentrancy guard is per-template-instantiation, so a handler mutating a second dispatcher of the + // same Event type is rejected here invisibly to the caller. Surface it best-effort (never throw, never + // block) so the silent rejection is observable during development. assert fires the same condition in + // debug builds. + report_reentrant_rejection("subscribe"); return {}; } @@ -364,6 +371,10 @@ namespace DetourModKit { if (emitting_depth() > 0) { + // Same per-instantiation guard as subscribe(): a handler that triggers an unsubscribe (directly or via + // a Subscription reset/destructor) on a same-type dispatcher is rejected here. Surface it best-effort + // so the rejection is not silent; the RAII path retries the unsubscribe after the emit stack unwinds. + report_reentrant_rejection("unsubscribe"); return false; } @@ -405,6 +416,32 @@ namespace DetourModKit return true; } + /** + * @brief Best-effort report that the reentrancy guard rejected a mutation from within a handler. + * @details Emits a Debug log via Logger::try_log so the otherwise-silent per-instantiation rejection surfaces + * during development. The whole path is wrapped because Logger::get_instance() may construct the + * singleton if logging was not initialized yet; any failure is swallowed so this best-effort + * diagnostic never turns a rejected mutation into host termination. Deliberately does NOT assert: an + * unsubscribe rejected mid-emit is a legitimate RAII path -- a Subscription reset or destroyed inside + * a handler calls unsubscribe(), which is refused here and retried after the emit stack unwinds -- so + * aborting on it would be wrong. Zero-cost on the success path because it is only reached after the + * guard has already rejected the call. + */ + static void report_reentrant_rejection(const char *op) noexcept + { + try + { + (void)Logger::get_instance().try_log( + LogLevel::Debug, + "EventDispatcher: {} rejected -- called from within a handler on a same-type dispatcher " + "(per-instantiation reentrancy guard). Defer the mutation until the emit returns.", + op); + } + catch (...) + { + } + } + // Thread-local emit depth counter. This is per-template-instantiation (not per-instance) because making it // per-instance would require a thread_local map keyed by this pointer, adding a hash lookup to every emit() hot // path. The typical usage is one dispatcher per event type, so the shared counter is the correct tradeoff. See diff --git a/include/DetourModKit/hook_manager.hpp b/include/DetourModKit/hook_manager.hpp index 59cb9fd..0e32a3d 100644 --- a/include/DetourModKit/hook_manager.hpp +++ b/include/DetourModKit/hook_manager.hpp @@ -559,6 +559,13 @@ namespace DetourModKit * without attempting to log, preventing use-after-free. The shutdown flag is reset after hooks are * cleared, allowing subsequent hook creation for hot-reload scenarios. The destructor becomes a no-op * only while the flag is set during the shutdown operation itself. + * @note Two-phase teardown / quiesce contract: acquires the mutator gate exclusively to block new mutators, + * disables all hooks first (shared registry lock), then clears the maps under the exclusive lock, both + * phases newest-first so layered hooks unwind onto still-valid bytes. SafetyHook relocates a thread + * caught in the patched prologue but cannot drain a thread already inside the detour or trampoline body, + * so the caller must quiesce the hooked functions before shutdown to close that residual window. Do not + * call this (or any mutator) from within a with_* / try_with_* callback; defer the teardown until the + * callback returns (the reentrancy guard fails such calls closed). */ void shutdown() noexcept; @@ -783,6 +790,15 @@ namespace DetourModKit * does not reorder for the caller: removing an inner layer while a clone created later on top of it * is still installed frees the clone that the newer hook recorded as its "original", so remove * layered hooks newest-first. + * @warning Restoring a VMT hook writes the saved original vptr back into the object. If the game reconstructed + * the object in place or layered its own vptr on top after the clone was installed, this write + * restores a vtable the game has since overwritten, silently clobbering the game's pointer (or + * restoring a stale one). VMT removal is a bare vptr write with no thread protection, so the caller + * must guarantee no thread is dispatching through the cloned slot across removal and that the object's + * vptr was not re-layered since create. + * @note Two-phase teardown / quiesce contract: the registry entry is mutated under the exclusive lock; do not + * call this from within a with_* / try_with_* callback (defer until the callback returns -- the + * reentrancy guard fails such calls closed). * @param vmt_name The name of the VMT hook to remove. * @return Success if removed, or HookError::VmtHookNotFound. */ @@ -790,6 +806,13 @@ namespace DetourModKit /** * @brief Removes a single method hook from a VMT, restoring the original method. + * @warning Restoring a method hook rewrites the cloned vtable slot back to the original function pointer. If + * the game overwrote that slot or relaid the object's vptr after the hook was installed, the restore + * writes over a pointer the game has since changed. Removal carries no thread protection, so the + * caller must guarantee no thread is dispatching through the slot across removal. + * @note Two-phase teardown / quiesce contract: mutates the registry entry under the exclusive lock; do not call + * this from within a with_* / try_with_* callback (defer until the callback returns -- the reentrancy + * guard fails such calls closed). * @param vmt_name The name of the VMT hook. * @param method_index The vtable index of the method to unhook. * @return Success if removed, or a HookError describing the failure. @@ -823,6 +846,15 @@ namespace DetourModKit /** * @brief Removes the hooked vtable from a specific object, restoring its original vptr. + * @warning This writes the saved original vptr back into @p object. If the game reconstructed @p object in + * place + * or layered its own vptr on top after the clone was installed, the restore overwrites the vptr the + * game has since set, silently clobbering it (or installing a stale vtable). The write has no thread + * protection, so the caller must guarantee no thread is dispatching through the cloned slot across the + * restore and that the object's vptr was not re-layered since the clone went on. + * @note Two-phase teardown / quiesce contract: mutates the registry entry under the exclusive lock; do not call + * this from within a with_* / try_with_* callback (defer until the callback returns -- the reentrancy + * guard fails such calls closed). * @param vmt_name The name of the VMT hook. * @param object The object to restore. * @return true if the VMT hook was found and the object was restored, false otherwise. @@ -831,6 +863,16 @@ namespace DetourModKit /** * @brief Removes all VMT hooks, restoring original vtables on all applied objects. + * @details Destroys VMT hooks newest-first so clones layered on the same object unwind safely. + * @warning Each restore writes a saved original vptr back into its applied objects. If the game reconstructed + * an + * object in place or layered its own vptr on top after the clone was installed, the restore overwrites + * the vptr the game has since set, silently clobbering it (or installing a stale vtable). VMT removal + * is a bare vptr write with no thread protection, so the caller must quiesce all hooked objects -- no + * thread dispatching through any cloned slot -- across this teardown. + * @note Two-phase teardown / quiesce contract: mutates the registry under the exclusive lock; do not call this + * (or any mutator) from within a with_* / try_with_* callback (defer until the callback returns -- the + * reentrancy guard fails such calls closed). */ void remove_all_vmt_hooks(); @@ -936,6 +978,13 @@ namespace DetourModKit * not reorder for the caller: removing an older hook while a newer hook layered on the same address is * still installed restores the prologue to the original bytes underneath the newer hook, leaving its * entry jump pointing into a trampoline that is about to be freed. Remove layered hooks newest-first. + * @note Two-phase teardown / quiesce contract: disables the hook under the shared registry lock, then erases + * its + * entry under the exclusive lock. SafetyHook relocates a thread caught in the patched prologue but cannot + * drain a thread already inside the detour or trampoline body, so the caller must quiesce the hooked + * function before removal to close that residual window. Do not call this from within a with_* / + * try_with_* callback; defer the removal until the callback returns (the reentrancy guard fails such + * calls closed). * @param hook_id The name of the hook to remove. * @return Success if removed, or HookError::HookNotFound. */ @@ -949,7 +998,9 @@ namespace DetourModKit * a freed trampoline. The shared phase lets DetourModKit's own with_* readers finish before Hook * storage is destroyed. SafetyHook can relocate threads caught in the patched prologue, but it cannot * drain threads already running the detour or trampoline body; callers must quiesce the hooked - * function during planned teardown to close that residual window. + * function during planned teardown to close that residual window. Do not call this (or any mutator) + * from within a with_* / try_with_* callback; defer the teardown until the callback returns (the + * reentrancy guard fails such calls closed). * * After clearing, resets the internal shutdown flag to false, allowing subsequent create_*_hook() * calls to succeed for hot-reload workflows. diff --git a/include/DetourModKit/memory.hpp b/include/DetourModKit/memory.hpp index 3da54a5..b9c0530 100644 --- a/include/DetourModKit/memory.hpp +++ b/include/DetourModKit/memory.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -215,8 +216,8 @@ namespace DetourModKit * PAGE_NOACCESS/guard, or a page reprotected out from under a stale cache entry -- is swallowed and * returned as 0. Suitable for hot paths that already manage their own error recovery. * @note The MinGW path consults no cache; the fault guard makes a cache probe unnecessary. If the vectored - * handler cannot be installed it falls back to VirtualQuery-validated reads. Either way the function - * exposes no caller-observable cache state. + * handler cannot be installed it falls back to VirtualQuery plus ReadProcessMemory. Either way the + * function exposes no caller-observable cache state. * @param base The base address to read from. * @param offset Byte offset added to base before dereferencing. * @return The pointer-sized value at the address, or 0 if the read faults. @@ -329,7 +330,7 @@ namespace DetourModKit [[nodiscard]] bool is_writable(void *address, size_t size); /** - * @brief Writes a sequence of bytes to a target memory address. + * @brief Writes a sequence of bytes to a target memory address, changing page protection around the write. * @details Handles changing memory protection, performs the write operation, and restores original protection. * Also flushes instruction cache. Automatically invalidates the affected cache range. If num_bytes * exceeds MAX_WRITE_SIZE the function performs no write and returns MemoryError::SizeTooLarge. @@ -337,6 +338,11 @@ namespace DetourModKit * @param source_bytes Pointer to the source buffer containing data to write. * @param num_bytes Number of bytes to write. Must not exceed MAX_WRITE_SIZE. * @return std::expected on success, or the specific error on failure. + * @note Setup / control-plane only. The two VirtualProtect calls, FlushInstructionCache, and the all-shard + * cache-range invalidation make this the right tool for one-shot CODE patches on read-only / executable + * pages, not a per-frame primitive. To write DATA to memory the target already keeps writable (a camera + * transform, a player field) every frame, use @ref seh_write_bytes or @ref seh_write_chain_bytes, which + * change no protection and run no flush or invalidation. */ [[nodiscard]] std::expected write_bytes(std::byte *target_address, const std::byte *source_bytes, size_t num_bytes); @@ -414,7 +420,7 @@ namespace DetourModKit * and the function returns false. On MinGW the copy runs under a process-wide vectored exception * handler that claims the same fault set (no per-call VirtualQuery on the success path); a fault * anywhere in the span is swallowed and the function returns false. If the handler cannot be installed - * it falls back to VirtualQuery-based validation of every region the read spans. + * it falls back to VirtualQuery plus ReadProcessMemory for every region the read spans. * * The function is the underlying primitive for the typed @ref seh_read template and is exposed * directly for callers that need to read a contiguous buffer of bytes (for example NUL-terminated @@ -538,6 +544,105 @@ namespace DetourModKit { return seh_read_chain(base, std::span(offsets.begin(), offsets.size())); } + + /** + * @brief SEH-guarded raw memory copy from @p source into @p addr. + * @details The write sibling of @ref seh_read_bytes for memory the target already keeps writable (live game + * data such as a camera transform or player field). On MSVC the copy runs inside a __try / __except + * frame whose filter accepts the foreign-access fault set (EXCEPTION_ACCESS_VIOLATION, + * STATUS_GUARD_PAGE_VIOLATION, EXCEPTION_IN_PAGE_ERROR), so a fault mid-copy unwinds cleanly and the + * function returns false. On MinGW x64 the copy normally runs under the same process-wide vectored + * guard the read primitives use, armed over the destination span; if the handler cannot be installed, + * and on 32-bit MinGW, it falls back to VirtualQuery plus WriteProcessMemory and fails closed unless + * the destination is currently writable. + * @param addr Destination address. Values below 0x10000 are rejected without a write. + * @param source Source buffer. nullptr returns false. + * @param bytes Number of bytes to copy. Zero returns true (no-op). + * @return true on full success; false on any fault or invalid argument. On a fault mid-copy the target may have + * been partially written. + * @warning This does NOT change page protection, flush the instruction cache, or invalidate the readability + * cache. It is for writing DATA to memory the target already keeps writable, which is the per-frame + * hot-path case. To patch CODE on read-only / executable pages, use @ref write_bytes instead. + * @note Callback-safe on the established hot path: MSVC and the installed MinGW x64 VEH path allocate nothing, + * take no lock, and issue no syscall for the copy itself. On MinGW, call @ref init_cache during setup if + * you want the VEH installed before a hook callback can be the first guarded access. + */ + [[nodiscard]] bool seh_write_bytes(uintptr_t addr, const void *source, size_t bytes) noexcept; + + /** + * @brief SEH-guarded typed write of a trivially copyable T to @p addr. + * @details Forwards to @ref seh_write_bytes, so the underlying guard lives in the translation unit that defines + * it. The value's object representation is copied byte-for-byte; no T object is constructed at @p + * addr. + * @tparam T A trivially copyable type. + * @param addr Destination address (see @ref seh_write_bytes). + * @param value Value whose object representation is written. + * @return true on success, false on any write fault or invalid address. + */ + template + requires std::is_trivially_copyable_v + [[nodiscard]] bool seh_write(uintptr_t addr, const T &value) noexcept + { + return seh_write_bytes(addr, std::addressof(value), sizeof(T)); + } + + /** + * @brief Resolves a pointer chain and writes a raw byte range at its end. + * @details Performs the same walk as @ref seh_resolve_chain and then copies @p bytes from @p source into the + * resolved address through the guarded write path, so a fault in the resolve or the terminal write + * fails closed. This is the per-frame WRITE counterpart of @ref seh_read_chain_bytes: it changes no + * page protection and runs no i-cache flush or cache invalidation, so writing a camera transform every + * frame through it is a guarded copy, not the heavy @ref write_bytes path. + * @param base Root address of the chain. + * @param offsets Byte offsets applied left to right (see @ref seh_resolve_chain). An empty span writes at @p + * base. + * @param source Source buffer. nullptr returns false. + * @param bytes Number of bytes to copy. Zero returns true (no-op). + * @return true on a fully successful resolve and write; false if any intermediate link faults or is + * implausible, + * or the terminal write faults. On a terminal fault the target may have been partially written. + */ + [[nodiscard]] bool seh_write_chain_bytes(uintptr_t base, std::span offsets, const void *source, + size_t bytes) noexcept; + + /** + * @brief Convenience overload accepting a braced offset list. + * @see seh_write_chain_bytes(uintptr_t, std::span, const void *, size_t) + */ + [[nodiscard]] inline bool seh_write_chain_bytes(uintptr_t base, std::initializer_list offsets, + const void *source, size_t bytes) noexcept + { + return seh_write_chain_bytes(base, std::span(offsets.begin(), offsets.size()), source, + bytes); + } + + /** + * @brief Resolves a pointer chain and writes a typed value at its end. + * @details Forwards to @ref seh_write_chain_bytes. The value's object representation is copied byte-for-byte. + * @tparam T A trivially copyable type. + * @param base Root address of the chain. + * @param offsets Byte offsets applied left to right (see @ref seh_resolve_chain). + * @param value Value whose object representation is written. + * @return true on success, false if any link faults or is implausible or the terminal write faults. + */ + template + requires std::is_trivially_copyable_v + [[nodiscard]] bool seh_write_chain(uintptr_t base, std::span offsets, const T &value) noexcept + { + return seh_write_chain_bytes(base, offsets, std::addressof(value), sizeof(T)); + } + + /** + * @brief Convenience overload accepting a braced offset list. + * @see seh_write_chain(uintptr_t, std::span, const T &) + */ + template + requires std::is_trivially_copyable_v + [[nodiscard]] bool seh_write_chain(uintptr_t base, std::initializer_list offsets, + const T &value) noexcept + { + return seh_write_chain(base, std::span(offsets.begin(), offsets.size()), value); + } } // namespace Memory } // namespace DetourModKit diff --git a/include/DetourModKit/rtti.hpp b/include/DetourModKit/rtti.hpp index 12bfffe..9a5600a 100644 --- a/include/DetourModKit/rtti.hpp +++ b/include/DetourModKit/rtti.hpp @@ -210,6 +210,27 @@ namespace DetourModKit explicit TypeIdentity(std::string_view mangled, Memory::ModuleRange range = Memory::host_module_range()) noexcept; + /** + * @brief Constructs a cached identity from a null-terminated mangled type name. + * @details This exact-match overload keeps string-literal call sites unambiguous while the deleted + * std::string rvalue overload rejects dangling temporaries. A null pointer is treated as an empty + * name and resolves to no match. + * @param mangled Null-terminated MSVC RTTI name. The backing bytes must outlive this identity. + * @param range Module range searched for the primary vtable. + */ + explicit TypeIdentity(const char *mangled, Memory::ModuleRange range = Memory::host_module_range()) noexcept + : TypeIdentity(mangled != nullptr ? std::string_view(mangled) : std::string_view{}, range) + { + } + + /** + * @brief Rejects std::string temporaries because the identity stores a non-owning view. + * @details A string literal, std::string_view, or long-lived std::string lvalue can still bind safely. A + * std::string rvalue would dangle as soon as the constructor returns, so it is a compile-time + * error. + */ + TypeIdentity(std::string &&mangled, Memory::ModuleRange range = Memory::host_module_range()) = delete; + /** * @brief Tests whether @p vtable is this type's primary vtable. * @details Resolves on first call, then compares. Returns false when the type cannot be resolved, so a @@ -229,9 +250,11 @@ namespace DetourModKit std::string_view m_mangled; Memory::ModuleRange m_range; - // m_cached holds the resolved primary vtable (0 once resolved-but-not-found). m_resolved latches a - // completed resolve and is published with release after m_cached is stored, so an acquire-load that - // observes it also observes the cached value. + // m_cached holds the resolved primary vtable and is written only on a SUCCESSFUL (nonzero) resolve. + // m_resolved latches that success and is published with release after m_cached is stored, so an + // acquire-load that observes m_resolved == true also observes the cached value. A failed resolve latches + // neither flag, so a later call retries once the type becomes resolvable instead of caching the miss as + // permanent. mutable std::atomic m_cached{0}; mutable std::atomic m_resolved{false}; }; diff --git a/include/DetourModKit/rtti_dissect.hpp b/include/DetourModKit/rtti_dissect.hpp index 2922c54..19fb54f 100644 --- a/include/DetourModKit/rtti_dissect.hpp +++ b/include/DetourModKit/rtti_dissect.hpp @@ -167,11 +167,11 @@ namespace DetourModKit * per fallback beyond the single probe the bare primitive performs). The cascade selects the first * RESOLVING slot, not the "best" one: with several valid-but-different objects, declaration order is * the only disambiguator -- a consumer needing type discrimination uses @ref heal_landmark / @ref - * solve_fingerprint instead. On any failure return @p out is left as the last probe wrote it and must - * not be read. + * solve_fingerprint instead. On a failure return @p out is reset to a default-constructed @ref + * PointeeType, so a caller that ignores the error never reads a slot's partially written fields. * @tparam Fallbacks Pack of alternate slot addresses, each convertible to std::uintptr_t. * @param candidate The primary slot address to probe first. - * @param out Receives the identification of the first resolving slot. Unspecified on a failure return. + * @param out Receives the first resolving slot's identification; reset to a default PointeeType on failure. * @param fallbacks Alternate slot addresses, tried in order after @p candidate. * @return A value on the first resolve (@p out populated), or the @p candidate's @ref IdentifyError when all * candidates fail. @@ -194,6 +194,11 @@ namespace DetourModKit { return {}; } + // Every candidate failed. The last probe may have left @p out half-written (e.g. a partial name_buf on a + // NoRtti-after-name miss), so reset it to a clean default before returning. This keeps a caller that + // ignores the error code from reading partially-written fields, while the FIRST (primary) error is still + // the one surfaced. + out = PointeeType{}; return std::unexpected(primary.error()); } @@ -312,7 +317,16 @@ namespace DetourModKit std::ptrdiff_t nominal_offset = 0; /// Search radius per side in bytes (capped at MAX_HEAL_WINDOW). std::size_t window = 0x40; - /// MSVC mangled name to match. Aliases caller storage. + /** + * @brief MSVC mangled name to match. Aliases caller storage. + * @note Non-owning view: the backing bytes must outlive every heal call that reads this landmark. A string + * literal (the documented common case) and any longer-lived std::string / std::string_view are safe. + * Initialising this field from a std::string temporary dangles the view as soon as the full + * expression ends -- a latent use-after-free. Because Landmark is an aggregate (so a heal template + * can be a @c static @c constexpr designated initializer and stay serializable), this cannot be + * rejected with a deleted rvalue overload the way @ref Rtti::TypeIdentity does; the consumer owns the + * lifetime. + */ std::string_view expected_mangled; /// Required slot shape. Indirection indirection = Indirection::PointerToObject; @@ -367,13 +381,22 @@ namespace DetourModKit * - @ref HealError::NoMatch when no slot matched; * - @ref HealError::Ambiguous when both the @c +d and @c -d slots * at the nearest matching distance match (an irreducible tie). - * @note For a struct known to hold more than one field of @c expected_mangled's type, prefer @ref - * solve_fingerprint. A single landmark resolves to the uniquely nearest same-typed slot, so a nearer - * same-typed neighbour heals to the wrong field silently (both satisfy the slot shape); the @ref - * HealError::Ambiguous result fires only for an exact +/- distance tie, not for a nearer decoy. - * solve_fingerprint disambiguates structurally because one uniform delta must fit every field at once. - * @warning Init-time / re-heal-on-miss, not per-frame: each probe runs the syscall-heavy prelude up to twice. - * The window cap bounds the worst case. Allocates nothing (one reused stack @ref PointeeType). + * @warning FAIL-WRONG HAZARD when the window is crowded with same-typed slots. A single landmark resolves to + * the + * uniquely NEAREST slot that satisfies the type + shape, so any of these wins SILENTLY and returns a + * confidently-wrong offset rather than failing closed: + * - a strictly-nearer same-typed DECOY field at the wrong offset (both satisfy the slot shape, and the + * nearer one is returned before the intended field is ever probed); + * - under multiple inheritance, a secondary base subobject whose vtable sits nearer than the primary + * and whose COL still names the complete type (use @ref Indirection::CompleteObject to reject it). + * The @ref HealError::Ambiguous result fires ONLY for an exact +/- distance tie at the nearest + * matching ring, never for a nearer decoy, so a wrong-but-nearer slot is not flagged. Whenever the + * window may be crowded -- a struct that holds more than one field of @c expected_mangled's type, or + * an object that may use multiple inheritance -- prefer @ref solve_fingerprint, which disambiguates + * structurally because one uniform delta must fit every field at once, or narrow @c window / tighten + * the type to a name that is unique in the neighbourhood. + * @note Init-time / re-heal-on-miss, not per-frame: each probe runs the syscall-heavy prelude up to twice. The + * window cap bounds the worst case. Allocates nothing (one reused stack @ref PointeeType). */ [[nodiscard]] std::expected heal_landmark(const Landmark &lm) noexcept; @@ -382,6 +405,13 @@ namespace DetourModKit * @details Feeds straight into a @c std::span pointer-chain API. * @param lm The landmark, with @c base filled in. * @return The healed offset, or std::nullopt on any failure. + * @warning Inherits @ref heal_landmark's crowding FAIL-WRONG HAZARD, and discards every signal that would let a + * caller detect it: the std::nullopt return collapses all @ref HealError reasons, and the dropped @ref + * HealHit::col_offset / @ref HealHit::was_pointer hide a secondary-base or direct-object match. A + * strictly-nearer same-typed decoy (or an MI secondary base) still wins SILENTLY and yields a + * confidently-wrong offset with no indication. Use this only when the window cannot be crowded; when + * it can, call @ref heal_landmark (to inspect the full @ref HealHit) or @ref solve_fingerprint (to + * disambiguate structurally across several co-moving fields). */ [[nodiscard]] std::optional heal_offset(const Landmark &lm) noexcept; @@ -430,9 +460,9 @@ namespace DetourModKit * - @ref HealError::Ambiguous when two or more deltas tie for the * most optional matches. * @note Each landmark in @p fp must have a distinct @c nominal_offset. Corroboration is scored by counting the - * required landmarks satisfied at a delta, so two landmarks sharing a nominal_offset probe the same slot - * and would double-count it, reporting stronger agreement than the template provides. Distinct offsets - * are an author-side invariant of the constexpr template, not validated at runtime. + * required landmarks satisfied at a delta, so two landmarks sharing a nominal_offset would probe the same + * slot and double-count it. Duplicate offsets are rejected as @ref HealError::BadDescriptor before any + * memory is touched. * @warning Init-time only: the probe count is (2 * window_bytes / 8 + 1) * fp.size() prelude walks. Allocates * nothing. */ diff --git a/include/DetourModKit/version.hpp.in b/include/DetourModKit/version.hpp.in index 792b849..e7386ab 100644 --- a/include/DetourModKit/version.hpp.in +++ b/include/DetourModKit/version.hpp.in @@ -16,10 +16,12 @@ /** * @brief Encodes major.minor.patch into a single integer for compile-time comparisons. - * @details Example: DMK_VERSION_AT_LEAST(2, 3, 0) + * @details Example: DMK_VERSION_AT_LEAST(2, 3, 0). The minor and patch fields each + * reserve three decimal digits (0..999), so a two-digit-component scheme can + * never collide a higher minor with a lower one from the next major. */ #define DMK_MAKE_VERSION(major, minor, patch) \ - ((major) * 10000 + (minor) * 100 + (patch)) + ((major) * 1000000 + (minor) * 1000 + (patch)) #define DMK_VERSION \ DMK_MAKE_VERSION(DMK_VERSION_MAJOR, DMK_VERSION_MINOR, DMK_VERSION_PATCH) diff --git a/src/bootstrap.cpp b/src/bootstrap.cpp index 21656d4..58a7ccc 100644 --- a/src/bootstrap.cpp +++ b/src/bootstrap.cpp @@ -198,63 +198,83 @@ namespace DetourModKit::Bootstrap { } } - } // anonymous namespace - [[nodiscard]] bool32_t on_dll_attach(module_handle_t hMod, const ModInfo &info, std::function init_fn, - std::function shutdown_fn) - { - if (s_shutdown_event || s_worker_thread) + // The throwing core of on_dll_attach, separated so the public entry point can be noexcept. on_dll_attach is + // called from DllMain under the Windows loader lock, where an escaping exception is undefined behavior. Any + // throwing step here -- most concretely Logger::configure, which builds std::string / std::wstring for the log + // prefix and path and can raise std::bad_alloc -- propagates to the noexcept wrapper, which fails closed. + [[nodiscard]] bool32_t attach_core(module_handle_t hMod, const ModInfo &info, std::function init_fn, + std::function shutdown_fn) { - return FALSE; - } + if (s_shutdown_event || s_worker_thread) + { + return FALSE; + } - s_module = hMod; - if (hMod) - { - DisableThreadLibraryCalls(hMod); - } + s_module = hMod; + if (hMod) + { + DisableThreadLibraryCalls(hMod); + } - if (!is_target_process(info.game_process_name)) - { - s_module = nullptr; - return FALSE; - } + if (!is_target_process(info.game_process_name)) + { + s_module = nullptr; + return FALSE; + } - if (!acquire_instance_mutex(info.instance_mutex_prefix)) - { - s_module = nullptr; - return FALSE; - } + if (!acquire_instance_mutex(info.instance_mutex_prefix)) + { + s_module = nullptr; + return FALSE; + } - Logger::configure(info.prefix, info.log_file); - Logger::get_instance().enable_async_mode(info.async_cfg); + Logger::configure(info.prefix, info.log_file); + Logger::get_instance().enable_async_mode(info.async_cfg); - s_init_fn = std::move(init_fn); - s_shutdown_fn = std::move(shutdown_fn); + s_init_fn = std::move(init_fn); + s_shutdown_fn = std::move(shutdown_fn); - s_shutdown_event = CreateEventW(nullptr, TRUE, FALSE, nullptr); - if (!s_shutdown_event) - { - unwind_early_attach_failure(); - return FALSE; + s_shutdown_event = CreateEventW(nullptr, TRUE, FALSE, nullptr); + if (!s_shutdown_event) + { + unwind_early_attach_failure(); + return FALSE; + } + + s_worker_thread = CreateThread(nullptr, 0, lifecycle_thread, nullptr, 0, nullptr); + if (!s_worker_thread) + { + unwind_early_attach_failure(); + return FALSE; + } + + // Re-arm the detach gate now that a fresh attach has fully succeeded, so the matching on_dll_detach runs + // its teardown instead of no-opping. Without this reset s_detach_called stays true after the first detach + // and a second attach/detach cycle would leak the worker thread, shutdown event, and instance mutex (the + // header contract promises a subsequent attach starts from a clean slate). Reset only on the success path: + // early failures above never set the gate, so they must not clear it either. Release pairs with the acquire + // in on_dll_detach's compare_exchange. Both run serialized under the loader lock. + s_detach_called.store(false, std::memory_order_release); + + return TRUE; } + } // anonymous namespace - s_worker_thread = CreateThread(nullptr, 0, lifecycle_thread, nullptr, 0, nullptr); - if (!s_worker_thread) + [[nodiscard]] bool32_t on_dll_attach(module_handle_t hMod, const ModInfo &info, std::function init_fn, + std::function shutdown_fn) noexcept + { + // Fail closed on any exception so nothing unwinds across the loader lock (see attach_core). On a throw the + // partially-built attach state is rolled back through the same path the explicit early-failure returns use. + try + { + return attach_core(hMod, info, std::move(init_fn), std::move(shutdown_fn)); + } + catch (...) { unwind_early_attach_failure(); return FALSE; } - - // Re-arm the detach gate now that a fresh attach has fully succeeded, so the matching on_dll_detach runs its - // teardown instead of no-opping. Without this reset s_detach_called stays true after the first detach and a - // second attach/detach cycle would leak the worker thread, shutdown event, and instance mutex (the header - // contract promises a subsequent attach starts from a clean slate). Reset only on the success path: early - // failures above never set the gate, so they must not clear it either. Release pairs with the acquire in - // on_dll_detach's compare_exchange. Both run serialized under the loader lock. - s_detach_called.store(false, std::memory_order_release); - - return TRUE; } void request_shutdown() noexcept diff --git a/src/config.cpp b/src/config.cpp index a96720b..14e85dc 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -388,7 +388,46 @@ namespace DetourModKit // below instead, because its parse path differs (nullptr INI value handling, combo-list parsing). if constexpr (std::same_as) { - current_value = static_cast(ini.GetLongValue(section.c_str(), ini_key.c_str(), default_value)); + // SimpleIni's GetLongValue parses into a long, which is 32-bit on this LLP64 target, so a value + // beyond int range is silently saturated by strtol (e.g. "5000000000" becomes INT_MAX) rather than + // rejected. Read the raw string and parse it as a 64-bit integer instead, so an out-of-range or + // non-numeric value falls back to the registered default with a Warning -- mirroring how + // parse_input_code_list rejects a bad token rather than wrapping it. Preserve GetLongValue's base + // rules: 0x-prefixed values are hexadecimal and everything else is decimal (including leading-zero + // values such as "010"). + const char *raw = ini.GetValue(section.c_str(), ini_key.c_str(), nullptr); + if (raw == nullptr) + { + current_value = default_value; + } + else + { + const char *parse_begin = raw; + int base = 10; + if (raw[0] == '0' && (raw[1] == 'x' || raw[1] == 'X')) + { + parse_begin = raw + 2; + base = 16; + } + + errno = 0; + char *end = nullptr; + const long long parsed = std::strtoll(parse_begin, &end, base); + const bool fully_consumed = (end != nullptr && end != parse_begin && *end == '\0'); + if (!fully_consumed || errno == ERANGE || + parsed < static_cast(std::numeric_limits::min()) || + parsed > static_cast(std::numeric_limits::max())) + { + logger.warning("Config: value '{}' for '{}' is not a valid int (non-numeric or out of " + "range); using default {}.", + raw, ini_key, default_value); + current_value = default_value; + } + else + { + current_value = static_cast(parsed); + } + } } else if constexpr (std::same_as) { @@ -483,6 +522,17 @@ namespace DetourModKit return s_last_loaded_ini_path; } + // Tear-free snapshot of the last-loaded INI path. Takes get_config_mutex() itself and returns a copy, so a + // caller that only needs to read the path cannot observe a reference torn by a concurrent reload()/load() + // mutating the underlying string. Use this instead of get_last_loaded_ini_path() at any read site that is not + // already inside a held get_config_mutex() critical section (the mutex is non-recursive). The string copy can + // allocate, so this is intentionally not noexcept. + [[nodiscard]] std::string snapshot_last_loaded_ini_path() + { + std::lock_guard lock(get_config_mutex()); + return get_last_loaded_ini_path(); + } + // Content hash of the bytes last successfully loaded from the INI file. std::nullopt until the first successful // load() (or after clear_registered_items(), which wipes it alongside the path). Caller must hold // get_config_mutex() when reading or writing. @@ -1119,9 +1169,25 @@ namespace DetourModKit // Invoke setter callbacks outside the config mutex -- same deferred // pattern as register_*(). Setters may safely call back into Config. + // Wrap each call so a single throwing setter cannot prevent the remaining setters from applying the freshly + // loaded values, mirroring reload_impl(): the initial load() and a reload() share the same per-setter isolation + // so one bad item degrades to a logged warning instead of aborting the whole load. The logger is acquired + // outside the config mutex -- a custom Logger sink that re-enters Config cannot AB/BA deadlock here. + Logger &setter_logger = Logger::get_instance(); for (auto &cb : deferred_callbacks) { - cb(); + try + { + cb(); + } + catch (const std::exception &e) + { + setter_logger.error("Config: load setter threw: {}", e.what()); + } + catch (...) + { + setter_logger.error("Config: load setter threw unknown exception."); + } } } @@ -1266,11 +1332,7 @@ namespace DetourModKit DetourModKit::Config::enable_auto_reload(std::chrono::milliseconds debounce_window, std::function on_reload) { - std::string ini_filename; - { - std::lock_guard lock(get_config_mutex()); - ini_filename = get_last_loaded_ini_path(); - } + const std::string ini_filename = snapshot_last_loaded_ini_path(); Logger &logger = Logger::get_instance(); diff --git a/src/config_watcher.cpp b/src/config_watcher.cpp index ba7e0f6..0476dd1 100644 --- a/src/config_watcher.cpp +++ b/src/config_watcher.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -481,15 +482,42 @@ namespace DetourModKit // DEBUG edge rather than staying silent forever. overflow_logged = false; - // Walk the FILE_NOTIFY_INFORMATION chain. + // Walk the FILE_NOTIFY_INFORMATION chain. The kernel is trusted, but every kernel-supplied + // length/offset is bounds-checked against the buffer before any read or advance: trusting + // FileNameLength or NextEntryOffset blindly would turn a corrupt/malicious completion into an + // out-of-bounds read of the worker's heap buffer. On any inconsistency the walk stops (fails + // closed) rather than reading past the bytes the kernel actually returned. const BYTE *cursor = buffer.data(); const BYTE *const end_ptr = cursor + bytes_transferred; - while (cursor + sizeof(FILE_NOTIFY_INFORMATION) <= end_ptr) + // Offset of the variable-length FileName[] member; the fixed header occupies the bytes before + // it. Used to bound both the header and the filename extent against end_ptr. + constexpr size_t name_field_offset = offsetof(FILE_NOTIFY_INFORMATION, FileName); + + // (a) The entry header itself must fit before we dereference any of its fields. Compare on the + // remaining span before forming cursor + name_field_offset, so malformed trailing bytes cannot + // make the bounds check itself step outside the buffer. + while (static_cast(end_ptr - cursor) >= name_field_offset) { const auto *info = reinterpret_cast(cursor); - const size_t name_len = info->FileNameLength / sizeof(WCHAR); + const DWORD name_bytes = info->FileNameLength; + + // (c) FileNameLength must be a whole number of WCHARs; an odd byte count is malformed. + if (name_bytes % sizeof(WCHAR) != 0) + { + break; + } + + // (b) FileName + FileNameLength must not run past the buffer end. Compare on the available + // span (end_ptr - FileName) so the addition cannot overflow a pointer. + const BYTE *const name_start = cursor + name_field_offset; + if (name_bytes > static_cast(end_ptr - name_start)) + { + break; + } + + const size_t name_len = name_bytes / sizeof(WCHAR); const std::wstring_view changed_name(info->FileName, name_len); // Match against target filename (case-insensitive). Rename-swap-save (temp -> target) @@ -499,11 +527,22 @@ namespace DetourModKit matched = true; } - if (info->NextEntryOffset == 0) + // A zero NextEntryOffset terminates the walk (the spec's end-of-chain marker). + const DWORD next = info->NextEntryOffset; + if (next == 0) + { + break; + } + + // (d) NextEntryOffset must advance past at least this entry's header (forward progress, so + // a bogus small value cannot loop or alias the current entry) and must keep the next + // entry's start at or before the buffer end; the loop condition then re-validates that the + // next entry's header fully fits. Compare on the available span to avoid pointer overflow. + if (next < name_field_offset || next > static_cast(end_ptr - cursor)) { break; } - cursor += info->NextEntryOffset; + cursor += next; } } diff --git a/src/input_intercept.cpp b/src/input_intercept.cpp index 3abae84..bf163a3 100644 --- a/src/input_intercept.cpp +++ b/src/input_intercept.cpp @@ -636,6 +636,18 @@ namespace DetourModKit::detail { s_prev_wndproc.store(prev, std::memory_order_release); } + + // Drain any notches the wndproc detour latched while no binding owned the wheel. uninstall() drops the consume + // flag but leaves the detour live (it may stay layered under a foreign subclass), so it keeps incrementing + // s_wheel_count between an unbind and this re-arm. Without this reset the first take_wheel_counts() after a + // re-bind would replay that stale backlog as a burst of phantom notches. This is a fresh-install transition + // (the idempotent already-installed path returned above), so resetting here cannot discard counts a live + // binding is about to consume. + for (auto &count : s_wheel_count) + { + count.store(0, std::memory_order_relaxed); + } + s_wndproc_installed.store(true, std::memory_order_release); return true; } diff --git a/src/memory.cpp b/src/memory.cpp index c89016b..e14e454 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -933,18 +933,18 @@ namespace DetourModKit } // anonymous namespace #ifndef _MSC_VER - // MinGW/GCC has no __try / __except, so the foreign-read probes in this file cannot wrap their reads in frame-based - // SEH the way the MSVC paths do. A single process-wide vectored exception handler provides the equivalent fault - // guard: each guarded read marks the foreign range it is about to touch in a thread-local slot, and a read fault - // inside that range is intercepted and turned into a clean failure instead of terminating the host. The guarded - // path avoids a per-call VirtualQuery on successful terminal reads and keeps stale cache entries from authorizing - // unguarded dereferences after a page is reprotected. + // MinGW/GCC has no __try / __except, so the foreign-memory probes in this file cannot wrap their accesses in + // frame-based SEH the way the MSVC paths do. A single process-wide vectored exception handler provides the + // equivalent fault guard: each guarded access marks the foreign range it is about to touch in a thread-local slot, + // and a fault inside that range is intercepted and turned into a clean failure instead of terminating the host. The + // guarded path avoids a per-call VirtualQuery on successful terminal reads/writes and keeps stale cache entries + // from authorizing unguarded dereferences after a page is reprotected. namespace { - // VirtualQuery-validated copy. On x64 it is the fallback used only when the vectored handler could not be - // installed (it carries a query-to-copy TOCTOU window the handler path does not, so it is never the normal path - // there); on a 32-bit MinGW build, where the handler's x64 register redirect is unavailable, it is the only - // guard. Validates every region the read spans before copying. + // VirtualQuery-validated read. On x64 it is the fallback used only when the vectored handler could not be + // installed; on a 32-bit MinGW build, where the handler's x64 register redirect is unavailable, it is the only + // guard. The copy itself goes through ReadProcessMemory so a page that changes after the query fails as an API + // result rather than as a user-mode fault. bool virtualquery_validated_copy(uintptr_t addr, void *out, size_t bytes) noexcept { size_t copied = 0; @@ -970,38 +970,81 @@ namespace DetourModKit const size_t available = static_cast(region_end - cur); const size_t remaining = bytes - copied; const size_t to_copy = (remaining < available) ? remaining : available; - std::memcpy(static_cast(out) + copied, reinterpret_cast(cur), to_copy); + SIZE_T copied_now = 0; + if (!ReadProcessMemory(GetCurrentProcess(), reinterpret_cast(cur), + static_cast(out) + copied, to_copy, &copied_now) || + copied_now != to_copy) + return false; + copied += to_copy; + } + return true; + } + + // VirtualQuery-validated write fallback for MinGW when no frame/vectored fault guard is available. It never + // changes page protection: if the current protection is not writable, the write fails closed. The copy itself + // goes through WriteProcessMemory so a page that changes after the query fails as an API result rather than as + // a user-mode fault. + bool virtualquery_validated_write(uintptr_t addr, const void *source, size_t bytes) noexcept + { + size_t copied = 0; + while (copied < bytes) + { + const uintptr_t cur = addr + copied; + MEMORY_BASIC_INFORMATION mbi{}; + if (!VirtualQuery(reinterpret_cast(cur), &mbi, sizeof(mbi))) + return false; + if (mbi.State != MEM_COMMIT) + return false; + if ((mbi.Protect & CachePermissions::WRITE_PERMISSION_FLAGS) == 0 || + (mbi.Protect & CachePermissions::NOACCESS_GUARD_FLAGS) != 0) + return false; + + const uintptr_t region_start = reinterpret_cast(mbi.BaseAddress); + const uintptr_t region_end = region_start + mbi.RegionSize; + if (region_end < region_start) + return false; + if (cur < region_start || cur >= region_end) + return false; + + const size_t available = static_cast(region_end - cur); + const size_t remaining = bytes - copied; + const size_t to_copy = (remaining < available) ? remaining : available; + SIZE_T copied_now = 0; + if (!WriteProcessMemory(GetCurrentProcess(), reinterpret_cast(cur), + static_cast(source) + copied, to_copy, &copied_now) || + copied_now != to_copy) + return false; copied += to_copy; } return true; } #if defined(_WIN64) - // Per-read record describing the foreign range and the recovery snapshot. It lives on the guarded read's own - // stack (one per nested-free synchronous read) and is published to the thread's Win32 TLS slot for the duration - // of the read; the handler reads that slot. A Win32 TLS slot is used rather than a thread_local / __thread - // because mingw lowers thread-locals to __emutls_get_address, which allocates and locks on a thread's first - // access -- forbidden in the exception-dispatch context the handler runs in. TlsGetValue is documented to be - // callable there: it reads the thread's TLS array with no allocation and no lock, and returns null on any - // thread that has not armed a read. - struct VehReadGuard + // Per-access record describing the foreign range and the recovery snapshot. It lives on the guarded access's + // own stack (one per nested-free synchronous access) and is published to the thread's Win32 TLS slot for the + // duration of the access; the handler reads that slot. A Win32 TLS slot is used rather than a thread_local / + // __thread because mingw lowers thread-locals to __emutls_get_address, which allocates and locks on a thread's + // first access -- forbidden in the exception-dispatch context the handler runs in. TlsGetValue is documented to + // be callable there: it reads the thread's TLS array with no allocation and no lock, and returns null on any + // thread that has not armed an access. + struct VehAccessGuard { void *env[5]; // __builtin_setjmp buffer; the recovery stub longjmps through it (5 words is the GCC ABI) - uintptr_t guard_lo; // first byte of the foreign source range being read + uintptr_t guard_lo; // first byte of the foreign range being accessed uintptr_t guard_hi; // one past the last byte of that range }; std::mutex s_veh_mutex; std::atomic s_veh_handle{nullptr}; // Process-lifetime TLS index, allocated once and reused across install/remove cycles (never freed so a removal - // can never invalidate an index a concurrent read still holds). The handler reads it with an acquire load. + // can never invalidate an index a concurrent access still holds). The handler reads it with an acquire load. std::atomic s_veh_tls_index{TLS_OUT_OF_INDEXES}; - // Count of reads currently on the guarded path. shutdown_cache drains this to zero before unregistering the + // Count of accesses currently on the guarded path. shutdown_cache drains this to zero before unregistering the // handler so a fault can never arrive after the handler is gone. std::atomic s_veh_in_flight{0}; // Recovery stub the handler redirects a faulting thread into. __builtin_longjmp restores the stack pointer, - // frame pointer and program counter from the snapshot the matching __builtin_setjmp captured before the read, + // frame pointer and program counter from the snapshot the matching __builtin_setjmp captured before the access, // so recovery is correct no matter which frame the fault occurred in and without invoking SEH unwinding (which // can abort when unwound from a vectored-handler-resumed context). The handler passes the buffer in the // first-argument register so the stub touches no thread-local itself. noinline gives it a stable address for @@ -1012,19 +1055,19 @@ namespace DetourModKit } // Vectored exception handler, installed at the front of the list. It claims a fault only when the current - // thread is inside a guarded read (the TLS slot is non-null), the code is one a guarded probe owns + // thread is inside a guarded access (the TLS slot is non-null), the code is one a guarded probe owns // (is_guarded_read_fault -- the same set the MSVC __except filters use), the record carries a faulting address, - // and that address falls inside the foreign range being read. Every other fault is passed straight through, so - // a fault from the destination write, a host software exception reusing one of these codes, or any code running - // outside a guarded read still reaches the host's own handlers unchanged. On a claimed fault it redirects the - // thread into veh_perform_longjmp, which reports the read as failed. + // and that address falls inside the foreign range being accessed. Every other fault is passed straight through, + // so a host software exception reusing one of these codes, or any code running outside a guarded access, still + // reaches the host's own handlers unchanged. On a claimed fault it redirects the thread into + // veh_perform_longjmp, which reports the access as failed. LONG NTAPI dmk_veh_read_handler(PEXCEPTION_POINTERS info) noexcept { const DWORD slot = s_veh_tls_index.load(std::memory_order_acquire); if (slot == TLS_OUT_OF_INDEXES) return EXCEPTION_CONTINUE_SEARCH; - auto *const guard = static_cast(TlsGetValue(slot)); + auto *const guard = static_cast(TlsGetValue(slot)); if (guard == nullptr) return EXCEPTION_CONTINUE_SEARCH; @@ -1032,15 +1075,15 @@ namespace DetourModKit if (!Memory::detail::is_guarded_read_fault(record->ExceptionCode)) return EXCEPTION_CONTINUE_SEARCH; - // A guarded foreign read can only fault with a hardware access-violation, guard-page or in-page-error, all - // of which carry the faulting data address in ExceptionInformation[1]. Refuse to claim a record without it: - // that rules out a host RaiseException reusing one of these NTSTATUS codes with no address from being - // hijacked out of the host's control flow while a guarded read happens to be in flight on this thread. + // A guarded foreign access can only fault with a hardware access-violation, guard-page or in-page-error, + // all of which carry the faulting data address in ExceptionInformation[1]. Refuse to claim a record without + // it: that rules out a host RaiseException reusing one of these NTSTATUS codes with no address from being + // hijacked out of the host's control flow while a guarded access happens to be in flight on this thread. if (record->NumberParameters < 2) return EXCEPTION_CONTINUE_SEARCH; - // Confine the claim to the foreign source range, so a fault from the destination write (a caller bug, not a - // foreign-read fault) reaches the host's handlers instead of being silently swallowed. + // Confine the claim to the foreign range this operation explicitly armed. A bug that faults outside the + // range reaches the host's handlers instead of being silently swallowed. const uintptr_t fault_address = static_cast(record->ExceptionInformation[1]); if (fault_address < guard->guard_lo || fault_address >= guard->guard_hi) return EXCEPTION_CONTINUE_SEARCH; @@ -1061,9 +1104,10 @@ namespace DetourModKit } // Install the handler once, lazily. Re-installable across an init/shutdown cycle: shutdown_cache removes it and - // clears the handle, so a later guarded read or re-init installs a fresh one. Best-effort: if either TlsAlloc - // or AddVectoredExceptionHandler fails (realistic only under exhaustion) the handle stays null and the guarded - // read paths fall back to VirtualQuery validation. + // clears the handle, so a later guarded access or re-init installs a fresh one. Best-effort: if either TlsAlloc + // or AddVectoredExceptionHandler fails (realistic only under exhaustion) the handle stays null; byte-copy + // guards fall back to VirtualQuery plus ReadProcessMemory / WriteProcessMemory, while in-place region guards + // fail closed without touching the foreign range. void ensure_veh_installed() noexcept { if (s_veh_handle.load(std::memory_order_acquire) != nullptr) @@ -1080,16 +1124,17 @@ namespace DetourModKit { const DWORD slot = TlsAlloc(); if (slot == TLS_OUT_OF_INDEXES) - unavailable_reason = "TLS slot exhausted"; // cannot guard; read paths fall back to VirtualQuery + unavailable_reason = + "TLS slot exhausted"; // cannot guard; access paths take their fail-closed fallback else s_veh_tls_index.store(slot, std::memory_order_release); } if (unavailable_reason == nullptr) { - // First in the list (FirstHandler = 1): a guarded read always resolves through this handler before - // any consumer VEH or frame-based SEH. Every fault that is not this thread's own in-flight guarded - // read is passed through with EXCEPTION_CONTINUE_SEARCH, so being first never starves the host's - // handlers. + // First in the list (FirstHandler = 1): a guarded access always resolves through this handler + // before any consumer VEH or frame-based SEH. Every fault that is not this thread's own in-flight + // guarded access is passed through with EXCEPTION_CONTINUE_SEARCH, so being first never starves the + // host's handlers. void *const handle = AddVectoredExceptionHandler(1, dmk_veh_read_handler); s_veh_handle.store(handle, std::memory_order_release); if (handle == nullptr) @@ -1097,10 +1142,10 @@ namespace DetourModKit } } - // Surface a guard-unavailable condition once. The guarded reads stay correct (they fall back to - // VirtualQuery validation), so this is observability only; a single latched emission keeps a retried read - // path -- which re-enters here while the handle stays null -- from spamming the sink. Realistic only under - // resource exhaustion. + // Surface a guard-unavailable condition once. The guarded access paths stay correct by failing closed or + // using kernel-mediated byte copies, so this is observability only; a single latched emission keeps a + // retried path -- which re-enters here while the handle stays null -- from spamming the sink. Realistic + // only under resource exhaustion. if (unavailable_reason != nullptr) { static std::atomic s_veh_unavailable_warned{false}; @@ -1108,8 +1153,8 @@ namespace DetourModKit { (void)Logger::get_instance().try_log( LogLevel::Warning, - "MemoryCache: vectored read guard unavailable ({}); guarded reads fall back to VirtualQuery " - "validation.", + "MemoryCache: vectored access guard unavailable ({}); guarded byte copies use kernel-mediated " + "fallbacks and guarded region scans fail closed.", unavailable_reason); } } @@ -1121,11 +1166,11 @@ namespace DetourModKit void *const handle = s_veh_handle.load(std::memory_order_relaxed); if (handle == nullptr) return; - // Stop new guarded reads from taking the handler path, then wait for any read already committed to it to - // finish before unregistering, so a fault cannot arrive after the handler is gone. The seq_cst store pairs - // with the seq_cst fetch_add / handle-load in veh_read_bytes (the Dekker protocol the cache reader epoch - // uses): a read that observed a live handle is necessarily counted in s_veh_in_flight before this store is - // observed. + // Stop new guarded accesses from taking the handler path, then wait for any access already committed to it + // to finish before unregistering, so a fault cannot arrive after the handler is gone. The seq_cst store + // pairs with the seq_cst fetch_add / handle-load in the guarded access helpers (the Dekker protocol the + // cache reader epoch uses): an access that observed a live handle is necessarily counted in s_veh_in_flight + // before this store is observed. s_veh_handle.store(nullptr, std::memory_order_seq_cst); int spins = 0; while (s_veh_in_flight.load(std::memory_order_seq_cst) > 0) @@ -1150,7 +1195,7 @@ namespace DetourModKit __attribute__((noinline)) bool veh_guarded_copy(void *out, const void *src, size_t len) noexcept { const DWORD slot = s_veh_tls_index.load(std::memory_order_acquire); - VehReadGuard guard; + VehAccessGuard guard; guard.guard_lo = reinterpret_cast(src); guard.guard_hi = guard.guard_lo + len; @@ -1175,30 +1220,30 @@ namespace DetourModKit return true; } - // Runs fn(ctx) with the vectored handler armed over [lo, hi). The sibling of veh_guarded_copy for an in-place - // read the caller performs (the scanner's memchr / SIMD sweep) rather than a rep movsb copy: __builtin_setjmp - // records the recovery point, the guard is published to this thread's TLS slot so a read fault inside [lo, hi) - // is claimable, and the handler longjmps back here so the setjmp expression returns non-zero and the function - // reports failure. fn must touch only [lo, hi); a fault outside that range (e.g. a bug in fn) is not claimed - // and reaches the host's handlers. fn is abandoned on a fault via __builtin_longjmp without running - // destructors, so it must hold no resources that need unwinding -- the scanner sweep is plain POD locals. - // noinline keeps the setjmp anchor and the fn call in one self-contained frame. + // Runs fn(ctx) with the vectored handler armed over [lo, hi). Used for in-place accesses where the operation is + // not the simple rep movsb read that veh_guarded_copy performs. __builtin_setjmp records the recovery point, + // the guard is published to this thread's TLS slot so a fault inside [lo, hi) is claimable, and the handler + // longjmps back here so the setjmp expression returns non-zero and the function reports failure. fn must touch + // only [lo, hi); a fault outside that range (e.g. a bug in fn) is not claimed and reaches the host's handlers. + // fn is abandoned on a fault via __builtin_longjmp without running destructors, so it must hold no resources + // that need unwinding -- the scanner sweep and write wrapper use only POD locals. noinline keeps the setjmp + // anchor and the fn call in one self-contained frame. __attribute__((noinline)) bool veh_guarded_region(uintptr_t lo, uintptr_t hi, void (*fn)(void *) noexcept, void *ctx) noexcept { const DWORD slot = s_veh_tls_index.load(std::memory_order_acquire); - VehReadGuard guard; + VehAccessGuard guard; guard.guard_lo = lo; guard.guard_hi = hi; if (__builtin_setjmp(guard.env) != 0) { - // Reached only when the handler longjmped here after swallowing a read fault inside [lo, hi); the + // Reached only when the handler longjmped here after swallowing a fault inside [lo, hi); the // handler already cleared the TLS slot. Report the failure. return false; } - // Arm after the setjmp captures env and before invoking the sweep, so a fault in fn's reads is claimable + // Arm after the setjmp captures env and before invoking fn, so a fault in the guarded access is claimable // while a fault before the buffer is valid is not. TlsSetValue(slot, &guard); fn(ctx); @@ -1210,7 +1255,8 @@ namespace DetourModKit // addr + bytes would invert the handler's [guard_lo, guard_hi) check and let a real fault escape the guard); // mirrors seh_read_bytes' own precheck so read_ptr_unsafe, which has no precheck of its own, is covered too. // Counts the read in the drain epoch around the path decision so a read on the guarded path is always visible - // to remove_veh_handler's drain. Falls back to VirtualQuery validation when the handler is unavailable. + // to remove_veh_handler's drain. Falls back to a VirtualQuery plus ReadProcessMemory copy when the handler is + // unavailable. bool veh_read_bytes(uintptr_t addr, void *out, size_t bytes) noexcept { if (addr < SEH_READ_MIN_VALID_ADDR || addr + bytes < addr) @@ -1225,16 +1271,51 @@ namespace DetourModKit s_veh_in_flight.fetch_sub(1, std::memory_order_release); return ok; } + + bool veh_write_bytes(uintptr_t addr, const void *source, size_t bytes) noexcept + { + if (addr < SEH_READ_MIN_VALID_ADDR || addr + bytes < addr) + return false; + + struct WriteContext + { + uintptr_t dst; + const void *src; + size_t bytes; + } ctx{addr, source, bytes}; + + const auto do_write = [](void *opaque) noexcept -> void + { + auto *context = static_cast(opaque); + std::memcpy(reinterpret_cast(context->dst), context->src, context->bytes); + }; + + ensure_veh_installed(); + + s_veh_in_flight.fetch_add(1, std::memory_order_seq_cst); + const bool armed = s_veh_handle.load(std::memory_order_seq_cst) != nullptr; + const bool ok = armed ? veh_guarded_region(addr, addr + bytes, do_write, &ctx) + : virtualquery_validated_write(addr, source, bytes); + s_veh_in_flight.fetch_sub(1, std::memory_order_release); + return ok; + } #else // !_WIN64 // 32-bit MinGW: the handler's recovery redirect rewrites x64 CONTEXT registers (Rcx/Rip) and the longjmp buffer // is x64-sized, so the vectored guard is x64-only. A guarded read here validates every region with VirtualQuery - // before copying instead. + // before copying through ReadProcessMemory instead. bool veh_read_bytes(uintptr_t addr, void *out, size_t bytes) noexcept { if (addr < SEH_READ_MIN_VALID_ADDR || addr + bytes < addr) return false; return virtualquery_validated_copy(addr, out, bytes); } + + bool veh_write_bytes(uintptr_t addr, const void *source, size_t bytes) noexcept + { + if (addr < SEH_READ_MIN_VALID_ADDR || addr + bytes < addr) + return false; + return virtualquery_validated_write(addr, source, bytes); + } #endif // _WIN64 } // anonymous namespace #endif // !_MSC_VER @@ -1243,8 +1324,8 @@ namespace DetourModKit bool DetourModKit::Memory::detail::run_guarded_region(uintptr_t lo, uintptr_t hi, void (*fn)(void *) noexcept, void *ctx) noexcept { - // An empty or wrapping range has nothing to guard; run the read directly. A wrapped [lo, hi) would also invert - // the handler's range check, the same input veh_read_bytes rejects up front. + // An empty or wrapping range has nothing to guard; run the access directly. A wrapped [lo, hi) would also + // invert the handler's range check, the same input veh_read_bytes rejects up front. if (hi <= lo) { fn(ctx); @@ -1253,7 +1334,7 @@ namespace DetourModKit ensure_veh_installed(); - // Count the call in the drain epoch around the path decision (mirroring veh_read_bytes) so a guarded read is + // Count the call in the drain epoch around the path decision (mirroring veh_read_bytes) so a guarded access is // always visible to remove_veh_handler's drain. s_veh_in_flight.fetch_add(1, std::memory_order_seq_cst); const bool armed = s_veh_handle.load(std::memory_order_seq_cst) != nullptr; @@ -1264,10 +1345,10 @@ namespace DetourModKit } else { - // Handler unavailable (install failed, realistic only under resource exhaustion): run the read unguarded. - // The caller's own per-region VirtualQuery gate already proved the region readable at gate time -- the same - // fallback posture veh_read_bytes takes when it drops to virtualquery_validated_copy. - fn(ctx); + // Handler unavailable (install failed, realistic only under resource exhaustion): do not run an in-place + // scan unguarded. The caller treats false as a skipped/faulted region and fails uniqueness-sensitive work + // closed. + completed = false; } s_veh_in_flight.fetch_sub(1, std::memory_order_release); return completed; @@ -1766,8 +1847,14 @@ namespace DetourModKit return std::unexpected(MemoryError::ProtectionRestoreFailed); } - logger.debug("write_bytes: Successfully wrote {} bytes to address {}.", num_bytes, - DetourModKit::Format::format_address(reinterpret_cast(target_address))); + // Gate the success log behind the level check so the format_address call is skipped entirely when Debug is off + // (the shipping default). write_bytes is setup/patch-only, but a consumer that does drive it frequently should + // not pay a hex format per call just to discard the line. + if (logger.is_enabled(LogLevel::Debug)) + { + logger.debug("write_bytes: Successfully wrote {} bytes to address {}.", num_bytes, + DetourModKit::Format::format_address(reinterpret_cast(target_address))); + } return {}; } @@ -1986,6 +2073,91 @@ namespace DetourModKit #endif } + bool DetourModKit::Memory::seh_write_bytes(uintptr_t addr, const void *source, size_t bytes) noexcept + { + if (bytes == 0) + return true; + if (!source || addr < SEH_READ_MIN_VALID_ADDR) + return false; + + // Overflow guard on (addr + bytes); a wraparound destination range can never be a valid mapped target. + if (addr + bytes < addr) + return false; + +#ifdef _MSC_VER + __try + { +#if defined(__SANITIZE_ADDRESS__) + // Copy via __movsb (rep movsb) under ASan for the same reason seh_read_bytes does: MSVC routes std::memcpy + // through the ASan interceptor, which inspects the operands against ASan's shadow and false-positives on + // the foreign mapped memory this primitive legitimately writes. __movsb emits the copy inline with no + // interceptable call. Release keeps std::memcpy. + __movsb(reinterpret_cast(addr), static_cast(source), bytes); +#else + std::memcpy(reinterpret_cast(addr), source, bytes); +#endif + return true; + } + __except (Memory::detail::is_guarded_read_fault(GetExceptionCode()) ? EXCEPTION_EXECUTE_HANDLER + : EXCEPTION_CONTINUE_SEARCH) + { + return false; + } +#else + // MinGW: write through the same guard/fallback split as seh_read_bytes. x64 uses the process-wide vectored + // guard when available; if the handler cannot be installed, and on 32-bit builds, the fallback validates the + // destination and writes through WriteProcessMemory. + return veh_write_bytes(addr, source, bytes); +#endif + } + + bool DetourModKit::Memory::seh_write_chain_bytes(uintptr_t base, std::span offsets, + const void *source, size_t bytes) noexcept + { + if (bytes == 0) + return true; + if (!source) + return false; + +#ifdef _MSC_VER + // The walk is inlined here rather than reusing resolve_chain_guarded so the resolve and the terminal write sit + // in one __try region, mirroring seh_read_chain_bytes: one uniform failure path for the whole operation. + const ptrdiff_t *const offs = offsets.data(); + const size_t count = offsets.size(); + __try + { + uintptr_t cur = base; + for (size_t i = 0; i + 1 < count; ++i) + { + uintptr_t next = 0; + std::memcpy(&next, reinterpret_cast(cur + static_cast(offs[i])), sizeof(next)); + if (!Memory::plausible_userspace_ptr(next)) + return false; + cur = next; + } + const uintptr_t final_addr = (count == 0) ? cur : cur + static_cast(offs[count - 1]); + // Same terminal-address prechecks as seh_read_chain_bytes so a low or wrapping final address fails + // identically on both toolchains before the store is attempted. + if (final_addr < SEH_READ_MIN_VALID_ADDR || final_addr + bytes < final_addr) + return false; + std::memcpy(reinterpret_cast(final_addr), source, bytes); + return true; + } + __except (Memory::detail::is_guarded_read_fault(GetExceptionCode()) ? EXCEPTION_EXECUTE_HANDLER + : EXCEPTION_CONTINUE_SEARCH) + { + return false; + } +#else + // MinGW: resolve through the shared guarded-link helper (its intermediate reads use the vectored guard), then + // write the terminal range through seh_write_bytes so the same guard covers the final store. + uintptr_t final_addr = 0; + if (!resolve_chain_guarded(base, offsets.data(), offsets.size(), final_addr)) + return false; + return Memory::seh_write_bytes(final_addr, source, bytes); +#endif + } + namespace { // PE header layout for module range resolution. Pulled into an anonymous namespace so the helper is internal to diff --git a/src/memory_internal.hpp b/src/memory_internal.hpp index d1cddea..0577b5d 100644 --- a/src/memory_internal.hpp +++ b/src/memory_internal.hpp @@ -49,21 +49,19 @@ namespace DetourModKit #if !defined(_MSC_VER) && defined(_WIN64) /** * @brief Runs @p fn(@p ctx) with the process-wide vectored read guard armed over [@p lo, @p hi). - * @details MinGW x64 has no frame-based __try / __except, so a bulk in-place foreign read -- the - * scanner's memchr / SIMD region sweep -- cannot wrap itself in SEH the way the MSVC path - * does. This routes such a read through the same vectored exception handler, thread-local - * guard slot, and drain epoch the seh_read_bytes path uses: the guard is armed for - * [lo, hi), @p fn performs the read, and a guarded read fault (is_guarded_read_fault) - * inside that range is turned into a clean failure (the handler longjmps back) instead of - * terminating the host. @p fn must be a self-contained read with no resources that need - * unwinding, because a guarded fault abandons its frame via __builtin_longjmp without - * running destructors -- exactly the contract the copy-based guard relies on. When the - * handler could not be installed @p fn is run unguarded, the same VirtualQuery-fallback - * posture the byte-copy guard takes (the caller's own readability gate is then the only - * guard). + * @details MinGW x64 has no frame-based __try / __except, so a bulk in-place foreign read -- the scanner's + * memchr / SIMD region sweep -- cannot wrap itself in SEH the way the MSVC path does. This routes + * such a read through the same vectored exception handler, thread-local guard slot, and drain + * epoch the seh_read_bytes path uses: the guard is armed for [lo, hi), @p fn performs the read, + * and a guarded read fault (is_guarded_read_fault) inside that range is turned into a clean + * failure (the handler longjmps back) instead of terminating the host. @p fn must be a + * self-contained read with no resources that need unwinding, because a guarded fault abandons its + * frame via __builtin_longjmp without running destructors -- exactly the contract the copy-based + * guard relies on. When the handler could not be installed @p fn is not run; callers treat false + * as a skipped/faulted range and fail uniqueness-sensitive work closed. * @param lo First byte of the foreign range @p fn will read. * @param hi One past the last byte of that range. An empty or wrapping range (hi <= lo) runs @p fn - * unguarded. + * directly because there is no foreign byte span to guard. * @param fn The read to perform; must be noexcept and must not throw. * @param ctx Opaque pointer forwarded to @p fn. * @return true if @p fn completed without a guarded read fault; false if a fault inside [lo, hi) was diff --git a/src/rtti.cpp b/src/rtti.cpp index e16fdb9..a6fed9b 100644 --- a/src/rtti.cpp +++ b/src/rtti.cpp @@ -572,11 +572,20 @@ namespace DetourModKit return cached != 0 ? std::optional(cached) : std::nullopt; } - // First use: resolve and cache. Concurrent first-callers converge on the same vtable, so a benign - // double-resolve needs no lock. + // First use (or a retry after an earlier miss): resolve and cache. Concurrent first-callers converge on the + // same vtable, so a benign double-resolve needs no lock. const auto resolved = vtable_for_type(m_mangled, m_range); - m_cached.store(resolved.value_or(0), std::memory_order_relaxed); - m_resolved.store(true, std::memory_order_release); + + // Latch only a SUCCESSFUL resolve as permanent. A failed resolve is not cached: the vtable may simply not be + // mapped yet (the owning module loads later, or a game patch is mid-relocation), so leaving m_resolved false + // lets a subsequent call retry once the type becomes resolvable rather than wedging on a stale miss forever. + // The store of m_cached is published with release before m_resolved, so the fast path's acquire-load that sees + // m_resolved == true also sees the cached value. + if (resolved) + { + m_cached.store(*resolved, std::memory_order_release); + m_resolved.store(true, std::memory_order_release); + } return resolved; } diff --git a/src/rtti_dissect.cpp b/src/rtti_dissect.cpp index 5223199..a6026eb 100644 --- a/src/rtti_dissect.cpp +++ b/src/rtti_dissect.cpp @@ -353,11 +353,16 @@ namespace DetourModKit return std::unexpected(HealError::BadDescriptor); std::size_t required_count = 0; - for (const Landmark &lm : fp) + for (std::size_t i = 0; i < fp.size(); ++i) { - if (!descriptor_ok(lm)) + if (!descriptor_ok(fp[i])) return std::unexpected(HealError::BadDescriptor); - if (lm.required) + for (std::size_t j = 0; j < i; ++j) + { + if (fp[j].nominal_offset == fp[i].nominal_offset) + return std::unexpected(HealError::BadDescriptor); + } + if (fp[i].required) ++required_count; } // A template with no required landmark cannot fail closed against a dense region, so it is rejected rather than diff --git a/src/scanner.cpp b/src/scanner.cpp index fc095f5..f84c077 100644 --- a/src/scanner.cpp +++ b/src/scanner.cpp @@ -989,6 +989,19 @@ namespace DetourModKit return std::unexpected(RipResolveError::PrefixNotFound); } + // Per-thread "the most recently measured sweep window skipped a faulted region" flag. scan_regions_filtered ORs it + // true (never clears it) whenever it skips a region that faulted mid-scan, so a faulted sweep is observable to the + // cascade layer in its own TU without changing any scan return type. It is thread_local because two threads + // scanning concurrently must not see each other's fault state; the cascade clears it before a measurement window + // and reads it after, so a stale value from an unrelated earlier scan on the same thread cannot leak into a later + // verdict. The flag is advisory accounting for the fail-closed uniqueness check -- it never alters which address a + // scan returns. + bool &Scanner::detail::scan_incomplete_flag() noexcept + { + thread_local bool incomplete = false; + return incomplete; + } + namespace { // Scan one protection-gated region for the next needed match, decrementing matches_remaining for each non-self @@ -1035,6 +1048,12 @@ namespace DetourModKit const Scanner::CompiledPattern &pattern, uintptr_t needle_lo, uintptr_t needle_hi, size_t &matches_remaining, bool &out_faulted) noexcept { + // The 64-bit-only contract, asserted locally so the unguarded 32-bit MinGW arm of this function (the bare + // #else below, which runs scan_region_for_match with no vectored fault guard) is provably unreachable in + // any build that compiles: a 32-bit build fails here at compile time rather than silently shipping a sweep + // whose only TOCTOU protection is the per-region VirtualQuery gate. + static_assert(sizeof(void *) == 8, "scan_region_guarded requires a 64-bit target: the MinGW fault guard " + "(run_guarded_region) is x64-only and the 32-bit arm is unguarded."); out_faulted = false; #ifdef _MSC_VER const size_t original_matches_remaining = matches_remaining; @@ -1138,6 +1157,42 @@ namespace DetourModKit bool prev_accepted = false; uintptr_t prev_accept_hi = 0; uintptr_t run_lo = 0; + auto report_faulted_regions = [&]() noexcept + { + if (faulted_regions == 0) + return; + + // Surface the incomplete-scan state to the cascade layer (its own TU) so a uniqueness / occurrence + // count run over a window that skipped a faulted region can fail closed: a hidden match could live in + // the skipped bytes, so a count taken here is a lower bound, not a proof. OR-set (never clear) so a + // fault in any one of several scans within a caller's measurement window stays observable until the + // caller reads and clears it. + Scanner::detail::scan_incomplete_flag() = true; + + // Best-effort diagnosis only; the sweep already skipped each faulted region and continued. + try + { + (void)Logger::get_instance().try_log( + LogLevel::Debug, + "Scanner: skipped {} region(s) that faulted mid-scan (concurrent decommit/reprotect).", + faulted_regions); + } + catch (...) + { + } + + // Surface the same skipped-region count to subscribers as a typed event. The dispatcher is lazy and can + // allocate on first use, so diagnostics must never change the scan result. + try + { + Diagnostics::scanner_faults().emit_safe(Diagnostics::ScannerFaultEvent{ + .faulted_regions = faulted_regions, .window_low = window_lo, .window_high = window_hi}); + } + catch (...) + { + } + faulted_regions = 0; + }; while (addr < window_hi && VirtualQuery(reinterpret_cast(addr), &mbi, sizeof(mbi))) { @@ -1188,7 +1243,10 @@ namespace DetourModKit const std::byte *result = scan_region_guarded(region_start, scan_size, pattern, needle_lo, needle_hi, matches_remaining, region_faulted); if (result != nullptr) + { + report_faulted_regions(); return result; + } if (region_faulted) ++faulted_regions; } @@ -1207,27 +1265,7 @@ namespace DetourModKit addr = region_end; } - if (faulted_regions > 0) - { - // Best-effort diagnosis only; the sweep already skipped each faulted region and continued. try_log - // keeps this noexcept path honest when the Debug level is enabled. - (void)Logger::get_instance().try_log( - LogLevel::Debug, - "Scanner: skipped {} region(s) that faulted mid-scan (concurrent decommit/reprotect).", - faulted_regions); - - // Surface the same skipped-region count to subscribers as a typed event. The dispatcher is lazy and can - // allocate on first use, so diagnostics must never change the scan result. - try - { - Diagnostics::scanner_faults().emit_safe(Diagnostics::ScannerFaultEvent{ - .faulted_regions = faulted_regions, .window_low = window_lo, .window_high = window_hi}); - } - catch (...) - { - } - } - + report_faulted_regions(); return nullptr; } diff --git a/src/scanner_cascade.cpp b/src/scanner_cascade.cpp index 90e0df3..fdce0d0 100644 --- a/src/scanner_cascade.cpp +++ b/src/scanner_cascade.cpp @@ -249,15 +249,26 @@ namespace DetourModKit // evidence. When first_match_out is non-null it receives the first occurrence (the n == 1 scan result, or // nullptr when there were no hits) so a caller that needs both the count and the first match -- the same scan, // same scope -- does not have to sweep again to fetch it. + // + // When incomplete_out is non-null it receives whether any scan in this count skipped a region that faulted + // mid-scan. A faulted region means the returned count is a lower bound: a second occurrence could have lived in + // the skipped bytes, so a count of 1 over an incomplete sweep does NOT prove uniqueness. The flag is cleared + // here once before the count loop and read after, so the result reflects only the scans this call performed + // (not a stale fault from an earlier scan on the same thread). The count itself is unchanged; the caller + // decides how to treat the incomplete signal. std::size_t count_pattern_hits_bounded(const DetourModKit::Scanner::CompiledPattern &pattern, std::size_t max_hits, std::optional range, - const std::byte **first_match_out = nullptr) noexcept + const std::byte **first_match_out = nullptr, + bool *incomplete_out = nullptr) noexcept { if (first_match_out != nullptr) { *first_match_out = nullptr; } + // Clear once before the loop so the flag reflects only the scans below; the region walk OR-sets it on a + // faulted skip and never clears it, so a prior scan on this thread must not bleed into this verdict. + DetourModKit::Scanner::detail::scan_incomplete_flag() = false; std::size_t hits = 0; for (std::size_t n = 1; n <= max_hits + 1; ++n) { @@ -273,6 +284,10 @@ namespace DetourModKit } ++hits; } + if (incomplete_out != nullptr) + { + *incomplete_out = DetourModKit::Scanner::detail::scan_incomplete_flag(); + } return hits; } @@ -423,12 +438,27 @@ namespace DetourModKit } applicable = true; const std::byte *first_match = nullptr; - const std::size_t hits = - count_pattern_hits_bounded(*compiled, PROLOGUE_FALLBACK_MAX_HITS, range, &first_match); + bool count_incomplete = false; + const std::size_t hits = count_pattern_hits_bounded(*compiled, PROLOGUE_FALLBACK_MAX_HITS, range, + &first_match, &count_incomplete); if (hits == 0) { return std::nullopt; } + // Fail closed when the uniqueness count ran over an incomplete sweep. A region that faulted mid-scan + // (concurrent decommit / reprotect) was skipped, so a second occurrence of the rebuilt jump pattern could + // have lived in the unscanned bytes. The fallback's whole safety rests on the rebuilt pattern matching + // exactly once -- the single site a sibling mod inline-hooked -- so an unproven count must be treated as + // ambiguous, not as a unique hit. Rejecting here lets the cascade fall through to the next candidate / + // shape rather than commit a hook at a possibly-wrong address whose blast radius (an unrelated function) + // is severe. + if (count_incomplete) + { + logger.debug("Scanner: prologue fallback rejected for '{}': uniqueness count incomplete (a region " + "faulted mid-scan); uniqueness unproven, failing closed", + candidate.name.empty() ? std::string_view{""} : candidate.name); + return std::nullopt; + } if (hits > PROLOGUE_FALLBACK_MAX_HITS) { logger.debug("Scanner: prologue fallback rejected for '{}': {} hits exceed uniqueness ceiling ({})", diff --git a/src/scanner_internal.hpp b/src/scanner_internal.hpp index c8a4c9b..e28e472 100644 --- a/src/scanner_internal.hpp +++ b/src/scanner_internal.hpp @@ -93,6 +93,19 @@ namespace DetourModKit * address (VirtualQuery failure) or a guard / no-access page. */ [[nodiscard]] bool is_executable_address(std::uintptr_t address) noexcept; + + /** + * @brief Per-thread flag set when a region sweep skipped a region that faulted mid-scan. + * @details scan_regions_filtered ORs this true (never clears it) when it skips a region that faulted under + * the TOCTOU guard, so an out-of-TU caller (the cascade resolver) can tell that an occurrence + * count it just ran is only a lower bound: a hidden match could live in the skipped bytes. + * thread_local so two threads scanning concurrently do not see each other's fault state. The + * reader clears it before a measurement window and reads it after, so a stale fault from an + * earlier scan on the same thread cannot bleed into a later verdict. Advisory accounting only -- + * it never changes which address a scan returns. + * @return Reference to the calling thread's incomplete-scan flag. + */ + [[nodiscard]] bool &scan_incomplete_flag() noexcept; } // namespace detail } // namespace Scanner } // namespace DetourModKit diff --git a/src/string_xref.cpp b/src/string_xref.cpp index ccd194f..73ae8eb 100644 --- a/src/string_xref.cpp +++ b/src/string_xref.cpp @@ -189,8 +189,11 @@ namespace DetourModKit // window with one VirtualQuery; a concurrent decommit / reprotect before these unguarded byte reads complete // would otherwise fault the host. On MSVC the body runs inside a __try / __except that swallows exactly the // foreign-read faults (Memory::detail::is_guarded_read_fault) and reports the window faulted so the sweep skips - // it; MinGW has no structured exception handling and runs the body directly (the VirtualQuery gate is the only - // guard available there). Mirrors scan_region_guarded in scanner.cpp. Returns true when a fault was swallowed. + // it. On MinGW x64 the body runs through the same process-wide vectored read guard the seh_read paths use + // (Memory::detail::run_guarded_region), so the fault is swallowed and the window skipped + counted there too; + // only on 32-bit MinGW, where that x64-only vectored guard is unavailable, does the body run directly behind + // just the VirtualQuery gate. Mirrors scan_region_guarded in scanner.cpp. Returns true when a fault was + // swallowed. bool scan_window_narrow_guarded(const Scanner::detail::ExecutableWindow &window, std::uintptr_t string_addr, std::size_t instr_len, std::size_t &found_count, std::uintptr_t &first_site, LeaReferenceInfo *info) noexcept @@ -217,6 +220,44 @@ namespace DetourModKit } return true; } +#elif defined(_WIN64) + // MinGW x64: arm the process-wide vectored read guard over exactly the bytes the window gate proved + // readable, so a concurrent decommit / reprotect that faults the scan is swallowed and the window reported + // faulted -- the same skip-the-window contract the MSVC __except arm and scanner.cpp's scan_region_guarded + // follow. + const std::size_t original_found_count = found_count; + const std::uintptr_t original_first_site = first_site; + const LeaReferenceInfo original_info = (info != nullptr) ? *info : LeaReferenceInfo{}; + struct NarrowScanContext + { + const Scanner::detail::ExecutableWindow *window; + std::uintptr_t string_addr; + std::size_t instr_len; + std::size_t *found_count; + std::uintptr_t *first_site; + LeaReferenceInfo *info; + } scan_ctx{&window, string_addr, instr_len, &found_count, &first_site, info}; + + const auto run_scan = [](void *opaque) noexcept -> void + { + auto *context = static_cast(opaque); + scan_window_narrow_body(*context->window, context->string_addr, context->instr_len, + *context->found_count, *context->first_site, context->info); + }; + + if (Memory::detail::run_guarded_region(window.base, window.base + window.span, run_scan, &scan_ctx)) + { + return false; + } + // Faulted: discard any partial count / site / lea info so a partially-scanned window cannot leak a stale + // site or register, exactly as the MSVC arm does. + found_count = original_found_count; + first_site = original_first_site; + if (info != nullptr) + { + *info = original_info; + } + return true; #else scan_window_narrow_body(window, string_addr, instr_len, found_count, first_site, info); return false; @@ -468,6 +509,35 @@ namespace DetourModKit first_site = original_first_site; return true; } +#elif defined(_WIN64) + // MinGW x64: same vectored read guard as the narrow sibling, armed over the gated window bytes; a fault is + // swallowed and the window reported faulted. Only 32-bit MinGW falls back to the bare VirtualQuery gate. + const std::size_t original_found_count = found_count; + const std::uintptr_t original_first_site = first_site; + struct BroadScanContext + { + const ZydisDecoder *decoder; + const Scanner::detail::ExecutableWindow *window; + std::uintptr_t string_addr; + std::size_t *found_count; + std::uintptr_t *first_site; + } scan_ctx{&decoder, &window, string_addr, &found_count, &first_site}; + + const auto run_scan = [](void *opaque) noexcept -> void + { + auto *context = static_cast(opaque); + scan_window_broad_body(*context->decoder, *context->window, context->string_addr, *context->found_count, + *context->first_site); + }; + + if (Memory::detail::run_guarded_region(window.base, window.base + window.span, run_scan, &scan_ctx)) + { + return false; + } + // Faulted: discard any partial count / site so a partially-scanned window cannot leak a stale site. + found_count = original_found_count; + first_site = original_first_site; + return true; #else scan_window_broad_body(decoder, window, string_addr, found_count, first_site); return false; diff --git a/tests/test_config.cpp b/tests/test_config.cpp index 85b0aab..f0d13b0 100644 --- a/tests/test_config.cpp +++ b/tests/test_config.cpp @@ -760,6 +760,72 @@ TEST_F(ConfigTest, NegativeIntValue) EXPECT_EQ(val, -50); } +TEST_F(ConfigTest, IntValueAboveIntMaxFallsBackToDefault) +{ + std::ofstream ini_file(m_test_ini_file); + ini_file << "[TestSection]\n"; + // 5000000000 is larger than INT_MAX. The 64-bit parse can observe that instead of accepting the saturated 32-bit + // strtol value, so the range check must fall back to the registered default. + ini_file << "TestInt=5000000000\n"; + ini_file.close(); + + int val = 0; + + Config::register_int("TestSection", "TestInt", "val", [&val](int v) { val = v; }, 7); + + EXPECT_NO_THROW(Config::load(m_test_ini_file.string())); + EXPECT_EQ(val, 7); +} + +TEST_F(ConfigTest, IntValueBelowIntMinFallsBackToDefault) +{ + std::ofstream ini_file(m_test_ini_file); + ini_file << "[TestSection]\n"; + // -5000000000 is below INT_MIN; the symmetric lower-bound branch must also fall back to the registered default. + ini_file << "TestInt=-5000000000\n"; + ini_file.close(); + + int val = 0; + + Config::register_int("TestSection", "TestInt", "val", [&val](int v) { val = v; }, -3); + + EXPECT_NO_THROW(Config::load(m_test_ini_file.string())); + EXPECT_EQ(val, -3); +} + +TEST_F(ConfigTest, IntValueLeadingZeroStaysDecimal) +{ + std::ofstream ini_file(m_test_ini_file); + ini_file << "[TestSection]\n"; + // Match SimpleIni's historical GetLongValue behavior: only 0x switches to hex, so a leading zero is not octal. + ini_file << "TestInt=010\n"; + ini_file.close(); + + int val = 0; + + Config::register_int("TestSection", "TestInt", "val", [&val](int v) { val = v; }, 7); + + EXPECT_NO_THROW(Config::load(m_test_ini_file.string())); + EXPECT_EQ(val, 10); +} + +TEST_F(ConfigTest, IntValueNonNumericFallsBackToDefault) +{ + std::ofstream ini_file(m_test_ini_file); + ini_file << "[TestSection]\n"; + // A non-numeric value never fully consumes under strtoll (the end pointer stays at the start), so the parse must + // reject it and fall back to the registered default rather than committing strtoll's zero result. + ini_file << "TestInt=abc\n"; + ini_file.close(); + + int val = 0; + + Config::register_int("TestSection", "TestInt", "val", [&val](int v) { val = v; }, 5); + + EXPECT_NO_THROW(Config::load(m_test_ini_file.string())); + EXPECT_EQ(val, 5); +} + TEST_F(ConfigTest, MissingSectionInFile) { std::ofstream ini_file(m_test_ini_file); diff --git a/tests/test_memory.cpp b/tests/test_memory.cpp index e432ec0..f125d4b 100644 --- a/tests/test_memory.cpp +++ b/tests/test_memory.cpp @@ -2550,3 +2550,103 @@ TEST_F(MemoryTest, UnarmedThreadFaultIsPassedThroughNotClaimed) EXPECT_EQ(observed.load(), 0u); VirtualFree(page, 0, MEM_RELEASE); } + +// Coverage for the per-frame guarded WRITE primitives (seh_write_bytes, seh_write). They write to already-writable +// memory under a fault guard, changing no page protection and running no i-cache flush or cache invalidation, so they +// need no cache state; the fixture's cache is left in its default state. + +TEST_F(MemoryTest, SehWrite_TypedRoundTripsToLocal) +{ + uint64_t target = 0; + const uint64_t expected = 0x1122334455667788ull; + + EXPECT_TRUE(Memory::seh_write(reinterpret_cast(&target), expected)); + EXPECT_EQ(target, expected); + + // Read it back through the guarded read primitive to confirm the byte image matches. + const auto value = Memory::seh_read(reinterpret_cast(&target)); + ASSERT_TRUE(value.has_value()); + EXPECT_EQ(*value, expected); +} + +TEST_F(MemoryTest, SehWriteBytes_RoundTripsToHeap) +{ + auto buffer = std::make_unique(16); + std::memset(buffer.get(), 0, 16); + const std::byte source[] = {std::byte{0xDE}, std::byte{0xAD}, std::byte{0xBE}, std::byte{0xEF}}; + + EXPECT_TRUE(Memory::seh_write_bytes(reinterpret_cast(buffer.get()), source, sizeof(source))); + + for (size_t i = 0; i < sizeof(source); ++i) + { + EXPECT_EQ(buffer[i], source[i]); + } + // Bytes beyond the written span are untouched. + EXPECT_EQ(buffer[sizeof(source)], std::byte{0x00}); +} + +TEST_F(MemoryTest, SehWriteBytes_ZeroBytesIsNoOpSuccess) +{ + uint64_t target = 0xFEEDFACEull; + const uint64_t source = 0x0; + + // Zero-byte write is a no-op success that leaves the target unchanged. + EXPECT_TRUE(Memory::seh_write_bytes(reinterpret_cast(&target), &source, 0)); + EXPECT_EQ(target, 0xFEEDFACEull); +} + +TEST_F(MemoryTest, SehWriteBytes_NullSourceReturnsFalse) +{ + uint64_t target = 0xFEEDFACEull; + + // nullptr source is rejected without a write. + EXPECT_FALSE(Memory::seh_write_bytes(reinterpret_cast(&target), nullptr, sizeof(target))); + EXPECT_EQ(target, 0xFEEDFACEull); +} + +TEST_F(MemoryTest, SehWriteBytes_LowAddressReturnsFalse) +{ + const uint32_t source = 0x11223344u; + + // An address below 0x10000 (the Windows reserved low range) is rejected without a write and must not crash. + EXPECT_FALSE(Memory::seh_write_bytes(static_cast(0x100), &source, sizeof(source))); +} + +// FAULT-GUARD proof: a write into a committed PAGE_READONLY page must be caught by the SEH/VEH guard and return false, +// not fault the host, and must leave the page bytes unchanged. The page is held until TearDown via the local scope +// (never MEM_RELEASE'd before the assertions) so a released VA cannot be remapped and the fault stays deterministic. +TEST_F(MemoryTest, SehWriteBytes_ReadOnlyPageFaultsClosed) +{ + // A page seeded as PAGE_READWRITE so we can plant a known sentinel, then flipped to PAGE_READONLY before the write. + void *page = VirtualAlloc(nullptr, 4096, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + ASSERT_NE(page, nullptr); + + constexpr uint32_t kSentinel = 0xA5A5A5A5u; + *reinterpret_cast(page) = kSentinel; + + DWORD old_protect = 0; + ASSERT_NE(VirtualProtect(page, 4096, PAGE_READONLY, &old_protect), 0); + + const uint32_t source = 0xDEADBEEFu; + // The guarded write must fail closed: the access violation is swallowed and false is returned. + EXPECT_FALSE(Memory::seh_write_bytes(reinterpret_cast(page), &source, sizeof(source))); + + // Restore write access to read the page back and prove the bytes never changed. + ASSERT_NE(VirtualProtect(page, 4096, old_protect, &old_protect), 0); + EXPECT_EQ(*reinterpret_cast(page), kSentinel); + + VirtualFree(page, 0, MEM_RELEASE); +} + +// FAULT-GUARD proof, no-access variant: a write into a committed PAGE_NOACCESS page must also be caught and return +// false. The page is held until after the assertion (never released early) so the fault is deterministic. +TEST_F(MemoryTest, SehWriteBytes_NoAccessPageFaultsClosed) +{ + void *page = VirtualAlloc(nullptr, 4096, MEM_COMMIT | MEM_RESERVE, PAGE_NOACCESS); + ASSERT_NE(page, nullptr); + + const uint32_t source = 0xDEADBEEFu; + EXPECT_FALSE(Memory::seh_write_bytes(reinterpret_cast(page), &source, sizeof(source))); + + VirtualFree(page, 0, MEM_RELEASE); +} diff --git a/tests/test_memory_chain.cpp b/tests/test_memory_chain.cpp index 8e0643a..d8e1f50 100644 --- a/tests/test_memory_chain.cpp +++ b/tests/test_memory_chain.cpp @@ -182,3 +182,120 @@ TEST(MemorySehReadChain, ReadsNonDefaultConstructibleType) EXPECT_EQ(value->a, 0xAABBCCDDu); EXPECT_EQ(value->b, 0x11223344u); } + +// Coverage for the pointer-chain write primitives (seh_write_chain, seh_write_chain_bytes). These write to +// already-writable in-process memory, so no game memory, cache state, or page-protection change is required. + +TEST(MemorySehWriteChain, TwoLevelWritesTypedValueRoundTrips) +{ + uint64_t target = 0; + uintptr_t mid = reinterpret_cast(&target); + uintptr_t root = reinterpret_cast(&mid); + + // deref(&root) -> &mid, deref(&mid) -> ⌖ final offset 0 not dereferenced. The write lands in target. + const uint64_t expected = 0x1122334455667788ull; + EXPECT_TRUE(Memory::seh_write_chain(reinterpret_cast(&root), {0, 0, 0}, expected)); + EXPECT_EQ(target, expected); + + // Read it back through the same chain to confirm the resolve agrees both directions. + const auto value = Memory::seh_read_chain(reinterpret_cast(&root), {0, 0, 0}); + ASSERT_TRUE(value.has_value()); + EXPECT_EQ(*value, expected); +} + +TEST(MemorySehWriteChain, WritesIntoNestedStructFieldViaRealOffsets) +{ + // A struct with a nested pointer and an array field, so the chain walks a real intra-object layout. The first link + // dereferences the embedded pointer; the final offset selects the array element address without dereferencing it. + struct Inner + { + uint32_t header; + uint32_t cells[4]; + }; + struct Outer + { + Inner *inner; + uint64_t pad; + }; + + Inner inner{0xDEADBEEFu, {0, 0, 0, 0}}; + Outer outer{&inner, 0}; + + // Chain semantics: every offset but the last is dereferenced. From &outer, the first offset lands on the embedded + // Outer::inner pointer and is dereferenced to reach &inner; the final offset selects inner.cells[2]'s address + // without dereferencing it. deref(&outer + offsetof(inner)) -> &inner, then + offsetof(cells) + 2 * + // sizeof(uint32_t). + const ptrdiff_t to_inner = static_cast(offsetof(Outer, inner)); + const ptrdiff_t to_cell2 = static_cast(offsetof(Inner, cells) + 2 * sizeof(uint32_t)); + const uint32_t expected = 0xCAFEF00Du; + EXPECT_TRUE(Memory::seh_write_chain(reinterpret_cast(&outer), {to_inner, to_cell2}, expected)); + + EXPECT_EQ(inner.cells[2], expected); + // Neighbouring fields are untouched. + EXPECT_EQ(inner.header, 0xDEADBEEFu); + EXPECT_EQ(inner.cells[1], 0u); + EXPECT_EQ(inner.cells[3], 0u); +} + +TEST(MemorySehWriteChain, EmptyOffsetsWritesAtBase) +{ + uint32_t target = 0; + const uint32_t expected = 0xABCD1234u; + + // An empty offset span writes at base unchanged. + EXPECT_TRUE(Memory::seh_write_chain_bytes(reinterpret_cast(&target), std::span{}, + &expected, sizeof(expected))); + EXPECT_EQ(target, expected); +} + +TEST(MemorySehWriteChain, ZeroBytesIsNoOpSuccess) +{ + uint64_t target = 0xFEEDFACEull; + const uint64_t source = 0x0; + + // Zero-byte write is a no-op success and must leave the target untouched. + EXPECT_TRUE( + Memory::seh_write_chain_bytes(reinterpret_cast(&target), std::span{}, &source, 0)); + EXPECT_EQ(target, 0xFEEDFACEull); +} + +TEST(MemorySehWriteChain, NullSourceReturnsFalse) +{ + uint64_t target = 0xFEEDFACEull; + + // nullptr source is rejected and the target is unchanged. + EXPECT_FALSE(Memory::seh_write_chain_bytes(reinterpret_cast(&target), std::span{}, + nullptr, sizeof(target))); + EXPECT_EQ(target, 0xFEEDFACEull); +} + +TEST(MemorySehWriteChain, ImplausibleLinkFailsClosed) +{ + // The first dereference yields a null pointer, which fails the plausibility screen before any write is attempted, + // mirroring the seh_resolve_chain implausible-link test. + uintptr_t null_holder = 0; + const uint32_t source = 0x11223344u; + EXPECT_FALSE( + Memory::seh_write_chain_bytes(reinterpret_cast(&null_holder), {0, 0}, &source, sizeof(source))); +} + +TEST(MemorySehWriteChain, InitializerListMatchesSpanOverload) +{ + uint32_t target_list = 0; + uint32_t target_span = 0; + uintptr_t holder_list = reinterpret_cast(&target_list); + uintptr_t holder_span = reinterpret_cast(&target_span); + + const uint32_t expected = 0x99AABBCCu; + + // The braced-list overload and the explicit span overload must behave identically. + EXPECT_TRUE(Memory::seh_write_chain(reinterpret_cast(&holder_list), {0, 0}, expected)); + + const ptrdiff_t offs[] = {0, 0}; + EXPECT_TRUE(Memory::seh_write_chain(reinterpret_cast(&holder_span), + std::span(offs), expected)); + + EXPECT_EQ(target_list, expected); + EXPECT_EQ(target_span, expected); + EXPECT_EQ(target_list, target_span); +} diff --git a/tests/test_platform.cpp b/tests/test_platform.cpp index 24c86cd..263b02b 100644 --- a/tests/test_platform.cpp +++ b/tests/test_platform.cpp @@ -39,9 +39,13 @@ TEST(VersionTest, VersionStringIsNonEmpty) TEST(VersionTest, MakeVersionEncoding) { - EXPECT_EQ(DMK_MAKE_VERSION(2, 3, 0), 20300); - EXPECT_EQ(DMK_MAKE_VERSION(1, 0, 0), 10000); + EXPECT_EQ(DMK_MAKE_VERSION(2, 3, 0), 2003000); + EXPECT_EQ(DMK_MAKE_VERSION(1, 0, 0), 1000000); EXPECT_EQ(DMK_MAKE_VERSION(0, 0, 1), 1); + // The minor and patch fields reserve three decimal digits each (0..999), so a multi-digit minor no longer collides + // and a higher minor can never overtake the next major. + EXPECT_EQ(DMK_MAKE_VERSION(0, 100, 0), 100000); + EXPECT_GT(DMK_MAKE_VERSION(1, 0, 0), DMK_MAKE_VERSION(0, 999, 999)); } TEST(VersionTest, VersionAtLeast) diff --git a/tests/test_rtti_dissect.cpp b/tests/test_rtti_dissect.cpp index 3d2b778..eef3383 100644 --- a/tests/test_rtti_dissect.cpp +++ b/tests/test_rtti_dissect.cpp @@ -479,6 +479,38 @@ TEST_F(RttiDissectTest, IdentifyOr_NoFallbacksDegradesToPrimary) EXPECT_EQ(pt.name(), ".?AVNoFallback@@"); } +TEST_F(RttiDissectTest, IdentifyOr_AllFailResetsOutToDefault) +{ + // A prior success leaves out fully populated; a later all-fail cascade must wipe out back to a default-constructed + // PointeeType, so a caller that ignores the error never reads a stale name/vtable from the earlier resolve. + SyntheticVtable prior(".?AVPriorResolve@@"); + std::array prior_slot{prior.vtable()}; + const std::uintptr_t prior_addr = reinterpret_cast(prior_slot.data()); + + Rtti::PointeeType pt; + const auto first = Rtti::identify_pointee_type_or(prior_addr, pt); + ASSERT_TRUE(first.has_value()); + ASSERT_FALSE(pt.name().empty()); + ASSERT_NE(pt.vtable, 0u); + + std::array garbage_a{0xDEADBEEFu}; + std::array garbage_b{0xDEADBEEFu}; + const std::uintptr_t garbage_a_addr = reinterpret_cast(garbage_a.data()); + const std::uintptr_t garbage_b_addr = reinterpret_cast(garbage_b.data()); + + const auto miss = Rtti::identify_pointee_type_or(garbage_a_addr, pt, garbage_b_addr); + ASSERT_FALSE(miss.has_value()); + + // out is the value-initialized default, not the prior success. + EXPECT_TRUE(pt.name().empty()); + EXPECT_EQ(pt.name_len, 0u); + EXPECT_EQ(pt.vtable, 0u); + EXPECT_EQ(pt.col_addr, 0u); + EXPECT_EQ(pt.object_base, 0u); + EXPECT_EQ(pt.pointer_value, 0u); + EXPECT_FALSE(pt.was_pointer); +} + TEST_F(RttiDissectTest, Identify_ErrorStringsAreDistinct) { EXPECT_NE(Rtti::identify_error_to_string(Rtti::IdentifyError::BadSlotAddress), @@ -1249,6 +1281,28 @@ TEST_F(RttiDissectTest, Fingerprint_SingleLandmarkMatchesHeal) EXPECT_EQ(healed->healed_offset - static_cast(FP_OA), solved->delta); } +TEST_F(RttiDissectTest, Fingerprint_DuplicateOffsetIsBadDescriptor) +{ + // Two required landmarks that share a nominal_offset probe the same shifted slot. Counting both would inflate the + // corroboration score and report stronger agreement than the template actually provides, so a duplicate offset is + // a malformed template and must fail before any memory is probed. + FpTypes ty; + SynStruct st; + st.put(FP_OA, syn_heap_object(ty.a.vtable())); + st.put(FP_OB, syn_heap_object(ty.b.vtable())); + st.put(FP_OC, syn_heap_object(ty.c.vtable())); + + const std::array with_dup{ + Rtti::Landmark{.base = st.base(), .nominal_offset = FP_OA, .expected_mangled = ".?AVFpA@@"}, + Rtti::Landmark{.base = st.base(), .nominal_offset = FP_OB, .expected_mangled = ".?AVFpB@@"}, + Rtti::Landmark{.base = st.base(), .nominal_offset = FP_OC, .expected_mangled = ".?AVFpC@@"}, + Rtti::Landmark{.base = st.base(), .nominal_offset = FP_OA, .expected_mangled = ".?AVFpA@@"}, + }; + const auto hit = Rtti::solve_fingerprint(st.base(), with_dup, 0x20); + ASSERT_FALSE(hit.has_value()); + EXPECT_EQ(hit.error(), Rtti::HealError::BadDescriptor); +} + TEST_F(RttiDissectTest, Fingerprint_AllocatesNothing) { // solve_fingerprint documents an allocation-free contract; like the heal path it reuses one stack PointeeType and diff --git a/tests/test_rtti_reverse.cpp b/tests/test_rtti_reverse.cpp index 8039262..89ea212 100644 --- a/tests/test_rtti_reverse.cpp +++ b/tests/test_rtti_reverse.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include @@ -240,6 +242,28 @@ TEST_F(RttiReverseTest, TypeIdentityUnresolvedNeverMatches) EXPECT_FALSE(id.matches(0x1000)); } +TEST_F(RttiReverseTest, TypeIdentityFailedResolveRetriesWhenTypeAppearsLater) +{ + // A failed resolve must NOT latch permanently: the owning module may map the type later (a DLL loads, or a patch + // finishes relocating the vtable). Scope the identity to the whole pool, miss while the type is absent, then build + // the synth and confirm the next call resolves rather than staying wedged on the earlier miss. + const std::uintptr_t pool_base = reinterpret_cast(s_rev_pool.data()); + const Memory::ModuleRange full_pool{pool_base, pool_base + s_rev_pool.size()}; + + Rtti::TypeIdentity id(".?AVRevLateBind@@", full_pool); + EXPECT_FALSE(id.vtable().has_value()); + + const std::uintptr_t vt = build_synth(".?AVRevLateBind@@", 0); + ASSERT_NE(vt, 0u); + + const auto resolved = id.vtable(); + ASSERT_TRUE(resolved.has_value()); + EXPECT_EQ(*resolved, vt); + + // The successful resolve now latches: the warm path returns the same value. + EXPECT_EQ(id.vtable(), resolved); +} + TEST_F(RttiReverseTest, FindsFixtureViaHostModuleSectionWalk) { // Resolve against the real host EXE range so collect_rtti_scan_ranges parses actual PE section headers and sweeps @@ -272,6 +296,21 @@ TEST_F(RttiReverseTest, VtablesForTypeTruncatesButReportsFullCount) EXPECT_EQ(all[0], primary); } +TEST_F(RttiReverseTest, TypeIdentityRejectsStringTemporaryAtCompileTime) +{ + // TypeIdentity stores the name as a non-owning view, so a std::string rvalue would dangle the moment the + // constructor returns. The deleted std::string&& overload rejects that temporary at compile time, while a literal + // (const char*), a std::string_view, and a long-lived std::string lvalue all bind the view safely. + static_assert(!std::is_constructible_v, + "a std::string temporary must be rejected (the stored view would dangle)"); + static_assert(std::is_constructible_v, + "a string literal (const char*) must construct"); + static_assert(std::is_constructible_v, "a std::string_view must construct"); + static_assert(std::is_constructible_v, + "a long-lived std::string lvalue must construct"); + SUCCEED(); +} + TEST_F(RttiReverseTest, VtablesForTypeOrdersByColOffset) { // Plant the sub-objects in non-ascending COL.offset discovery order (the pool lays them out by allocation address) diff --git a/tests/test_scanner.cpp b/tests/test_scanner.cpp index 6322c96..5cd259f 100644 --- a/tests/test_scanner.cpp +++ b/tests/test_scanner.cpp @@ -4122,19 +4122,17 @@ TEST(ScannerBoundaryTest, FindsPatternStraddlingAdjacentExecutableRegions) VirtualFree(base, 0, MEM_RELEASE); } -#ifdef _MSC_VER +#if defined(_MSC_VER) || defined(_WIN64) // The per-region VirtualQuery gate in scan_regions_filtered proves a region readable only at gate time. // scan_region_guarded backstops a concurrent decommit / reprotect that races the unguarded find_pattern_raw reads: -// without the __try, the faulting read would terminate the process. This test sweeps a committed range repeatedly -// (through the module-scoped readable entry point, which routes through scan_regions_filtered) while a second thread -// decommits and recommits an interior page. A pre-race positive control proves the refactored scan body still finds a -// planted needle; the loop then asserts the sweep survives every iteration (a crash is the regression) and never -// reports a false match for an absent needle. A run where the decommit never lands inside the read window is a valid -// pass for the fault path; the __except / skip-the-region mechanism is pinned deterministically by -// MemoryGuardedReadFault and the seh_read_bytes NoAccess / GuardPage tests in test_memory.cpp. MSVC-only: MinGW has no -// structured exception handling, and the vectored handler that guards the seh_read primitives does not extend to this -// sweep's bulk find_pattern_raw reads, so the per-region VirtualQuery gate is the only guard there and the race can -// still fault. +// without the __try / VEH guard, the faulting read would terminate the process. This test sweeps a committed range +// repeatedly (through the module-scoped readable entry point, which routes through scan_regions_filtered) while a +// second thread decommits and recommits an interior page. A pre-race positive control proves the refactored scan body +// still finds a planted needle; the loop then asserts the sweep survives every iteration (a crash is the regression) +// and never reports a false match for an absent needle. A run where the decommit never lands inside the read window is +// a valid pass for the fault path; the __except / VEH skip-the-region mechanism is pinned deterministically by +// MemoryGuardedReadFault and the seh_read_bytes NoAccess / GuardPage tests in test_memory.cpp. 32-bit MinGW is excluded +// because the process-wide vectored guard is x64-only there. TEST(ScannerRegionGuard, SurvivesConcurrentDecommitMidSweep) { SYSTEM_INFO si{}; @@ -4200,7 +4198,7 @@ TEST(ScannerRegionGuard, SurvivesConcurrentDecommitMidSweep) // Whether or not any fault landed, no emitted event may carry a zero count or a mismatched window. EXPECT_TRUE(event_payload_ok); } -#endif // _MSC_VER +#endif // _MSC_VER || _WIN64 // --- Tests for the name/string resilience tiers (ResolveMode::RttiVtable / StringXref) --- diff --git a/tests/test_string_xref.cpp b/tests/test_string_xref.cpp index 637010a..e006923 100644 --- a/tests/test_string_xref.cpp +++ b/tests/test_string_xref.cpp @@ -1170,7 +1170,7 @@ TEST(StringXrefTest, ErrorToStringIsNoexceptAndTotal) } } -#ifdef _MSC_VER +#if defined(_MSC_VER) || defined(_WIN64) // Mirror of the scanner region guard for the string-xref window scans. find_string_xref reads each // execute-readable window returned by collect_executable_windows with unguarded byte reads (narrow shape scan) and // Zydis decoding (broad scan); scan_window_narrow_guarded / scan_window_broad_guarded backstop a concurrent decommit / @@ -1178,9 +1178,9 @@ TEST(StringXrefTest, ErrorToStringIsNoexceptAndTotal) // executable window while a second thread decommits and recommits a separate trailing executable window. Every // iteration must still resolve to the stable site: a faulted trailing window is skipped, and any references collected // before a swallowed fault are discarded by the guarded wrappers. A run where the decommit never lands inside the read -// window is a valid pass for the fault path; the __except / skip-the-window mechanism is pinned deterministically by -// MemoryGuardedReadFault and the seh_read_bytes NoAccess / GuardPage tests in test_memory.cpp. MSVC-only for the same -// reason as the scanner guard test. +// window is a valid pass for the fault path; the __except / VEH skip-the-window mechanism is pinned deterministically +// by MemoryGuardedReadFault and the seh_read_bytes NoAccess / GuardPage tests in test_memory.cpp. 32-bit MinGW is +// excluded because the process-wide vectored guard is x64-only there. TEST(StringXrefRegionGuard, SurvivesConcurrentDecommitMidScan) { SYSTEM_INFO si{}; @@ -1243,4 +1243,4 @@ TEST(StringXrefRegionGuard, SurvivesConcurrentDecommitMidScan) EXPECT_EQ(*result, reference_site); } } -#endif // _MSC_VER +#endif // _MSC_VER || _WIN64 diff --git a/tests/test_version.cpp b/tests/test_version.cpp index 5b0737b..ff981ad 100644 --- a/tests/test_version.cpp +++ b/tests/test_version.cpp @@ -13,8 +13,8 @@ namespace // version bump must touch in this file. Every other case below is relational so it tracks the macros // automatically. The release workflow separately guards that CMakeLists.txt project(VERSION) matches the tag. EXPECT_EQ(DMK_VERSION_MAJOR, 3); - EXPECT_EQ(DMK_VERSION_MINOR, 8); - EXPECT_EQ(DMK_VERSION_PATCH, 2); + EXPECT_EQ(DMK_VERSION_MINOR, 9); + EXPECT_EQ(DMK_VERSION_PATCH, 0); } TEST(VersionTest, VersionStringMatchesMacros) From 52b62a6163577493d158a6c0a8145b8d5fd4e4e2 Mon Sep 17 00:00:00 2001 From: Quang Trinh Date: Sun, 21 Jun 2026 18:38:36 +0700 Subject: [PATCH 2/3] fix(ci): resolve MinGW runtime DLLs from the active g++ on PATH --- .github/workflows/pr-check.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-check.yml b/.github/workflows/pr-check.yml index 1da0c67..6e73454 100644 --- a/.github/workflows/pr-check.yml +++ b/.github/workflows/pr-check.yml @@ -91,7 +91,10 @@ jobs: - name: Copy MinGW runtime DLLs run: | $testDir = "build\mingw-debug\tests" - Get-ChildItem "${{ env.MINGW_BIN }}\lib*.dll" | ForEach-Object { + # Resolve the runtime DLLs from the g++ that actually built the tests (off PATH) rather than a hard-coded + # Chocolatey subpath, whose bin layout varies by package version and may not exist on the runner image. + $mingwBin = Split-Path -Parent (Get-Command g++).Source + Get-ChildItem "$mingwBin\lib*.dll" -ErrorAction SilentlyContinue | ForEach-Object { Copy-Item $_.FullName "$testDir\" } shell: powershell From ceacee397fcbd1718f3fd3f5c63bc7d0ea56fc00 Mon Sep 17 00:00:00 2001 From: Quang Trinh Date: Sun, 21 Jun 2026 18:38:37 +0700 Subject: [PATCH 3/3] chore: address review feedback (API-discipline label, ASan-copy note, test naming) --- include/DetourModKit/bootstrap.hpp | 3 +++ src/memory.cpp | 4 ++++ tests/test_memory.cpp | 30 +++++++++++++++--------------- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/include/DetourModKit/bootstrap.hpp b/include/DetourModKit/bootstrap.hpp index 0f63111..68928b3 100644 --- a/include/DetourModKit/bootstrap.hpp +++ b/include/DetourModKit/bootstrap.hpp @@ -82,6 +82,9 @@ namespace DetourModKit * @param shutdown_fn Called exactly once before DMK_Shutdown(). * @return bool32_t TRUE if the worker was started, FALSE if the process gate, instance mutex, or event creation * failed. + * @note Setup/control-plane only -- forwarded from DllMain's DLL_PROCESS_ATTACH under the loader lock; it + * allocates, configures the logger, and starts a thread, so call it only from the DllMain attach path, + * never from a hook or input callback. * @note noexcept by contract: this runs under the loader lock, so any internal throw (e.g. std::bad_alloc while * Logger::configure builds its prefix / path strings) is caught, the partial attach is rolled back, and * FALSE is returned rather than letting an exception cross the loader lock (undefined behavior). diff --git a/src/memory.cpp b/src/memory.cpp index e14e454..85996e2 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -2124,6 +2124,10 @@ namespace DetourModKit // in one __try region, mirroring seh_read_chain_bytes: one uniform failure path for the whole operation. const ptrdiff_t *const offs = offsets.data(); const size_t count = offsets.size(); + // The copies below use plain std::memcpy, not the __movsb ASan shim seh_write_bytes uses. The single-shot + // primitive writes arbitrary scan/code memory that can abut an ASan-poisoned redzone; a chain write instead + // lands on a plausibility-screened, resolved address, so plain memcpy stays correct and still lets ASan flag a + // genuinely out-of-bounds chain target. This matches seh_read_chain_bytes, the design mirror of this function. __try { uintptr_t cur = base; diff --git a/tests/test_memory.cpp b/tests/test_memory.cpp index f125d4b..24c951a 100644 --- a/tests/test_memory.cpp +++ b/tests/test_memory.cpp @@ -2229,12 +2229,12 @@ TEST_F(MemoryTest, ReadPtrUnsafeReadsMisalignedPointer) ASSERT_NE(region, nullptr); auto *base = static_cast(region); - constexpr std::uintptr_t kSentinel = 0xDEADBEEFCAFEF00Dull; - constexpr ptrdiff_t kMisalignedOffset = 1; // deliberately not a multiple of alignof(uintptr_t) - std::memcpy(base + kMisalignedOffset, &kSentinel, sizeof(kSentinel)); + constexpr std::uintptr_t SENTINEL = 0xDEADBEEFCAFEF00Dull; + constexpr ptrdiff_t MISALIGNED_OFFSET = 1; // deliberately not a multiple of alignof(uintptr_t) + std::memcpy(base + MISALIGNED_OFFSET, &SENTINEL, sizeof(SENTINEL)); - const std::uintptr_t value = Memory::read_ptr_unsafe(reinterpret_cast(base), kMisalignedOffset); - EXPECT_EQ(value, kSentinel); + const std::uintptr_t value = Memory::read_ptr_unsafe(reinterpret_cast(base), MISALIGNED_OFFSET); + EXPECT_EQ(value, SENTINEL); // An unmapped low address still fails closed to 0 (the fault is swallowed, not propagated). EXPECT_EQ(Memory::read_ptr_unsafe(0, 0), 0u); @@ -2376,8 +2376,8 @@ TEST_F(MemoryTest, GuardedReadsAreThreadIsolatedUnderConcurrency) { void *good = VirtualAlloc(nullptr, 4096, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); ASSERT_NE(good, nullptr); - constexpr uintptr_t kSentinel = 0x5151515151515151ULL; - *reinterpret_cast(good) = kSentinel; + constexpr uintptr_t SENTINEL = 0x5151515151515151ULL; + *reinterpret_cast(good) = SENTINEL; // A deliberately non-readable page the test owns until teardown. This used to MEM_RELEASE a PAGE_READWRITE region // to get an unmapped address, but a released VA can be recycled and remapped by the allocations that spawning the @@ -2399,7 +2399,7 @@ TEST_F(MemoryTest, GuardedReadsAreThreadIsolatedUnderConcurrency) { if (t % 2 == 0) { - if (Memory::read_ptr_unsafe(reinterpret_cast(good), 0) != kSentinel) + if (Memory::read_ptr_unsafe(reinterpret_cast(good), 0) != SENTINEL) all_correct.store(false, std::memory_order_relaxed); } else if (Memory::read_ptr_unsafe(reinterpret_cast(unreadable), 0) != 0u) @@ -2425,8 +2425,8 @@ TEST_F(MemoryTest, GuardedReadsSurviveConcurrentShutdown) { void *good = VirtualAlloc(nullptr, 4096, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); ASSERT_NE(good, nullptr); - constexpr uintptr_t kSentinel = 0xA5A5A5A5A5A5A5A5ULL; - *reinterpret_cast(good) = kSentinel; + constexpr uintptr_t SENTINEL = 0xA5A5A5A5A5A5A5A5ULL; + *reinterpret_cast(good) = SENTINEL; // A deliberately non-readable page the test owns until teardown: a committed PAGE_NOACCESS page faults on every // read and cannot be recycled, unlike a MEM_RELEASE'd VA, which the allocations from spawning the reader threads @@ -2449,7 +2449,7 @@ TEST_F(MemoryTest, GuardedReadsSurviveConcurrentShutdown) const uintptr_t v = Memory::read_ptr_unsafe(reinterpret_cast(target), 0); if (target == good) { - if (v == kSentinel) + if (v == SENTINEL) seen_good.fetch_add(1, std::memory_order_relaxed); } else if (v == 0u) @@ -2476,7 +2476,7 @@ TEST_F(MemoryTest, GuardedReadsSurviveConcurrentShutdown) for (auto &th : readers) th.join(); - EXPECT_EQ(Memory::read_ptr_unsafe(reinterpret_cast(good), 0), kSentinel); + EXPECT_EQ(Memory::read_ptr_unsafe(reinterpret_cast(good), 0), SENTINEL); VirtualFree(unreadable, 0, MEM_RELEASE); VirtualFree(good, 0, MEM_RELEASE); } @@ -2621,8 +2621,8 @@ TEST_F(MemoryTest, SehWriteBytes_ReadOnlyPageFaultsClosed) void *page = VirtualAlloc(nullptr, 4096, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); ASSERT_NE(page, nullptr); - constexpr uint32_t kSentinel = 0xA5A5A5A5u; - *reinterpret_cast(page) = kSentinel; + constexpr uint32_t SENTINEL = 0xA5A5A5A5u; + *reinterpret_cast(page) = SENTINEL; DWORD old_protect = 0; ASSERT_NE(VirtualProtect(page, 4096, PAGE_READONLY, &old_protect), 0); @@ -2633,7 +2633,7 @@ TEST_F(MemoryTest, SehWriteBytes_ReadOnlyPageFaultsClosed) // Restore write access to read the page back and prove the bytes never changed. ASSERT_NE(VirtualProtect(page, 4096, old_protect, &old_protect), 0); - EXPECT_EQ(*reinterpret_cast(page), kSentinel); + EXPECT_EQ(*reinterpret_cast(page), SENTINEL); VirtualFree(page, 0, MEM_RELEASE); }