Skip to content

Commit b7cc386

Browse files
authored
Validate signal origin in CPU and WallClock handlers (PROF-14324) (#494)
1 parent 25c702b commit b7cc386

11 files changed

Lines changed: 1540 additions & 10 deletions

File tree

ddprof-lib/src/main/cpp/counters.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,11 @@
9898
X(WALKVM_CONT_ENTRY_NULL, "walkvm_cont_entry_null") \
9999
X(NATIVE_LIBS_DROPPED, "native_libs_dropped") \
100100
X(SIGACTION_PATCHED_LIBS, "sigaction_patched_libs") \
101-
X(SIGACTION_INTERCEPTED, "sigaction_intercepted")
101+
X(SIGACTION_INTERCEPTED, "sigaction_intercepted") \
102+
X(CTIMER_SIGNAL_OWN, "ctimer_signal_own") \
103+
X(CTIMER_SIGNAL_FOREIGN, "ctimer_signal_foreign") \
104+
X(WALLCLOCK_SIGNAL_OWN, "wallclock_signal_own") \
105+
X(WALLCLOCK_SIGNAL_FOREIGN, "wallclock_signal_foreign")
102106
#define X_ENUM(a, b) a,
103107
typedef enum CounterId : int {
104108
DD_COUNTER_TABLE(X_ENUM) DD_NUM_COUNTERS

ddprof-lib/src/main/cpp/ctimer_linux.cpp

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,21 @@
1717

1818
#ifdef __linux__
1919

20+
#include "counters.h"
2021
#include "guards.h"
2122
#include "ctimer.h"
2223
#include "debugSupport.h"
2324
#include "jvmThread.h"
2425
#include "libraries.h"
26+
#include "log.h"
2527
#include "profiler.h"
28+
#include "signalCookie.h"
2629
#include "threadState.inline.h"
2730
#include <assert.h>
31+
#include <errno.h>
32+
#include <stddef.h>
2833
#include <stdlib.h>
34+
#include <string.h>
2935
#include <sys/syscall.h>
3036
#include <time.h>
3137
#include <unistd.h>
@@ -53,9 +59,25 @@ int CTimer::registerThread(int tid) {
5359
}
5460

5561
struct sigevent sev;
56-
sev.sigev_value.sival_ptr = NULL;
62+
// Zero the whole struct first so any padding / future fields the kernel
63+
// inspects (sigev_notify_function, sigev_notify_attributes on glibc) are
64+
// not populated from stack garbage.
65+
memset(&sev, 0, sizeof(sev));
66+
// Cookie identifying this timer as ddprof-owned. When the signal is delivered
67+
// the handler checks siginfo->si_value.sival_ptr against SignalCookie::cpu()
68+
// and drops/forwards any SIGPROF that does not carry it (e.g. from a Go
69+
// runtime's setitimer(ITIMER_PROF) or a foreign library's raise()).
70+
sev.sigev_value.sival_ptr = SignalCookie::cpu();
5771
sev.sigev_signo = _signal;
5872
sev.sigev_notify = SIGEV_THREAD_ID;
73+
// glibc/musl layout convention: sigev_notify_thread_id sits immediately
74+
// after sigev_notify inside the union — the tid is written as the *second*
75+
// int starting at &sev.sigev_notify, so bytes [sizeof(int), 2*sizeof(int))
76+
// of that int-pointer must be in-bounds of struct sigevent. Guard against
77+
// a future libc change by statically asserting that both ints fit.
78+
static_assert(offsetof(struct sigevent, sigev_notify) + 2 * sizeof(int)
79+
<= sizeof(struct sigevent),
80+
"sigevent layout assumption broken: tid write would overflow");
5981
((int *)&sev.sigev_notify)[1] = tid;
6082

6183
// Use raw syscalls, since libc wrapper allows only predefined clocks
@@ -76,7 +98,21 @@ int CTimer::registerThread(int tid) {
7698
ts.it_interval.tv_sec = (time_t)(_interval / 1000000000);
7799
ts.it_interval.tv_nsec = _interval % 1000000000;
78100
ts.it_value = ts.it_interval;
79-
syscall(__NR_timer_settime, timer, 0, &ts, NULL);
101+
if (syscall(__NR_timer_settime, timer, 0, &ts, NULL) < 0) {
102+
// Arming failed after publishing the timer in _timers[tid]. Reclaim the
103+
// slot only if it still contains this timer; otherwise a concurrent
104+
// unregisterThread(tid) has already claimed responsibility for cleanup
105+
// (avoids a double timer_delete).
106+
int settime_errno = errno;
107+
char errbuf[64];
108+
strerror_r(settime_errno, errbuf, sizeof(errbuf));
109+
Log::warn("timer_settime failed for tid=%d: %s", tid, errbuf);
110+
errno = settime_errno;
111+
if (__sync_bool_compare_and_swap(&_timers[tid], timer + 1, 0)) {
112+
syscall(__NR_timer_delete, timer);
113+
}
114+
return -1;
115+
}
80116
return 0;
81117
}
82118

@@ -119,6 +155,12 @@ Error CTimer::start(Arguments &args) {
119155
_max_timers = max_timers;
120156
}
121157

158+
// Prime the origin-check cache from this non-signal context before any
159+
// SIGPROF can fire — reading the env var lazily from the handler itself
160+
// would go through a C++ function-local-static guard, which is not
161+
// async-signal-safe.
162+
OS::primeSignalOriginCheck();
163+
122164
OS::installSignalHandler(_signal, signalHandler);
123165

124166
// Register all existing threads
@@ -143,6 +185,17 @@ void CTimer::stop() {
143185
}
144186

145187
void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) {
188+
// Reject signals that did not originate from our timer_create timers.
189+
// This guards against Go's process-wide setitimer(ITIMER_PROF) and other
190+
// foreign SIGPROF sources that would otherwise drive our handler onto
191+
// threads we never registered — see doc/plans/SignalOriginValidation.md.
192+
if (!OS::shouldProcessSignal(siginfo, SI_TIMER, SignalCookie::cpu())) {
193+
Counters::increment(CTIMER_SIGNAL_FOREIGN);
194+
OS::forwardForeignSignal(signo, siginfo, ucontext);
195+
return;
196+
}
197+
Counters::increment(CTIMER_SIGNAL_OWN);
198+
146199
// Atomically try to enter critical section - prevents all reentrancy races
147200
CriticalSection cs;
148201
if (!cs.entered()) {

ddprof-lib/src/main/cpp/itimer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ long ITimer::_interval;
3030
CStack ITimer::_cstack;
3131

3232
void ITimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) {
33+
// NOTE: ITimer uses setitimer(ITIMER_PROF) which delivers signals with
34+
// si_code==SI_KERNEL — no sival payload is available. The signal-origin
35+
// check implemented in CTimer/WallClock cannot be applied here. ITimer
36+
// is therefore vulnerable to the foreign-SIGPROF deadlock scenario this
37+
// feature addresses. Use CTimer (the default) when signal-origin
38+
// validation is required.
3339
if (!_enabled)
3440
return;
3541

ddprof-lib/src/main/cpp/os.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,73 @@ class OS {
122122
static bool isLinux();
123123
static bool isMusl();
124124

125+
// Returns nullptr on sigaction() failure; returns the previous sa_sigaction otherwise.
125126
static SigAction installSignalHandler(int signo, SigAction action, SigHandler handler = NULL);
126127
static SigAction replaceCrashHandler(SigAction action);
127128
static int getProfilingSignal(int mode);
128129
static bool sendSignalToThread(int thread_id, int signo);
129130

131+
// Send a signal to a specific thread with a cookie payload (synchronous —
132+
// the kernel queues it and returns). Uses rt_tgsigqueueinfo(2) on Linux;
133+
// receiver sees si_code == SI_QUEUE with the cookie in si_value.sival_ptr.
134+
// Engines use this when they need to discriminate their own signals from
135+
// foreign ones in the handler.
136+
static bool sendSignalWithCookie(int thread_id, int signo, void* cookie);
137+
138+
// Accept-policy for a signal in a handler context. Async-signal-safe.
139+
//
140+
// Returns true iff the signal SHOULD be processed by the caller's engine:
141+
// - origin check disabled (accept-all fallback, DDPROF_SIGNAL_ORIGIN_CHECK=0)
142+
// → true
143+
// - origin check enabled AND siginfo matches both expected_si_code and
144+
// expected_cookie → true
145+
// - otherwise → false
146+
//
147+
// Naming note: this is a policy predicate, NOT a pure "did this signal
148+
// come from us?" identification. A `false` answer means "forward this to
149+
// the chained handler"; a `true` answer means "process normally".
150+
// `expected_si_code` is SI_TIMER (timer_create) or SI_QUEUE
151+
// (sendSignalWithCookie).
152+
static bool shouldProcessSignal(siginfo_t* siginfo, int expected_si_code, void* expected_cookie);
153+
154+
// Forwards a signal we decided not to process to the previously-installed
155+
// handler (as captured by installSignalHandler). When
156+
// DDPROF_FORWARD_APPLY_SIGMASK=1 is set, also reproduces the previous
157+
// handler's sa_mask so the chained handler sees the same signal-blocking
158+
// environment it would under normal kernel delivery.
159+
// Async-signal-safe on the fast path (no sa_mask applied). When
160+
// DDPROF_FORWARD_APPLY_SIGMASK=1 is set, the slow path uses raw
161+
// rt_sigprocmask syscalls which are async-signal-safe. The forwarded
162+
// handler's safety is the caller's concern.
163+
//
164+
// Drop semantics (Linux): signals whose previous disposition was SIG_DFL or
165+
// SIG_IGN are silently dropped — kernel-default termination is NOT reproduced
166+
// (terminating the process on every forwarded foreign signal would be worse
167+
// than ignoring it). Callers must not rely on default-action reproduction.
168+
//
169+
// macOS: always a no-op (rt_tgsigqueueinfo is unavailable so no cookie
170+
// discrimination is possible; all signals are accepted by the engine handler
171+
// and never forwarded). signalOriginCheckEnabled() returns false on macOS.
172+
static void forwardForeignSignal(int signo, siginfo_t* siginfo, void* ucontext);
173+
174+
// Runtime feature flag: is the signal origin check active? Reads
175+
// DDPROF_SIGNAL_ORIGIN_CHECK env var and caches the result. Default on;
176+
// set to "false"/"0"/"off"/"no" to disable (regression tests only).
177+
// Callers running from signal-handler context must ensure
178+
// primeSignalOriginCheck() was called earlier from non-signal context
179+
// so the env-var getenv() cost is not paid during signal delivery.
180+
static bool signalOriginCheckEnabled();
181+
182+
// Prime the signalOriginCheckEnabled() cache from non-signal context.
183+
// Called by engine start() paths before installing handlers so later
184+
// env-var overrides are picked up. Safe to call multiple times; no-op
185+
// after the first call unless forceReload=true (used only by unit tests).
186+
//
187+
// Without priming, the cache holds its safe defaults (origin check
188+
// enabled, sigmask chain disabled) — signals still classify correctly
189+
// even if priming never runs.
190+
static void primeSignalOriginCheck(bool forceReload = false);
191+
130192
static void* safeAlloc(size_t size);
131193
static void safeFree(void* addr, size_t size);
132194

0 commit comments

Comments
 (0)