Skip to content

Commit ab050fe

Browse files
committed
feat(tracer): implement threaded connection fallback
Signed-off-by: Alexandre Rulleau <alexandre.rulleau@datadoghq.com>
1 parent 1169cf2 commit ab050fe

4 files changed

Lines changed: 260 additions & 3 deletions

File tree

ext/ddtrace.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1529,6 +1529,7 @@ static PHP_MINIT_FUNCTION(ddtrace) {
15291529
#endif
15301530
ddshared_minit();
15311531
ddtrace_autoload_minit();
1532+
ddtrace_sidecar_minit();
15321533

15331534
dd_register_span_data_ce();
15341535
dd_register_fatal_error_ce();
@@ -1612,7 +1613,11 @@ static PHP_MSHUTDOWN_FUNCTION(ddtrace) {
16121613

16131614
ddtrace_user_req_shutdown();
16141615

1615-
ddtrace_sidecar_shutdown();
1616+
// Only shutdown sidecar in MSHUTDOWN for non-CLI SAPIs
1617+
// CLI SAPI shuts down in RSHUTDOWN to allow thread joins before ASAN checks
1618+
if (strcmp(sapi_module.name, "cli") != 0) {
1619+
ddtrace_sidecar_shutdown();
1620+
}
16161621

16171622
ddtrace_live_debugger_mshutdown();
16181623

@@ -2632,6 +2637,21 @@ void dd_internal_handle_fork(void) {
26322637
ddtrace_coms_curl_shutdown();
26332638
ddtrace_coms_clean_background_sender_after_fork();
26342639
}
2640+
2641+
// Handle thread mode after fork
2642+
int32_t current_pid = (int32_t)getpid();
2643+
bool is_child_process = (ddtrace_sidecar_master_pid != 0 &&
2644+
current_pid != ddtrace_sidecar_master_pid);
2645+
2646+
if (is_child_process && ddtrace_sidecar_active_mode == DD_SIDECAR_CONNECTION_THREAD) {
2647+
// Clear inherited master listener state (child doesn't own it)
2648+
ddtrace_ffi_try("Failed clearing inherited listener state",
2649+
ddog_sidecar_clear_inherited_listener());
2650+
2651+
// Don't try to reconnect in thread mode after fork
2652+
// Let sidecar stay unavailable
2653+
LOG(WARN, "Child process after fork with thread mode: sidecar unavailable");
2654+
}
26352655
#endif
26362656
if (DDTRACE_G(agent_config_reader)) {
26372657
ddog_agent_remote_config_reader_drop(DDTRACE_G(agent_config_reader));

ext/handlers_pcntl.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,15 @@ static void dd_prefork() {
3737

3838
static void dd_handle_fork(zval *return_value) {
3939
if (Z_LVAL_P(return_value) == 0) {
40+
// CHILD PROCESS
41+
42+
// Warn if thread mode is active
43+
if (ddtrace_sidecar_active_mode == DD_SIDECAR_CONNECTION_THREAD) {
44+
LOG(WARN, "pcntl_fork() detected with thread-based sidecar connection. "
45+
"Thread mode is incompatible with fork and may cause instability. "
46+
"Consider using subprocess mode (DD_TRACE_SIDECAR_CONNECTION_MODE=subprocess).");
47+
}
48+
4049
dd_internal_handle_fork();
4150
} else {
4251
#if JOIN_BGS_BEFORE_FORK

ext/sidecar.c

Lines changed: 214 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ ddog_Endpoint *dogstatsd_endpoint; // always set when ddtrace_endpoint is set
2525
struct ddog_InstanceId *ddtrace_sidecar_instance_id;
2626
static uint8_t dd_sidecar_formatted_session_id[36];
2727

28+
// Connection mode tracking.
// - ddtrace_sidecar_active_mode: strategy of the last successful connection.
//   It is written by ddtrace_sidecar_connect_subprocess() /
//   ddtrace_sidecar_connect_thread() and reset to NONE by
//   ddtrace_sidecar_shutdown().
// - ddtrace_sidecar_master_pid: PID recorded as the "master" process for
//   thread mode; 0 means no PID has been recorded yet.
29+
dd_sidecar_active_mode_t ddtrace_sidecar_active_mode = DD_SIDECAR_CONNECTION_NONE;
30+
int32_t ddtrace_sidecar_master_pid = 0;
31+
2832
static inline void dd_set_endpoint_test_token(ddog_Endpoint *endpoint) {
2933
if (zai_config_is_initialized()) {
3034
if (ZSTR_LEN(get_DD_TRACE_AGENT_TEST_SESSION_TOKEN())) {
@@ -158,6 +162,148 @@ static void dd_sidecar_on_reconnect(ddog_SidecarTransport *transport) {
158162

159163
}
160164

165+
// Subprocess connection mode - current default behavior
166+
ddog_SidecarTransport *ddtrace_sidecar_connect_subprocess(void) {
167+
if (!ddtrace_endpoint) {
168+
return NULL;
169+
}
170+
ZEND_ASSERT(dogstatsd_endpoint != NULL);
171+
172+
dd_set_endpoint_test_token(dogstatsd_endpoint);
173+
174+
#ifdef _WIN32
175+
DDOG_PHP_FUNCTION = (const uint8_t *)zend_hash_func;
176+
#endif
177+
178+
char logpath[MAXPATHLEN];
179+
int error_fd = atomic_load(&ddtrace_error_log_fd);
180+
if (error_fd == -1 || ddtrace_get_fd_path(error_fd, logpath) < 0) {
181+
*logpath = 0;
182+
}
183+
184+
ddog_SidecarTransport *sidecar_transport;
185+
if (!ddtrace_ffi_try("Failed connecting to sidecar (subprocess mode)",
186+
ddog_sidecar_connect_php(&sidecar_transport, logpath,
187+
dd_zend_string_to_CharSlice(get_global_DD_TRACE_LOG_LEVEL()),
188+
get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED(),
189+
dd_sidecar_on_reconnect,
190+
ddtrace_endpoint))) {
191+
return NULL;
192+
}
193+
194+
dd_sidecar_post_connect(&sidecar_transport, false, logpath);
195+
196+
// Set active mode
197+
ddtrace_sidecar_active_mode = DD_SIDECAR_CONNECTION_SUBPROCESS;
198+
199+
return sidecar_transport;
200+
}
201+
202+
// Thread connection mode - fallback when subprocess fails
203+
ddog_SidecarTransport *ddtrace_sidecar_connect_thread(void) {
204+
if (!ddtrace_endpoint) {
205+
return NULL;
206+
}
207+
ZEND_ASSERT(dogstatsd_endpoint != NULL);
208+
209+
#ifndef _WIN32
210+
int32_t current_pid = (int32_t)getpid();
211+
bool is_master = (ddtrace_sidecar_master_pid == 0 || current_pid == ddtrace_sidecar_master_pid);
212+
213+
if (is_master) {
214+
// Set master PID
215+
if (ddtrace_sidecar_master_pid == 0) {
216+
ddtrace_sidecar_master_pid = current_pid;
217+
}
218+
219+
// Start master listener thread (only if not already running)
220+
if (!ddog_sidecar_is_master_listener_active(ddtrace_sidecar_master_pid)) {
221+
if (!ddtrace_ffi_try("Failed starting master listener thread",
222+
ddog_sidecar_connect_master(ddtrace_sidecar_master_pid))) {
223+
LOG(WARN, "Failed to start master listener thread");
224+
return NULL;
225+
}
226+
227+
LOG(INFO, "Started master listener thread (PID=%d)", ddtrace_sidecar_master_pid);
228+
}
229+
}
230+
231+
// Connect as worker to master listener
232+
ddog_SidecarTransport *sidecar_transport;
233+
if (!ddtrace_ffi_try("Failed connecting to master listener (thread mode)",
234+
ddog_sidecar_connect_worker(ddtrace_sidecar_master_pid, &sidecar_transport))) {
235+
LOG(WARN, "Failed to connect to master listener");
236+
return NULL;
237+
}
238+
239+
char logpath[MAXPATHLEN];
240+
int error_fd = atomic_load(&ddtrace_error_log_fd);
241+
if (error_fd == -1 || ddtrace_get_fd_path(error_fd, logpath) < 0) {
242+
*logpath = 0;
243+
}
244+
245+
dd_sidecar_post_connect(&sidecar_transport, false, logpath);
246+
247+
// Set active mode
248+
ddtrace_sidecar_active_mode = DD_SIDECAR_CONNECTION_THREAD;
249+
250+
return sidecar_transport;
251+
#else
252+
// Thread mode not supported on Windows
253+
LOG(ERROR, "Thread-based sidecar connection is not supported on Windows");
254+
return NULL;
255+
#endif
256+
}
257+
258+
// Auto-fallback connection logic
259+
ddog_SidecarTransport *ddtrace_sidecar_connect_with_fallback(void) {
260+
zend_long mode = get_global_DD_TRACE_SIDECAR_CONNECTION_MODE();
261+
ddog_SidecarTransport *transport = NULL;
262+
263+
switch (mode) {
264+
case DD_TRACE_SIDECAR_CONNECTION_MODE_SUBPROCESS:
265+
// Force subprocess only
266+
LOG(INFO, "Sidecar connection mode: subprocess (forced)");
267+
transport = ddtrace_sidecar_connect_subprocess();
268+
if (!transport) {
269+
LOG(ERROR, "Subprocess connection failed (mode=subprocess, no fallback)");
270+
}
271+
break;
272+
273+
case DD_TRACE_SIDECAR_CONNECTION_MODE_THREAD:
274+
// Force thread only
275+
LOG(INFO, "Sidecar connection mode: thread (forced)");
276+
transport = ddtrace_sidecar_connect_thread();
277+
if (!transport) {
278+
LOG(ERROR, "Thread connection failed (mode=thread, no fallback)");
279+
}
280+
break;
281+
282+
case DD_TRACE_SIDECAR_CONNECTION_MODE_AUTO:
283+
default:
284+
// Try subprocess first
285+
LOG(INFO, "Sidecar connection mode: auto (trying subprocess first)");
286+
transport = ddtrace_sidecar_connect_subprocess();
287+
288+
if (transport) {
289+
LOG(INFO, "Connected to sidecar via subprocess");
290+
} else {
291+
// Fallback to thread mode
292+
LOG(WARN, "Subprocess connection failed, falling back to thread mode");
293+
transport = ddtrace_sidecar_connect_thread();
294+
295+
if (transport) {
296+
LOG(INFO, "Connected to sidecar via thread (fallback)");
297+
} else {
298+
LOG(ERROR, "Both subprocess and thread connections failed, sidecar unavailable");
299+
}
300+
}
301+
break;
302+
}
303+
304+
return transport;
305+
}
306+
161307
static ddog_SidecarTransport *dd_sidecar_connection_factory_ex(bool is_fork) {
162308
// Should not happen, unless the agent url is malformed
163309
if (!ddtrace_endpoint) {
@@ -189,7 +335,20 @@ static ddog_SidecarTransport *dd_sidecar_connection_factory_ex(bool is_fork) {
189335
}
190336

191337
// Connection factory used for reconnects (passed to ddtrace_sidecar_reconnect()).
// Diff view: the line marked "192-" is the removed pre-change body; the lines
// marked "NNN+" are its replacement, which dispatches on
// ddtrace_sidecar_active_mode so a reconnect reuses the strategy that last
// succeeded. Returns the new transport, or NULL if the attempt fails.
ddog_SidecarTransport *dd_sidecar_connection_factory(void) {
192-
return dd_sidecar_connection_factory_ex(false);
338+
// Reconnect using the same mode that succeeded initially
339+
switch (ddtrace_sidecar_active_mode) {
340+
case DD_SIDECAR_CONNECTION_SUBPROCESS:
341+
return ddtrace_sidecar_connect_subprocess();
342+
343+
case DD_SIDECAR_CONNECTION_THREAD:
344+
return ddtrace_sidecar_connect_thread();
345+
346+
case DD_SIDECAR_CONNECTION_NONE:
347+
default:
348+
// Shouldn't happen; fall back to the configured-mode logic, which honors
// DD_TRACE_SIDECAR_CONNECTION_MODE (not necessarily "auto")
349+
LOG(WARN, "Reconnection attempted with no active mode, using fallback logic");
350+
return ddtrace_sidecar_connect_with_fallback();
351+
}
193352
}
194353

195354
bool ddtrace_sidecar_maybe_enable_appsec(bool *appsec_activation, bool *appsec_config) {
@@ -222,7 +381,8 @@ void ddtrace_sidecar_setup(bool appsec_activation, bool appsec_config) {
222381

223382
ddog_init_remote_config(get_global_DD_INSTRUMENTATION_TELEMETRY_ENABLED(), appsec_activation, appsec_config);
224383

225-
ddtrace_sidecar = dd_sidecar_connection_factory();
384+
// Use fallback connection logic
385+
ddtrace_sidecar = ddtrace_sidecar_connect_with_fallback();
226386
if (!ddtrace_sidecar) { // Something went wrong
227387
if (ddtrace_endpoint) {
228388
dd_free_endpoints();
@@ -234,6 +394,15 @@ void ddtrace_sidecar_setup(bool appsec_activation, bool appsec_config) {
234394
}
235395
}
236396

397+
// Initialize sidecar globals at module init
398+
void ddtrace_sidecar_minit(void) {
399+
#ifndef _WIN32
400+
if (ddtrace_sidecar_master_pid == 0) {
401+
ddtrace_sidecar_master_pid = (int32_t)getpid();
402+
}
403+
#endif
404+
}
405+
237406
void ddtrace_sidecar_ensure_active(void) {
238407
if (ddtrace_sidecar) {
239408
ddtrace_sidecar_reconnect(&ddtrace_sidecar, dd_sidecar_connection_factory);
@@ -261,8 +430,29 @@ void ddtrace_sidecar_finalize(bool clear_id) {
261430
}
262431

263432
void ddtrace_sidecar_shutdown(void) {
433+
#ifndef _WIN32
434+
// Shutdown master listener if this is the master process and thread mode is active
435+
int32_t current_pid = (int32_t)getpid();
436+
if (ddtrace_sidecar_active_mode == DD_SIDECAR_CONNECTION_THREAD &&
437+
ddtrace_sidecar_master_pid != 0 &&
438+
current_pid == ddtrace_sidecar_master_pid) {
439+
440+
// Close worker connection first to avoid deadlock
441+
if (ddtrace_sidecar) {
442+
ddog_sidecar_transport_drop(ddtrace_sidecar);
443+
ddtrace_sidecar = NULL;
444+
}
445+
446+
// Then shutdown listener thread
447+
ddtrace_ffi_try("Failed shutting down master listener",
448+
ddog_sidecar_shutdown_master_listener());
449+
}
450+
#endif
451+
452+
// Standard cleanup
264453
if (ddtrace_sidecar_instance_id) {
265454
ddog_sidecar_instanceId_drop(ddtrace_sidecar_instance_id);
455+
ddtrace_sidecar_instance_id = NULL;
266456
}
267457

268458
if (ddtrace_endpoint) {
@@ -271,7 +461,11 @@ void ddtrace_sidecar_shutdown(void) {
271461

272462
if (ddtrace_sidecar) {
273463
ddog_sidecar_transport_drop(ddtrace_sidecar);
464+
ddtrace_sidecar = NULL;
274465
}
466+
467+
// Reset mode
468+
ddtrace_sidecar_active_mode = DD_SIDECAR_CONNECTION_NONE;
275469
}
276470

277471
void ddtrace_force_new_instance_id(void) {
@@ -286,6 +480,19 @@ void ddtrace_reset_sidecar(void) {
286480

287481
if (ddtrace_sidecar) {
288482
ddog_sidecar_transport_drop(ddtrace_sidecar);
483+
ddtrace_sidecar = NULL;
484+
485+
// Don't reconnect in thread mode after fork (Option A: documented incompatibility)
486+
if (ddtrace_sidecar_active_mode == DD_SIDECAR_CONNECTION_THREAD) {
487+
// Sidecar unavailable in child process after fork
488+
LOG(WARN, "Thread mode sidecar cannot be reset after fork, sidecar unavailable");
489+
if (ddtrace_endpoint) {
490+
dd_free_endpoints();
491+
}
492+
return;
493+
}
494+
495+
// For subprocess mode, reconnect with is_fork=true
289496
ddtrace_sidecar = dd_sidecar_connection_factory_ex(true);
290497
if (!ddtrace_sidecar) { // Something went wrong
291498
if (ddtrace_endpoint) {
@@ -596,6 +803,11 @@ void ddtrace_sidecar_rinit(void) {
596803

597804
void ddtrace_sidecar_rshutdown(void) {
598805
ddog_Vec_Tag_drop(DDTRACE_G(active_global_tags));
806+
807+
// For CLI SAPI, shut down sidecar here (before ASAN checks)
808+
if (strcmp(sapi_module.name, "cli") == 0) {
809+
ddtrace_sidecar_shutdown();
810+
}
599811
}
600812

601813
bool ddtrace_alter_test_session_token(zval *old_value, zval *new_value, zend_string *new_str) {

ext/sidecar.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,18 @@
77
#include "ddtrace.h"
88
#include "zend_string.h"
99

10+
// Connection mode tracking: identifies which sidecar connection strategy is
// currently recorded as active (see ddtrace_sidecar_active_mode).
11+
typedef enum {
12+
DD_SIDECAR_CONNECTION_NONE = 0, // no mode recorded (initial value; restored on shutdown)
13+
DD_SIDECAR_CONNECTION_SUBPROCESS = 1, // set by ddtrace_sidecar_connect_subprocess() on success
14+
DD_SIDECAR_CONNECTION_THREAD = 2 // set by ddtrace_sidecar_connect_thread() on success
15+
} dd_sidecar_active_mode_t;
16+
1017
extern ddog_SidecarTransport *ddtrace_sidecar;
1118
extern ddog_Endpoint *ddtrace_endpoint;
1219
extern struct ddog_InstanceId *ddtrace_sidecar_instance_id;
20+
// Strategy of the last successful sidecar connection (NONE when not connected).
extern dd_sidecar_active_mode_t ddtrace_sidecar_active_mode;
// PID recorded as the thread-mode "master" process; 0 until one is recorded.
21+
extern int32_t ddtrace_sidecar_master_pid;
1322

1423
DDTRACE_PUBLIC const uint8_t *ddtrace_get_formatted_session_id(void);
1524
struct telemetry_rc_info {
@@ -20,6 +29,13 @@ struct telemetry_rc_info {
2029
};
2130
DDTRACE_PUBLIC struct telemetry_rc_info ddtrace_get_telemetry_rc_info(void);
2231

32+
// Connection functions.
// Each returns a connected transport, or NULL on failure.
// - ddtrace_sidecar_connect_subprocess: connect via ddog_sidecar_connect_php().
// - ddtrace_sidecar_connect_thread: start/join the in-process master listener;
//   always returns NULL on Windows.
// - ddtrace_sidecar_connect_with_fallback: follow DD_TRACE_SIDECAR_CONNECTION_MODE;
//   in auto mode try subprocess first, then thread.
33+
ddog_SidecarTransport *ddtrace_sidecar_connect_subprocess(void);
34+
ddog_SidecarTransport *ddtrace_sidecar_connect_thread(void);
35+
ddog_SidecarTransport *ddtrace_sidecar_connect_with_fallback(void);
36+
37+
// Lifecycle functions
// ddtrace_sidecar_minit: called from MINIT; records the master PID on non-Windows builds.
38+
void ddtrace_sidecar_minit(void);
2339
void ddtrace_sidecar_setup(bool appsec_activation, bool appsec_config);
2440
bool ddtrace_sidecar_maybe_enable_appsec(bool *appsec_activation, bool *appsec_config);
2541
void ddtrace_sidecar_ensure_active(void);

0 commit comments

Comments
 (0)