Skip to content

Commit c12c67f

Browse files
authored
feat: nested-scope context save/restore via ScopeStack (#496)
1 parent b7cc386 commit c12c67f

8 files changed

Lines changed: 601 additions & 141 deletions

File tree

ddprof-lib/src/main/cpp/javaApi.cpp

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ Java_com_datadoghq_profiler_OTelContext_readProcessCtx0(JNIEnv *env, jclass unus
530530
#endif
531531
}
532532

533-
extern "C" DLLEXPORT jobjectArray JNICALL
533+
extern "C" DLLEXPORT jobject JNICALL
534534
Java_com_datadoghq_profiler_JavaProfiler_initializeContextTLS0(JNIEnv* env, jclass unused, jlongArray metadata) {
535535
ProfiledThread* thrd = ProfiledThread::current();
536536
assert(thrd != nullptr);
@@ -541,35 +541,37 @@ Java_com_datadoghq_profiler_JavaProfiler_initializeContextTLS0(JNIEnv* env, jcla
541541

542542
OtelThreadContextRecord* record = thrd->getOtelContextRecord();
543543

544+
// Contiguity of record + tag_encodings + LRS is enforced by alignas(8) on _otel_ctx_record
545+
// plus sizeof(OtelThreadContextRecord) being a multiple of 8 (see thread.h).
546+
// Compile-time alignment check always runs; runtime pointer-layout check is debug-only.
547+
static_assert(DD_TAGS_CAPACITY * sizeof(u32) % alignof(u64) == 0,
548+
"tag encodings array size must be aligned to u64 for contiguous sidecar layout");
549+
#ifdef DEBUG
550+
uint8_t* record_start = reinterpret_cast<uint8_t*>(record);
551+
uint8_t* sidecar_start = reinterpret_cast<uint8_t*>(thrd->getOtelTagEncodingsPtr());
552+
assert(sidecar_start == record_start + OTEL_MAX_RECORD_SIZE
553+
&& "_otel_ctx_record and _otel_tag_encodings must be contiguous");
554+
#endif
555+
544556
// Fill metadata[6]: [VALID_OFFSET, TRACE_ID_OFFSET, SPAN_ID_OFFSET,
545-
// ATTRS_DATA_SIZE_OFFSET, ATTRS_DATA_OFFSET, LRS_SIDECAR_OFFSET]
557+
// ATTRS_DATA_SIZE_OFFSET, ATTRS_DATA_OFFSET, LRS_OFFSET].
558+
// All offsets are absolute within the unified buffer returned below.
546559
if (metadata != nullptr && env->GetArrayLength(metadata) >= 6) {
547560
jlong meta[6];
548561
meta[0] = (jlong)offsetof(OtelThreadContextRecord, valid);
549562
meta[1] = (jlong)offsetof(OtelThreadContextRecord, trace_id);
550563
meta[2] = (jlong)offsetof(OtelThreadContextRecord, span_id);
551564
meta[3] = (jlong)offsetof(OtelThreadContextRecord, attrs_data_size);
552565
meta[4] = (jlong)offsetof(OtelThreadContextRecord, attrs_data);
553-
meta[5] = (jlong)(DD_TAGS_CAPACITY * sizeof(u32)); // LRS sidecar offset in sidecar buffer
566+
meta[5] = (jlong)(OTEL_MAX_RECORD_SIZE + DD_TAGS_CAPACITY * sizeof(u32));
554567
env->SetLongArrayRegion(metadata, 0, 6, meta);
555568
}
556569

557-
// Create 2 DirectByteBuffers: [record, sidecar]
558-
jclass bbClass = env->FindClass("java/nio/ByteBuffer");
559-
jobjectArray result = env->NewObjectArray(2, bbClass, nullptr);
560-
561-
// recordBuffer: 640 bytes over the OtelThreadContextRecord
562-
jobject recordBuf = env->NewDirectByteBuffer((void*)record, (jlong)OTEL_MAX_RECORD_SIZE);
563-
env->SetObjectArrayElement(result, 0, recordBuf);
564-
565-
// sidecarBuffer: covers _otel_tag_encodings[DD_TAGS_CAPACITY] + _otel_local_root_span_id (contiguous)
566-
static_assert(DD_TAGS_CAPACITY * sizeof(u32) % alignof(u64) == 0,
567-
"tag encodings array size must be aligned to u64 for contiguous sidecar layout");
568-
size_t sidecarSize = DD_TAGS_CAPACITY * sizeof(u32) + sizeof(u64);
569-
jobject sidecarBuf = env->NewDirectByteBuffer((void*)thrd->getOtelTagEncodingsPtr(), (jlong)sidecarSize);
570-
env->SetObjectArrayElement(result, 1, sidecarBuf);
571-
572-
return result;
570+
// Single contiguous view over [record | tag_encodings | LRS] — used for per-field
571+
// access and for bulk snapshot/restore. All three regions are in one ProfiledThread
572+
// memory block.
573+
size_t totalSize = OTEL_MAX_RECORD_SIZE + DD_TAGS_CAPACITY * sizeof(u32) + sizeof(u64);
574+
return env->NewDirectByteBuffer((void*)record, (jlong)totalSize);
573575
}
574576

575577
extern "C" DLLEXPORT jint JNICALL

ddprof-lib/src/main/cpp/thread.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,15 @@ class ProfiledThread : public ThreadLocalData {
7373
UnwindFailures _unwind_failures;
7474
bool _otel_ctx_initialized;
7575
bool _crash_protection_active;
76-
OtelThreadContextRecord _otel_ctx_record;
76+
// alignas(8) + sizeof(OtelThreadContextRecord)==640 (multiple of 8) guarantee
77+
// _otel_tag_encodings sits at +640 with no padding, so the three fields form one
78+
// 688-byte contiguous region exposed as a combined DirectByteBuffer.
79+
alignas(8) OtelThreadContextRecord _otel_ctx_record;
7780
// These two fields MUST be contiguous and 8-byte aligned — the JNI layer
7881
// exposes them in one DirectByteBuffer together with _otel_ctx_record, and VarHandle long
7982
// views require 8-byte alignment for the buffer base address.
83+
// Read invariant: sidecar readers must gate on record->valid (see ContextApi::get).
84+
// ThreadContext.restore() relies on this to perform a bulk memcpy under valid=0.
8085
alignas(8) u32 _otel_tag_encodings[DD_TAGS_CAPACITY];
8186
u64 _otel_local_root_span_id;
8287

ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,6 @@ void copyTags(int[] snapshot) {
225225
tlsContextStorage.get().copyCustoms(snapshot);
226226
}
227227

228-
/**
229228
/**
230229
* Dumps the JFR recording at the provided path
231230
* @param recording the path to the recording
@@ -305,11 +304,11 @@ public Map<String, Long> getDebugCounters() {
305304

306305
private static ThreadContext initializeThreadContext() {
307306
long[] metadata = new long[6];
308-
ByteBuffer[] buffers = initializeContextTLS0(metadata);
309-
if (buffers == null) {
307+
ByteBuffer buffer = initializeContextTLS0(metadata);
308+
if (buffer == null) {
310309
throw new IllegalStateException("Failed to initialize OTEL TLS — ProfiledThread not available");
311310
}
312-
return new ThreadContext(buffers[0], buffers[1], metadata);
311+
return new ThreadContext(buffer, metadata);
313312
}
314313

315314
private static native boolean init0();
@@ -342,19 +341,20 @@ private static ThreadContext initializeThreadContext() {
342341
private static native String getStatus0();
343342

344343
/**
345-
* Initializes context TLS for the current thread and returns 2 DirectByteBuffers.
346-
* Sets otel_thread_ctx_v1 permanently to the thread's OtelThreadContextRecord.
344+
* Initializes context TLS for the current thread and returns a single DirectByteBuffer
345+
* spanning the OTEP record + tag-encoding sidecar + LRS (688 bytes, contiguous in
346+
* ProfiledThread). Sets otel_thread_ctx_v1 permanently to the thread's
347+
* OtelThreadContextRecord.
347348
*
348-
* @param metadata output array filled with:
349-
* [0] VALID_OFFSET — offset of 'valid' field in the record
350-
* [1] TRACE_ID_OFFSET — offset of 'trace_id' field in the record
351-
* [2] SPAN_ID_OFFSET — offset of 'span_id' field in the record
349+
* @param metadata output array filled with absolute offsets into the returned buffer:
350+
* [0] VALID_OFFSET — offset of 'valid' field
351+
* [1] TRACE_ID_OFFSET — offset of 'trace_id' field
352+
* [2] SPAN_ID_OFFSET — offset of 'span_id' field
352353
* [3] ATTRS_DATA_SIZE_OFFSET — offset of 'attrs_data_size' field
353354
* [4] ATTRS_DATA_OFFSET — offset of 'attrs_data' field
354-
* [5] LRS_SIDECAR_OFFSET — offset of local_root_span_id in sidecar buffer
355-
* @return array of 2 ByteBuffers: [recordBuffer, sidecarBuffer]
355+
* [5] LRS_OFFSET — offset of local_root_span_id
356356
*/
357-
private static native ByteBuffer[] initializeContextTLS0(long[] metadata);
357+
private static native ByteBuffer initializeContextTLS0(long[] metadata);
358358

359359
public ThreadContext getThreadContext() {
360360
return tlsContextStorage.get();
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright 2026 Datadog, Inc
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*/
10+
package com.datadoghq.profiler;
11+
12+
import java.util.Arrays;
13+
14+
/**
15+
* Per-thread stack of {@link ThreadContext} snapshots for nested scopes.
16+
*
17+
* <p>Provides bulk save/restore of the full OTEP record + sidecar state via one memcpy per
18+
* transition. Not thread-safe: a single stack instance must be accessed only from its
19+
* owning thread.
20+
*
21+
* <p>Storage is tiered to keep shallow nesting allocation-free:
22+
* <ul>
23+
* <li>Depths 0 .. {@value #FAST_DEPTH}-1: one contiguous byte[] allocated eagerly.</li>
24+
* <li>Depths {@value #FAST_DEPTH} and beyond: lazily allocated {@value #CHUNK_DEPTH}-slot
25+
* chunks, each a single byte[]. Chunks are allocated once per depth band and reused.</li>
26+
* </ul>
27+
*/
28+
public final class ScopeStack {
29+
private static final int FAST_DEPTH = 6;
30+
private static final int CHUNK_DEPTH = 12;
31+
private static final int SLOT_SIZE = ThreadContext.SNAPSHOT_SIZE;
32+
33+
private final byte[] fast = new byte[FAST_DEPTH * SLOT_SIZE];
34+
// chunks[i] covers depths [FAST_DEPTH + i*CHUNK_DEPTH .. FAST_DEPTH + (i+1)*CHUNK_DEPTH).
35+
private byte[][] chunks;
36+
private int depth;
37+
38+
public void enter(ThreadContext ctx) {
39+
int d = depth;
40+
ctx.snapshot(bufferFor(d), offsetFor(d));
41+
depth = d + 1;
42+
}
43+
44+
public void exit(ThreadContext ctx) {
45+
int d = depth - 1;
46+
if (d < 0) {
47+
throw new IllegalStateException("ScopeStack underflow");
48+
}
49+
ctx.restore(bufferFor(d), offsetFor(d));
50+
depth = d;
51+
}
52+
53+
/** Current nesting depth (number of outstanding {@link #enter} calls). */
54+
public int depth() {
55+
return depth;
56+
}
57+
58+
private byte[] bufferFor(int d) {
59+
if (d < FAST_DEPTH) {
60+
return fast;
61+
}
62+
// chunkFor is idempotent: if this depth was previously populated (via a matching enter),
63+
// it returns the existing chunk without allocating.
64+
return chunkFor((d - FAST_DEPTH) / CHUNK_DEPTH);
65+
}
66+
67+
private static int offsetFor(int d) {
68+
int slot = d < FAST_DEPTH ? d : (d - FAST_DEPTH) % CHUNK_DEPTH;
69+
return slot * SLOT_SIZE;
70+
}
71+
72+
private byte[] chunkFor(int idx) {
73+
byte[][] cs = chunks;
74+
if (cs == null) {
75+
cs = new byte[4][];
76+
chunks = cs;
77+
} else if (idx >= cs.length) {
78+
int newLen = cs.length;
79+
while (newLen <= idx) {
80+
newLen <<= 1;
81+
}
82+
cs = Arrays.copyOf(cs, newLen);
83+
chunks = cs;
84+
}
85+
byte[] c = cs[idx];
86+
if (c == null) {
87+
c = new byte[CHUNK_DEPTH * SLOT_SIZE];
88+
cs[idx] = c;
89+
}
90+
return c;
91+
}
92+
}

0 commit comments

Comments
 (0)