Skip to content

Commit da837f8

Browse files
refactor(otel): Add X-Ray context extraction with trace ID priority (#464)
1 parent 9eb9942 commit da837f8

11 files changed

Lines changed: 616 additions & 84 deletions

File tree

otel-plugin/pom.xml

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
</parent>
1212

1313
<artifactId>aws-durable-execution-sdk-java-otel</artifactId>
14-
<name>AWS Lambda Durable Execution SDK - OpenTelemetry Plugin</name>
14+
<name>AWS Lambda Durable Execution SDK OpenTelemetry Plugin</name>
1515
<description>OpenTelemetry instrumentation plugin for AWS Lambda Durable Execution SDK</description>
1616

1717
<properties>
@@ -48,13 +48,6 @@
4848
<version>${opentelemetry.version}</version>
4949
</dependency>
5050

51-
<!-- AWS X-Ray Propagator (official OTel contrib) -->
52-
<dependency>
53-
<groupId>io.opentelemetry.contrib</groupId>
54-
<artifactId>opentelemetry-aws-xray-propagator</artifactId>
55-
<version>1.57.0-alpha</version>
56-
</dependency>
57-
5851
<!-- SLF4J for logging -->
5952
<dependency>
6053
<groupId>org.slf4j</groupId>

otel-plugin/src/main/java/software/amazon/lambda/durable/otel/ContextExtractor.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@
22
// SPDX-License-Identifier: Apache-2.0
33
package software.amazon.lambda.durable.otel;
44

5-
import io.opentelemetry.context.Context;
6-
75
/**
8-
* Extracts OTel trace context from the Lambda runtime environment.
6+
* Extracts trace context from the Lambda runtime environment.
97
*
108
* <p>Implementations read trace context from various sources (X-Ray trace header, W3C traceparent, etc.) and return an
11-
* OTel {@link Context} that can be used as the parent for invocation spans.
9+
* {@link ExtractedContext} containing the trace ID and optional parent span ID.
1210
*
1311
* <p>Called once per invocation in {@code onInvocationStart} to establish the parent trace context.
1412
*
@@ -21,7 +19,7 @@ public interface ContextExtractor {
2119
/**
2220
* Extracts trace context from the runtime environment.
2321
*
24-
* @return the extracted OTel context, or {@link Context#root()} if no context is available
22+
* @return the extracted context, or {@code null} if no context is available
2523
*/
26-
Context extract();
24+
ExtractedContext extract();
2725
}

otel-plugin/src/main/java/software/amazon/lambda/durable/otel/DeterministicIdGenerator.java

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,18 @@
1111
/**
1212
* Generates deterministic trace and span IDs for durable execution observability.
1313
*
14-
* <p>All invocations of the same execution share a single trace ID (derived from the execution ARN). Operations get
15-
* stable span IDs derived from the execution ARN + operation ID, ensuring the same operation produces the same span
16-
* across invocations.
14+
* <p>Trace ID resolution order:
1715
*
18-
* <p>When no pending operation ID is set, falls back to random generation (standard OTel behavior).
16+
* <ol>
17+
* <li>If an extracted trace ID is set (from {@code _X_AMZN_TRACE_ID}), use it. The durable execution backend
18+
* propagates the same Root to all invocations, so this naturally unifies the trace.
19+
* <li>If no extracted trace ID is available (local tests, non-Lambda environments), derive a deterministic trace ID
20+
* from the execution ARN using SHA-256.
21+
* <li>If neither is set, fall back to random generation.
22+
* </ol>
23+
*
24+
* <p>Span IDs for operations are deterministic (derived from execution ARN + operation ID), ensuring the same operation
25+
* produces the same span across invocations. When no pending operation ID is set, falls back to random generation.
1926
*
2027
* @deprecated This is a preview API that is experimental and may be changed or removed in future releases.
2128
*/
@@ -24,18 +31,30 @@ public class DeterministicIdGenerator implements IdGenerator {
2431

2532
private static final IdGenerator RANDOM = IdGenerator.random();
2633

27-
private final AtomicReference<String> executionTraceId = new AtomicReference<>(null);
34+
private final AtomicReference<String> extractedTraceId = new AtomicReference<>(null);
35+
private final AtomicReference<String> arnDerivedTraceId = new AtomicReference<>(null);
2836
private final ThreadLocal<String> pendingSpanOperationId = new ThreadLocal<>();
2937
private final AtomicReference<String> durableExecutionArn = new AtomicReference<>(null);
3038

3139
/**
32-
* Sets the execution ARN used for generating deterministic IDs.
40+
* Sets an externally extracted trace ID (e.g., from the X-Ray trace header). This takes highest priority for trace
41+
* ID generation.
42+
*
43+
* @param traceId 32-char lowercase hex trace ID
44+
*/
45+
public void setExtractedTraceId(String traceId) {
46+
this.extractedTraceId.set(traceId);
47+
}
48+
49+
/**
50+
* Sets the execution ARN used for generating deterministic IDs. Computes and caches an ARN-derived trace ID as
51+
* fallback when no extracted trace ID is available.
3352
*
3453
* @param arn the durable execution ARN
3554
*/
3655
public void setDurableExecutionArn(String arn) {
3756
this.durableExecutionArn.set(arn);
38-
this.executionTraceId.set(generateTraceIdFromArn(arn));
57+
this.arnDerivedTraceId.set(generateTraceIdFromArn(arn));
3958
}
4059

4160
/**
@@ -50,9 +69,6 @@ public void setNextSpanOperationId(String operationId) {
5069
/**
5170
* Generates a deterministic span ID for a given operation ID without consuming the ThreadLocal state.
5271
*
53-
* <p>Used for creating non-recording placeholder spans when a parent operation's span context is needed but hasn't
54-
* been exported yet.
55-
*
5672
* @param operationId the operation ID to derive the span ID from
5773
* @return a deterministic 16-char hex span ID
5874
*/
@@ -62,10 +78,17 @@ public String generateSpanIdForOperation(String operationId) {
6278

6379
@Override
6480
public String generateTraceId() {
65-
var cached = executionTraceId.get();
66-
if (cached != null) {
67-
return cached;
81+
// Priority 1: extracted from X-Ray header (backend propagates same Root across invocations)
82+
var extracted = extractedTraceId.get();
83+
if (extracted != null) {
84+
return extracted;
85+
}
86+
// Priority 2: deterministic from execution ARN (local tests, non-Lambda)
87+
var arnDerived = arnDerivedTraceId.get();
88+
if (arnDerived != null) {
89+
return arnDerived;
6890
}
91+
// Priority 3: random fallback
6992
return RANDOM.generateTraceId();
7093
}
7194

@@ -79,27 +102,19 @@ public String generateSpanId() {
79102
return RANDOM.generateSpanId();
80103
}
81104

82-
/**
83-
* Generates a deterministic trace ID from an execution ARN.
84-
*
85-
* <p>Uses SHA-256 hash truncated to 32 hex chars (128 bits) for the trace ID.
86-
*/
105+
/** Generates a deterministic trace ID from an execution ARN using SHA-256 truncated to 32 hex chars. */
87106
private String generateTraceIdFromArn(String arn) {
88107
var hash = sha256(arn);
89-
// Trace ID is 32 hex chars (16 bytes)
90108
return hash.substring(0, 32);
91109
}
92110

93111
/**
94-
* Generates a deterministic span ID from the execution ARN + operation ID.
95-
*
96-
* <p>Uses SHA-256 hash truncated to 16 hex chars (64 bits) for the span ID.
112+
* Generates a deterministic span ID from the execution ARN + operation ID using SHA-256 truncated to 16 hex chars.
97113
*/
98114
private String generateSpanIdFromOperation(String operationId) {
99115
var arn = durableExecutionArn.get();
100116
var input = arn != null ? arn + ":" + operationId : operationId;
101117
var hash = sha256(input);
102-
// Span ID is 16 hex chars (8 bytes)
103118
return hash.substring(0, 16);
104119
}
105120

otel-plugin/src/main/java/software/amazon/lambda/durable/otel/ExtractedContext.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.otel;
4+
5+
/**
6+
* Trace context extracted from the Lambda runtime environment.
7+
*
8+
* <p>Contains the trace ID (always present) and an optional parent span ID. When the durable execution backend
9+
* propagates the same X-Ray Root across all invocations, the trace ID will be consistent, enabling spans from different
10+
* invocations to be stitched into a single trace.
11+
*
12+
* @param traceId 32-character lowercase hex trace ID (OTel format, no dashes)
13+
* @param parentSpanId 16-character lowercase hex parent span ID (may be null if no parent available)
14+
* @deprecated This is a preview API that is experimental and may be changed or removed in future releases.
15+
*/
16+
@Deprecated
17+
public record ExtractedContext(String traceId, String parentSpanId) {}

otel-plugin/src/main/java/software/amazon/lambda/durable/otel/OpenTelemetryDurablePlugin.java

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,22 @@
3939
* <li><b>Attempt span</b> — one per user function execution (step attempt, child context run)
4040
* </ul>
4141
*
42-
* <p>Uses deterministic span/trace IDs so all invocations of the same execution share a single trace.
42+
* <p>Trace ID resolution:
43+
*
44+
* <ol>
45+
* <li>Uses the X-Ray trace ID from {@code _X_AMZN_TRACE_ID} when available. The durable execution backend propagates
46+
* the same Root to all invocations of the same execution, naturally unifying the trace.
47+
* <li>Falls back to a deterministic trace ID derived from the execution ARN (for local tests or non-Lambda
48+
* environments).
49+
* </ol>
50+
*
51+
* <p>Requires the ADOT Lambda Layer for trace export. Configure with:
52+
*
53+
* <ul>
54+
* <li>Lambda Layer: {@code aws-otel-java-agent} (ADOT Java auto-instrumentation layer)
55+
* <li>Env var: {@code AWS_LAMBDA_EXEC_WRAPPER=/opt/otel-handler}
56+
* <li>Tracing: Active (to populate {@code _X_AMZN_TRACE_ID})
57+
* </ul>
4358
*
4459
* <p>Thread-safe: uses {@link ConcurrentHashMap} for span/scope storage since the SDK runs user code on multiple
4560
* threads.
@@ -75,6 +90,17 @@ public class OpenTelemetryDurablePlugin implements DurableExecutionPlugin {
7590
/**
7691
* Creates an OTel plugin with default settings: X-Ray context extraction, MDC enabled.
7792
*
93+
* <p>Uses the provided tracer provider builder. Customers configure exporters and span processors on the builder —
94+
* the plugin handles ID generation.
95+
*
96+
* <p>For ADOT layer usage, configure with an OTLP exporter:
97+
*
98+
* <pre>{@code
99+
* var otlpExporter = OtlpGrpcSpanExporter.getDefault(); // sends to localhost:4317
100+
* var plugin = new OpenTelemetryDurablePlugin(
101+
* SdkTracerProvider.builder().addSpanProcessor(SimpleSpanProcessor.create(otlpExporter)));
102+
* }</pre>
103+
*
78104
* @param tracerProviderBuilder the tracer provider builder (ID generator will be overridden)
79105
*/
80106
public OpenTelemetryDurablePlugin(SdkTracerProviderBuilder tracerProviderBuilder) {
@@ -95,10 +121,6 @@ public OpenTelemetryDurablePlugin(
95121
/**
96122
* Creates an OTel plugin with full configuration.
97123
*
98-
* <p>The plugin internally creates a {@link DeterministicIdGenerator} and sets it on the provided builder before
99-
* building the tracer provider. Customers configure exporters, span processors, and samplers on the builder — the
100-
* plugin handles ID generation.
101-
*
102124
* @param tracerProviderBuilder the tracer provider builder (ID generator will be overridden)
103125
* @param contextExtractor extracts parent trace context from the Lambda environment
104126
* @param enableMdc if true, injects trace_id/span_id into SLF4J MDC for log correlation
@@ -117,14 +139,36 @@ public OpenTelemetryDurablePlugin(
117139
@Override
118140
public void onInvocationStart(InvocationInfo info) {
119141
this.durableExecutionArn = info.durableExecutionArn();
142+
143+
// Set execution ARN for deterministic span ID generation
120144
idGenerator.setDurableExecutionArn(info.durableExecutionArn());
121145

122-
// Extract parent context from Lambda environment (X-Ray, W3C, etc.)
123-
var extractedParentContext = contextExtractor.extract();
146+
// Extract trace context from environment (X-Ray header)
147+
var extractedContext = contextExtractor.extract();
124148

125-
// Create invocation span as child of extracted context
149+
if (extractedContext != null) {
150+
// Use the X-Ray trace ID — backend propagates same Root across all invocations
151+
idGenerator.setExtractedTraceId(extractedContext.traceId());
152+
}
153+
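// If no extracted context, idGenerator falls back to the ARN-derived trace ID, unless an extracted trace ID set by an earlier invocation is still cached (it is not cleared here)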
154+
155+
// Determine parent context for the invocation span
156+
Context parentContext;
157+
if (extractedContext != null && extractedContext.parentSpanId() != null) {
158+
// Create a remote parent span context from the X-Ray Parent field
159+
var parentSpanContext = SpanContext.createFromRemoteParent(
160+
extractedContext.traceId(),
161+
extractedContext.parentSpanId(),
162+
TraceFlags.getSampled(),
163+
TraceState.getDefault());
164+
parentContext = Context.root().with(Span.wrap(parentSpanContext));
165+
} else {
166+
parentContext = Context.root();
167+
}
168+
169+
// Create invocation span as child of Lambda's X-Ray segment (via Parent field) when a parent span ID was extracted; otherwise it is parented to the root context
126170
var spanBuilder = tracer.spanBuilder("durable.invocation")
127-
.setParent(extractedParentContext)
171+
.setParent(parentContext)
128172
.setAttribute(DURABLE_EXECUTION_ARN, info.durableExecutionArn())
129173
.setAttribute(DURABLE_FIRST_INVOCATION, info.isFirstInvocation());
130174

0 commit comments

Comments
 (0)