Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions otel-plugin/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
</parent>

<artifactId>aws-durable-execution-sdk-java-otel</artifactId>
<name>AWS Lambda Durable Execution SDK - OpenTelemetry Plugin</name>
<name>AWS Lambda Durable Execution SDK OpenTelemetry Plugin</name>
<description>OpenTelemetry instrumentation plugin for AWS Lambda Durable Execution SDK</description>

<properties>
Expand Down Expand Up @@ -48,13 +48,6 @@
<version>${opentelemetry.version}</version>
</dependency>

<!-- AWS X-Ray Propagator (official OTel contrib) -->
<dependency>
<groupId>io.opentelemetry.contrib</groupId>
<artifactId>opentelemetry-aws-xray-propagator</artifactId>
<version>1.57.0-alpha</version>
</dependency>

<!-- SLF4J for logging -->
<dependency>
<groupId>org.slf4j</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
// SPDX-License-Identifier: Apache-2.0
package software.amazon.lambda.durable.otel;

import io.opentelemetry.context.Context;

/**
* Extracts OTel trace context from the Lambda runtime environment.
* Extracts trace context from the Lambda runtime environment.
*
* <p>Implementations read trace context from various sources (X-Ray trace header, W3C traceparent, etc.) and return an
* OTel {@link Context} that can be used as the parent for invocation spans.
* {@link ExtractedContext} containing the trace ID and optional parent span ID.
*
* <p>Called once per invocation in {@code onInvocationStart} to establish the parent trace context.
*
Expand All @@ -21,7 +19,7 @@ public interface ContextExtractor {
/**
* Extracts trace context from the runtime environment.
*
* @return the extracted OTel context, or {@link Context#root()} if no context is available
* @return the extracted context, or {@code null} if no context is available
*/
Context extract();
ExtractedContext extract();
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,18 @@
/**
* Generates deterministic trace and span IDs for durable execution observability.
*
* <p>All invocations of the same execution share a single trace ID (derived from the execution ARN). Operations get
* stable span IDs derived from the execution ARN + operation ID, ensuring the same operation produces the same span
* across invocations.
* <p>Trace ID resolution order:
*
* <p>When no pending operation ID is set, falls back to random generation (standard OTel behavior).
* <ol>
* <li>If an extracted trace ID is set (from {@code _X_AMZN_TRACE_ID}), use it. The durable execution backend
* propagates the same Root to all invocations, so this naturally unifies the trace.
* <li>If no extracted trace ID is available (local tests, non-Lambda environments), derive a deterministic trace ID
* from the execution ARN using SHA-256.
* <li>If neither is set, fall back to random generation.
* </ol>
*
* <p>Span IDs for operations are deterministic (derived from execution ARN + operation ID), ensuring the same operation
* produces the same span across invocations. When no pending operation ID is set, falls back to random generation.
*
* @deprecated This is a preview API that is experimental and may be changed or removed in future releases.
*/
Expand All @@ -24,18 +31,30 @@ public class DeterministicIdGenerator implements IdGenerator {

private static final IdGenerator RANDOM = IdGenerator.random();

private final AtomicReference<String> executionTraceId = new AtomicReference<>(null);
private final AtomicReference<String> extractedTraceId = new AtomicReference<>(null);
private final AtomicReference<String> arnDerivedTraceId = new AtomicReference<>(null);
private final ThreadLocal<String> pendingSpanOperationId = new ThreadLocal<>();
private final AtomicReference<String> durableExecutionArn = new AtomicReference<>(null);

/**
* Sets the execution ARN used for generating deterministic IDs.
* Sets an externally extracted trace ID (e.g., from the X-Ray trace header). This takes highest priority for trace
* ID generation.
*
* @param traceId 32-char lowercase hex trace ID
*/
public void setExtractedTraceId(String traceId) {
this.extractedTraceId.set(traceId);
}

/**
* Sets the execution ARN used for generating deterministic IDs. Computes and caches an ARN-derived trace ID as
* fallback when no extracted trace ID is available.
*
* @param arn the durable execution ARN
*/
public void setDurableExecutionArn(String arn) {
this.durableExecutionArn.set(arn);
this.executionTraceId.set(generateTraceIdFromArn(arn));
this.arnDerivedTraceId.set(generateTraceIdFromArn(arn));
}

/**
Expand All @@ -50,9 +69,6 @@ public void setNextSpanOperationId(String operationId) {
/**
* Generates a deterministic span ID for a given operation ID without consuming the ThreadLocal state.
*
* <p>Used for creating non-recording placeholder spans when a parent operation's span context is needed but hasn't
* been exported yet.
*
* @param operationId the operation ID to derive the span ID from
* @return a deterministic 16-char hex span ID
*/
Expand All @@ -62,10 +78,17 @@ public String generateSpanIdForOperation(String operationId) {

@Override
public String generateTraceId() {
var cached = executionTraceId.get();
if (cached != null) {
return cached;
// Priority 1: extracted from X-Ray header (backend propagates same Root across invocations)
var extracted = extractedTraceId.get();
if (extracted != null) {
return extracted;
}
// Priority 2: deterministic from execution ARN (local tests, non-Lambda)
var arnDerived = arnDerivedTraceId.get();
if (arnDerived != null) {
return arnDerived;
}
// Priority 3: random fallback
return RANDOM.generateTraceId();
}

Expand All @@ -79,27 +102,19 @@ public String generateSpanId() {
return RANDOM.generateSpanId();
}

/**
* Generates a deterministic trace ID from an execution ARN.
*
* <p>Uses SHA-256 hash truncated to 32 hex chars (128 bits) for the trace ID.
*/
/** Generates a deterministic trace ID from an execution ARN using SHA-256 truncated to 32 hex chars. */
private String generateTraceIdFromArn(String arn) {
var hash = sha256(arn);
// Trace ID is 32 hex chars (16 bytes)
return hash.substring(0, 32);
}

/**
* Generates a deterministic span ID from the execution ARN + operation ID.
*
* <p>Uses SHA-256 hash truncated to 16 hex chars (64 bits) for the span ID.
* Generates a deterministic span ID from the execution ARN + operation ID using SHA-256 truncated to 16 hex chars.
*/
private String generateSpanIdFromOperation(String operationId) {
var arn = durableExecutionArn.get();
var input = arn != null ? arn + ":" + operationId : operationId;
var hash = sha256(input);
// Span ID is 16 hex chars (8 bytes)
return hash.substring(0, 16);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package software.amazon.lambda.durable.otel;

/**
* Trace context extracted from the Lambda runtime environment.
*
* <p>Contains the trace ID (always present) and an optional parent span ID. When the durable execution backend
* propagates the same X-Ray Root across all invocations, the trace ID will be consistent, enabling spans from different
* invocations to be stitched into a single trace.
*
* @param traceId 32-character lowercase hex trace ID (OTel format, no dashes)
* @param parentSpanId 16-character lowercase hex parent span ID (may be null if no parent available)
* @deprecated This is a preview API that is experimental and may be changed or removed in future releases.
*/
@Deprecated
public record ExtractedContext(String traceId, String parentSpanId) {}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,22 @@
* <li><b>Attempt span</b> — one per user function execution (step attempt, child context run)
* </ul>
*
* <p>Uses deterministic span/trace IDs so all invocations of the same execution share a single trace.
* <p>Trace ID resolution:
*
* <ol>
* <li>Uses the X-Ray trace ID from {@code _X_AMZN_TRACE_ID} when available. The durable execution backend propagates
* the same Root to all invocations of the same execution, naturally unifying the trace.
* <li>Falls back to a deterministic trace ID derived from the execution ARN (for local tests or non-Lambda
* environments).
* </ol>
*
* <p>Requires the ADOT Lambda Layer for trace export. Configure with:
*
* <ul>
* <li>Lambda Layer: {@code aws-otel-java-agent} (ADOT Java auto-instrumentation layer)
* <li>Env var: {@code AWS_LAMBDA_EXEC_WRAPPER=/opt/otel-handler}
* <li>Tracing: Active (to populate {@code _X_AMZN_TRACE_ID})
* </ul>
*
* <p>Thread-safe: uses {@link ConcurrentHashMap} for span/scope storage since the SDK runs user code on multiple
* threads.
Expand Down Expand Up @@ -75,6 +90,17 @@ public class OpenTelemetryDurablePlugin implements DurableExecutionPlugin {
/**
* Creates an OTel plugin with default settings: X-Ray context extraction, MDC enabled.
*
* <p>Uses the provided tracer provider builder. Customers configure exporters and span processors on the builder —
* the plugin handles ID generation.
*
* <p>For ADOT layer usage, configure with an OTLP exporter:
*
* <pre>{@code
* var otlpExporter = OtlpGrpcSpanExporter.getDefault(); // sends to localhost:4317
* var plugin = new OpenTelemetryDurablePlugin(
* SdkTracerProvider.builder().addSpanProcessor(SimpleSpanProcessor.create(otlpExporter)));
* }</pre>
*
* @param tracerProviderBuilder the tracer provider builder (ID generator will be overridden)
*/
public OpenTelemetryDurablePlugin(SdkTracerProviderBuilder tracerProviderBuilder) {
Expand All @@ -95,10 +121,6 @@ public OpenTelemetryDurablePlugin(
/**
* Creates an OTel plugin with full configuration.
*
* <p>The plugin internally creates a {@link DeterministicIdGenerator} and sets it on the provided builder before
* building the tracer provider. Customers configure exporters, span processors, and samplers on the builder — the
* plugin handles ID generation.
*
* @param tracerProviderBuilder the tracer provider builder (ID generator will be overridden)
* @param contextExtractor extracts parent trace context from the Lambda environment
* @param enableMdc if true, injects trace_id/span_id into SLF4J MDC for log correlation
Expand All @@ -117,14 +139,36 @@ public OpenTelemetryDurablePlugin(
@Override
public void onInvocationStart(InvocationInfo info) {
this.durableExecutionArn = info.durableExecutionArn();

// Set execution ARN for deterministic span ID generation
idGenerator.setDurableExecutionArn(info.durableExecutionArn());

// Extract parent context from Lambda environment (X-Ray, W3C, etc.)
var extractedParentContext = contextExtractor.extract();
// Extract trace context from environment (X-Ray header)
var extractedContext = contextExtractor.extract();

// Create invocation span as child of extracted context
if (extractedContext != null) {
// Use the X-Ray trace ID — backend propagates same Root across all invocations
idGenerator.setExtractedTraceId(extractedContext.traceId());
}
// If no extracted context, idGenerator falls back to ARN-derived trace ID

// Determine parent context for the invocation span

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this required? I don't think I'm creating an explicit parentSpan for the invocation span.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it will look better visually.

├── Lambda segment (created by AWS automatically)
         │
         └── durable.invocation  

vs

├── Lambda segment (created by AWS automatically)
│
├── durable.invocation  

We can remove it later once we have demos ready and if we dont like it

Context parentContext;
if (extractedContext != null && extractedContext.parentSpanId() != null) {
// Create a remote parent span context from the X-Ray Parent field
var parentSpanContext = SpanContext.createFromRemoteParent(
extractedContext.traceId(),
extractedContext.parentSpanId(),
TraceFlags.getSampled(),
TraceState.getDefault());
parentContext = Context.root().with(Span.wrap(parentSpanContext));
} else {
parentContext = Context.root();
}

// Create invocation span as child of Lambda's X-Ray segment (via Parent field)
var spanBuilder = tracer.spanBuilder("durable.invocation")
.setParent(extractedParentContext)
.setParent(parentContext)
.setAttribute(DURABLE_EXECUTION_ARN, info.durableExecutionArn())
.setAttribute(DURABLE_FIRST_INVOCATION, info.isFirstInvocation());

Expand Down
Loading