Skip to content

Commit 746bf19

Browse files
committed
feat(sdk): Add DurableInstrumentationPlugin interface and plugin runner
1 parent 5ce3a7a commit 746bf19

14 files changed

Lines changed: 1114 additions & 0 deletions

sdk/src/main/java/software/amazon/lambda/durable/DurableConfig.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.IOException;
66
import java.io.InputStream;
77
import java.time.Duration;
8+
import java.util.List;
89
import java.util.Objects;
910
import java.util.Properties;
1011
import java.util.concurrent.ExecutorService;
@@ -21,6 +22,8 @@
2122
import software.amazon.lambda.durable.client.DurableExecutionClient;
2223
import software.amazon.lambda.durable.client.LambdaDurableFunctionsClient;
2324
import software.amazon.lambda.durable.logging.LoggerConfig;
25+
import software.amazon.lambda.durable.plugin.DurableInstrumentationPlugin;
26+
import software.amazon.lambda.durable.plugin.PluginRunner;
2427
import software.amazon.lambda.durable.retry.PollingStrategies;
2528
import software.amazon.lambda.durable.retry.PollingStrategy;
2629
import software.amazon.lambda.durable.serde.JacksonSerDes;
@@ -94,6 +97,7 @@ public final class DurableConfig {
9497
private final LoggerConfig loggerConfig;
9598
private final PollingStrategy pollingStrategy;
9699
private final Duration checkpointDelay;
100+
private final PluginRunner pluginRunner;
97101

98102
private DurableConfig(Builder builder) {
99103
this.durableExecutionClient = Objects.requireNonNullElseGet(
@@ -104,6 +108,7 @@ private DurableConfig(Builder builder) {
104108
this.loggerConfig = Objects.requireNonNullElseGet(builder.loggerConfig, LoggerConfig::defaults);
105109
this.pollingStrategy = Objects.requireNonNullElse(builder.pollingStrategy, PollingStrategies.Presets.DEFAULT);
106110
this.checkpointDelay = Objects.requireNonNullElseGet(builder.checkpointDelay, () -> Duration.ofSeconds(0));
111+
this.pluginRunner = builder.plugins != null ? new PluginRunner(builder.plugins) : PluginRunner.noOp();
107112

108113
validateConfiguration();
109114
}
@@ -180,6 +185,16 @@ public Duration getCheckpointDelay() {
180185
return checkpointDelay;
181186
}
182187

188+
/**
189+
* Gets the plugin runner that dispatches lifecycle events to registered plugins.
190+
*
191+
* @return PluginRunner instance (never null — returns no-op runner if no plugins configured)
192+
* @experimental This method is experimental and may be changed or removed in future releases.
193+
*/
194+
public PluginRunner getPluginRunner() {
195+
return pluginRunner;
196+
}
197+
183198
public void validateConfiguration() {
184199
if (getDurableExecutionClient() == null) {
185200
throw new IllegalStateException("DurableExecutionClient configuration failed");
@@ -274,6 +289,7 @@ public static final class Builder {
274289
private LoggerConfig loggerConfig;
275290
private PollingStrategy pollingStrategy;
276291
private Duration checkpointDelay;
292+
private List<DurableInstrumentationPlugin> plugins;
277293

278294
public Builder() {}
279295

@@ -383,6 +399,32 @@ public Builder withCheckpointDelay(Duration duration) {
383399
return this;
384400
}
385401

402+
/**
403+
* Sets instrumentation plugins for observability and tracing.
404+
*
405+
* <p>Plugins receive lifecycle callbacks at key points during durable execution, enabling integration with
406+
* tracing systems (e.g., OpenTelemetry, X-Ray), custom metrics, and logging enrichment.
407+
*
408+
* <p>Multiple plugins can be provided and will be called in order. Plugin errors are swallowed to prevent
409+
* instrumentation from affecting execution correctness.
410+
*
411+
* <p>Example:
412+
*
413+
* <pre>{@code
414+
* DurableConfig.builder()
415+
* .withPlugins(List.of(new OpenTelemetryDurablePlugin()))
416+
* .build();
417+
* }</pre>
418+
*
419+
* @param plugins list of instrumentation plugins
420+
* @return This builder
421+
* @experimental This method is experimental and may be changed or removed in future releases.
422+
*/
423+
public Builder withPlugins(List<DurableInstrumentationPlugin> plugins) {
424+
this.plugins = plugins;
425+
return this;
426+
}
427+
386428
/**
387429
* Builds the DurableConfig instance.
388430
*
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Information provided when an operation attempt ends.
9+
*
10+
* @param id operation ID — hashed
11+
* @param rawId operation ID — unhashed
12+
* @param name human-readable operation name (may be null)
13+
* @param type operation type
14+
* @param subType operation sub-type (may be null)
15+
* @param parentId parent operation ID — hashed (null for root-level operations)
16+
* @param rawParentId parent operation ID — unhashed (null for root-level operations)
17+
* @param startTimestamp when the attempt started
18+
* @param endTimestamp when the attempt ended
19+
* @param attempt 1-based attempt number
20+
* @param outcome the attempt outcome (SUCCEEDED, FAILED, or RETRYING)
21+
* @param error non-null if the attempt failed
22+
* @param nextAttemptDelaySeconds non-null if outcome is RETRYING
23+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
24+
*/
25+
public record AttemptEndInfo(
26+
String id,
27+
String rawId,
28+
String name,
29+
String type,
30+
String subType,
31+
String parentId,
32+
String rawParentId,
33+
Instant startTimestamp,
34+
Instant endTimestamp,
35+
int attempt,
36+
AttemptOutcome outcome,
37+
Throwable error,
38+
Integer nextAttemptDelaySeconds) {}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Attempt-level information for step retry hooks.
9+
*
10+
* @param id operation ID — hashed
11+
* @param rawId operation ID — unhashed
12+
* @param name human-readable operation name (may be null)
13+
* @param type operation type
14+
* @param subType operation sub-type (may be null)
15+
* @param parentId parent operation ID — hashed (null for root-level operations)
16+
* @param rawParentId parent operation ID — unhashed (null for root-level operations)
17+
* @param startTimestamp when the attempt started
18+
* @param attempt 1-based attempt number
19+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
20+
*/
21+
public record AttemptInfo(
22+
String id,
23+
String rawId,
24+
String name,
25+
String type,
26+
String subType,
27+
String parentId,
28+
String rawParentId,
29+
Instant startTimestamp,
30+
int attempt) {}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Possible outcomes for an operation attempt.
7+
*
8+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
9+
*/
10+
public enum AttemptOutcome {
11+
SUCCEEDED,
12+
FAILED,
13+
RETRYING
14+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Plugin interface for instrumenting durable execution lifecycle events.
7+
*
8+
* <p>Implement this interface to integrate observability tools (OpenTelemetry, Datadog, etc.) with the durable
9+
* execution SDK. The SDK calls these hooks at key lifecycle points without requiring modifications to core SDK code.
10+
*
11+
* <p>All methods have default no-op implementations, allowing plugins to override only the hooks they need.
12+
*
13+
* <p>Plugin errors are isolated — exceptions thrown by plugin methods are caught and logged but never disrupt SDK
14+
* execution.
15+
*
16+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
17+
*/
18+
public interface DurableInstrumentationPlugin {
19+
20+
// ─── Invocation-level hooks ──────────────────────────────────────────
21+
22+
/**
23+
* Called at the start of each Lambda invocation. Use to set up per-invocation state (trace ID, invocation span).
24+
*
25+
* <p>Check {@link InvocationInfo#isFirstInvocation()} to detect the first invocation of an execution (useful for
26+
* sampling decisions or execution-level span creation).
27+
*/
28+
default void onInvocationStart(InvocationInfo info) {}
29+
30+
/**
31+
* Called at the end of each Lambda invocation. Use to flush spans/metrics before Lambda freezes.
32+
*
33+
* <p>This hook is awaited — the SDK blocks until it returns. This is the only safe flush point before Lambda
34+
* freezes the execution environment.
35+
*
36+
* <p>Check {@link InvocationEndInfo#executionStatus()} to detect if the execution reached a terminal state in this
37+
* invocation (useful for writing summary records or flushing final data).
38+
*/
39+
default void onInvocationEnd(InvocationEndInfo info) {}
40+
41+
// ─── Operation-level hooks ───────────────────────────────────────────
42+
43+
/** Called when an operation starts (including replay). Use for logging/metrics that want replay visibility. */
44+
default void onOperationStart(OperationInfo info) {}
45+
46+
/**
47+
* Called when an operation reaches a terminal status for the first time (not on replay).
48+
*
49+
* <p>The OTel plugin creates operation spans here with backfilled start/end timestamps.
50+
*/
51+
default void onOperationEnd(OperationEndInfo info) {}
52+
53+
// ─── Operation Attempt-level hooks ───────────────────────────────────
54+
55+
/**
56+
* Called before each attempt of a retryable operation (step, waitForCondition). Use to start an attempt span.
57+
*
58+
* <p>This hook fires on the same thread as user code, so plugins can set OTel context via
59+
* {@code Context.makeCurrent()} here.
60+
*/
61+
default void onOperationAttemptStart(AttemptInfo info) {}
62+
63+
/**
64+
* Called after each attempt completes (succeeded, failed, or retrying). Use to end the attempt span and record
65+
* outcome/errors.
66+
*
67+
* <p>This hook fires on the same thread as user code, so plugins can close OTel scopes here.
68+
*/
69+
default void onOperationAttemptEnd(AttemptEndInfo info) {}
70+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Terminal status of a durable execution.
7+
*
8+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
9+
*/
10+
public enum ExecutionStatus {
11+
SUCCEEDED,
12+
FAILED
13+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Information provided at the end of a Lambda invocation.
7+
*
8+
* <p>If the execution reached a terminal state during this invocation, {@link #executionStatus()} will be non-null.
9+
*
10+
* @param requestId the Lambda request ID for this invocation
11+
* @param executionArn the durable execution ARN
12+
* @param isFirstInvocation true if this is the first invocation of the execution
13+
* @param executionStatus non-null if the execution reached a terminal state (SUCCEEDED or FAILED) in this invocation
14+
* @param executionError non-null if the execution failed
15+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
16+
*/
17+
public record InvocationEndInfo(
18+
String requestId,
19+
String executionArn,
20+
boolean isFirstInvocation,
21+
ExecutionStatus executionStatus,
22+
Throwable executionError) {}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Invocation-level information available to plugin hooks.
7+
*
8+
* @param requestId the Lambda request ID for this invocation
9+
* @param executionArn the durable execution ARN
10+
* @param isFirstInvocation true if this is the first invocation of the execution (not a replay invocation)
11+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
12+
*/
13+
public record InvocationInfo(String requestId, String executionArn, boolean isFirstInvocation) {}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Extended operation information for operation end events.
9+
*
10+
* @param id operation ID — hashed
11+
* @param rawId operation ID — unhashed
12+
* @param name human-readable operation name (may be null)
13+
* @param type operation type
14+
* @param subType operation sub-type (may be null)
15+
* @param parentId parent operation ID — hashed (null for root-level operations)
16+
* @param rawParentId parent operation ID — unhashed (null for root-level operations)
17+
* @param startTimestamp when the operation started
18+
* @param endTimestamp when the operation ended
19+
* @param error non-null if the operation failed
20+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
21+
*/
22+
public record OperationEndInfo(
23+
String id,
24+
String rawId,
25+
String name,
26+
String type,
27+
String subType,
28+
String parentId,
29+
String rawParentId,
30+
Instant startTimestamp,
31+
Instant endTimestamp,
32+
Throwable error) {}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Operation-level information available to plugin hooks.
9+
*
10+
* <p>Field names mirror the {@code Operation} type from the AWS SDK for consistency.
11+
*
12+
* @param id operation ID — hashed (unique within the execution)
13+
* @param rawId operation ID — unhashed (e.g. "1", "2", "hash(1)-1" for child contexts)
14+
* @param name human-readable operation name (may be null)
15+
* @param type operation type (STEP, WAIT, CONTEXT, CHAINED_INVOKE, CALLBACK)
16+
* @param subType operation sub-type (Map, Parallel, RunInChildContext, WaitForCondition, etc.) — may be null
17+
* @param parentId parent operation ID — hashed (null for root-level operations)
18+
* @param rawParentId parent operation ID — unhashed (null for root-level operations)
19+
* @param startTimestamp when the operation started (may be from a prior invocation)
20+
* @param endTimestamp when the operation ended (null if still running)
21+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
22+
*/
23+
public record OperationInfo(
24+
String id,
25+
String rawId,
26+
String name,
27+
String type,
28+
String subType,
29+
String parentId,
30+
String rawParentId,
31+
Instant startTimestamp,
32+
Instant endTimestamp) {}

0 commit comments

Comments
 (0)