Skip to content

Commit 9128070

Browse files
committed
feat(sdk): Add DurableInstrumentationPlugin interface and plugin runner
1 parent 5ce3a7a commit 9128070

13 files changed

Lines changed: 1052 additions & 0 deletions

sdk/src/main/java/software/amazon/lambda/durable/DurableConfig.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.IOException;
66
import java.io.InputStream;
77
import java.time.Duration;
8+
import java.util.List;
89
import java.util.Objects;
910
import java.util.Properties;
1011
import java.util.concurrent.ExecutorService;
@@ -21,6 +22,8 @@
2122
import software.amazon.lambda.durable.client.DurableExecutionClient;
2223
import software.amazon.lambda.durable.client.LambdaDurableFunctionsClient;
2324
import software.amazon.lambda.durable.logging.LoggerConfig;
25+
import software.amazon.lambda.durable.plugin.DurableExecutionPlugin;
26+
import software.amazon.lambda.durable.plugin.PluginRunner;
2427
import software.amazon.lambda.durable.retry.PollingStrategies;
2528
import software.amazon.lambda.durable.retry.PollingStrategy;
2629
import software.amazon.lambda.durable.serde.JacksonSerDes;
@@ -94,6 +97,7 @@ public final class DurableConfig {
9497
private final LoggerConfig loggerConfig;
9598
private final PollingStrategy pollingStrategy;
9699
private final Duration checkpointDelay;
100+
private final PluginRunner pluginRunner;
97101

98102
private DurableConfig(Builder builder) {
99103
this.durableExecutionClient = Objects.requireNonNullElseGet(
@@ -104,6 +108,7 @@ private DurableConfig(Builder builder) {
104108
this.loggerConfig = Objects.requireNonNullElseGet(builder.loggerConfig, LoggerConfig::defaults);
105109
this.pollingStrategy = Objects.requireNonNullElse(builder.pollingStrategy, PollingStrategies.Presets.DEFAULT);
106110
this.checkpointDelay = Objects.requireNonNullElseGet(builder.checkpointDelay, () -> Duration.ofSeconds(0));
111+
this.pluginRunner = builder.plugins != null ? new PluginRunner(builder.plugins) : PluginRunner.noOp();
107112

108113
validateConfiguration();
109114
}
@@ -180,6 +185,16 @@ public Duration getCheckpointDelay() {
180185
return checkpointDelay;
181186
}
182187

188+
/**
189+
* Gets the plugin runner that dispatches lifecycle events to registered plugins.
190+
*
191+
* @return PluginRunner instance (never null — returns no-op runner if no plugins configured)
192+
* @experimental This method is experimental and may be changed or removed in future releases.
193+
*/
194+
public PluginRunner getPluginRunner() {
195+
return pluginRunner;
196+
}
197+
183198
public void validateConfiguration() {
184199
if (getDurableExecutionClient() == null) {
185200
throw new IllegalStateException("DurableExecutionClient configuration failed");
@@ -274,6 +289,7 @@ public static final class Builder {
274289
private LoggerConfig loggerConfig;
275290
private PollingStrategy pollingStrategy;
276291
private Duration checkpointDelay;
292+
private List<DurableExecutionPlugin> plugins;
277293

278294
public Builder() {}
279295

@@ -383,6 +399,32 @@ public Builder withCheckpointDelay(Duration duration) {
383399
return this;
384400
}
385401

402+
/**
403+
* Sets instrumentation plugins for observability and tracing.
404+
*
405+
* <p>Plugins receive lifecycle callbacks at key points during durable execution, enabling integration with
406+
* tracing systems (e.g., OpenTelemetry, X-Ray), custom metrics, and logging enrichment.
407+
*
408+
* <p>Multiple plugins can be provided and will be called in order. Plugin errors are swallowed to prevent
409+
* instrumentation from affecting execution correctness.
410+
*
411+
* <p>Example:
412+
*
413+
* <pre>{@code
414+
* DurableConfig.builder()
415+
* .withPlugins(List.of(new OpenTelemetryDurablePlugin()))
416+
* .build();
417+
* }</pre>
418+
*
419+
* @param plugins list of instrumentation plugins
420+
* @return This builder
421+
* @experimental This method is experimental and may be changed or removed in future releases.
422+
*/
423+
public Builder withPlugins(List<DurableExecutionPlugin> plugins) {
424+
this.plugins = plugins;
425+
return this;
426+
}
427+
386428
/**
387429
* Builds the DurableConfig instance.
388430
*
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Plugin interface for instrumenting durable execution lifecycle events.
7+
*
8+
* <p>Implement this interface to integrate observability tools (OpenTelemetry, Datadog, etc.) with the durable
9+
* execution SDK. The SDK calls these hooks at key lifecycle points without requiring modifications to core SDK code.
10+
*
11+
* <p>All methods have default no-op implementations, allowing plugins to override only the hooks they need.
12+
*
13+
* <p>Plugin errors are isolated — exceptions thrown by plugin methods are caught and logged but never disrupt SDK
14+
* execution.
15+
*
16+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
17+
*/
18+
public interface DurableExecutionPlugin {
19+
20+
// ─── Invocation-level hooks ──────────────────────────────────────────
21+
22+
/**
23+
* Called at the start of each Lambda invocation. Use to set up per-invocation state (trace ID, invocation span).
24+
*
25+
* <p>Check {@link InvocationInfo#isFirstInvocation()} to detect the first invocation of an execution (useful for
26+
* sampling decisions or execution-level span creation).
27+
*/
28+
default void onInvocationStart(InvocationInfo info) {}
29+
30+
/**
31+
* Called at the end of each Lambda invocation. Use to flush spans/metrics before Lambda freezes.
32+
*
33+
* <p>This hook is awaited — the SDK blocks until it returns. This is the only safe flush point before Lambda
34+
* freezes the execution environment.
35+
*
36+
* <p>Check {@link InvocationEndInfo#invocationStatus()} to detect if the execution reached a terminal state in this
37+
* invocation (useful for writing summary records or flushing final data).
38+
*/
39+
default void onInvocationEnd(InvocationEndInfo info) {}
40+
41+
// ─── Operation-level hooks ───────────────────────────────────────────
42+
43+
/** Called when an operation starts (including replay). Use for logging/metrics that want replay visibility. */
44+
default void onOperationStart(OperationInfo info) {}
45+
46+
/**
47+
* Called when an operation reaches a terminal status for the first time (not on replay).
48+
*
49+
* <p>The OTel plugin creates operation spans here with backfilled start/end timestamps.
50+
*/
51+
default void onOperationEnd(OperationEndInfo info) {}
52+
53+
// ─── User function hooks ─────────────────────────────────────────────
54+
55+
/**
56+
* Called when a user-provided function starts executing. This fires for both step attempts (with {@code attempt}
57+
* set) and child context functions (with {@code attempt} null).
58+
*
59+
* <p>This hook fires on the same thread as user code, so plugins can set OTel context via
60+
* {@code Context.makeCurrent()} here.
61+
*/
62+
default void onUserFunctionStart(UserFunctionStartInfo info) {}
63+
64+
/**
65+
* Called when a user-provided function finishes executing. This fires for both step attempts and child context
66+
* functions.
67+
*
68+
* <p>This hook fires on the same thread as user code, so plugins can close OTel scopes here.
69+
*/
70+
default void onUserFunctionEnd(UserFunctionEndInfo info) {}
71+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Information provided at the end of a Lambda invocation.
7+
*
8+
* @param requestId the Lambda request ID for this invocation
9+
* @param executionArn the durable execution ARN
10+
* @param isFirstInvocation true if this is the first invocation of the execution
11+
* @param invocationStatus the invocation outcome (SUCCEEDED, FAILED, or PENDING)
12+
* @param executionError non-null if the execution failed
13+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
14+
*/
15+
public record InvocationEndInfo(
16+
String requestId,
17+
String executionArn,
18+
boolean isFirstInvocation,
19+
InvocationStatus invocationStatus,
20+
Throwable executionError) {}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Invocation-level information available to plugin hooks.
7+
*
8+
* @param requestId the Lambda request ID for this invocation
9+
* @param executionArn the durable execution ARN
10+
* @param isFirstInvocation true if this is the first invocation of the execution (not a replay invocation)
11+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
12+
*/
13+
public record InvocationInfo(String requestId, String executionArn, boolean isFirstInvocation) {}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Status of a Lambda invocation at the end of its execution.
7+
*
8+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
9+
*/
10+
public enum InvocationStatus {
11+
/** Execution completed successfully in this invocation. */
12+
SUCCEEDED,
13+
/** Execution failed in this invocation. */
14+
FAILED,
15+
/** Execution suspended — will resume in a future invocation. */
16+
PENDING
17+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Extended operation information for operation end events.
9+
*
10+
* @param id operation ID — hashed
11+
* @param rawId operation ID — unhashed
12+
* @param name human-readable operation name (may be null)
13+
* @param type operation type
14+
* @param subType operation sub-type (may be null)
15+
* @param parentId parent operation ID — hashed (null for root-level operations)
16+
* @param rawParentId parent operation ID — unhashed (null for root-level operations)
17+
* @param startTimestamp when the operation started
18+
* @param endTimestamp when the operation ended
19+
* @param error non-null if the operation failed
20+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
21+
*/
22+
public record OperationEndInfo(
23+
String id,
24+
String rawId,
25+
String name,
26+
String type,
27+
String subType,
28+
String parentId,
29+
String rawParentId,
30+
Instant startTimestamp,
31+
Instant endTimestamp,
32+
Throwable error) {}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Operation-level information available to plugin hooks.
9+
*
10+
* <p>Field names mirror the {@code Operation} type from the AWS SDK for consistency.
11+
*
12+
* @param id operation ID — hashed (unique within the execution)
13+
* @param rawId operation ID — unhashed (e.g. "1", "2", "hash(1)-1" for child contexts)
14+
* @param name human-readable operation name (may be null)
15+
* @param type operation type (STEP, WAIT, CONTEXT, CHAINED_INVOKE, CALLBACK)
16+
* @param subType operation sub-type (Map, Parallel, RunInChildContext, WaitForCondition, etc.) — may be null
17+
* @param parentId parent operation ID — hashed (null for root-level operations)
18+
* @param rawParentId parent operation ID — unhashed (null for root-level operations)
19+
* @param startTimestamp when the operation started (may be from a prior invocation)
20+
* @param endTimestamp when the operation ended (null if still running)
21+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
22+
*/
23+
public record OperationInfo(
24+
String id,
25+
String rawId,
26+
String name,
27+
String type,
28+
String subType,
29+
String parentId,
30+
String rawParentId,
31+
Instant startTimestamp,
32+
Instant endTimestamp) {}

0 commit comments

Comments
 (0)