Skip to content

Commit 5938735

Browse files
committed
feat(sdk): Add DurableInstrumentationPlugin interface and plugin runner
1 parent 6ad08c1 commit 5938735

15 files changed

Lines changed: 1240 additions & 0 deletions

sdk/src/main/java/software/amazon/lambda/durable/DurableConfig.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.IOException;
66
import java.io.InputStream;
77
import java.time.Duration;
8+
import java.util.List;
89
import java.util.Objects;
910
import java.util.Properties;
1011
import java.util.concurrent.ExecutorService;
@@ -21,6 +22,8 @@
2122
import software.amazon.lambda.durable.client.DurableExecutionClient;
2223
import software.amazon.lambda.durable.client.LambdaDurableFunctionsClient;
2324
import software.amazon.lambda.durable.logging.LoggerConfig;
25+
import software.amazon.lambda.durable.plugin.DurableInstrumentationPlugin;
26+
import software.amazon.lambda.durable.plugin.PluginRunner;
2427
import software.amazon.lambda.durable.retry.PollingStrategies;
2528
import software.amazon.lambda.durable.retry.PollingStrategy;
2629
import software.amazon.lambda.durable.serde.JacksonSerDes;
@@ -94,6 +97,7 @@ public final class DurableConfig {
9497
private final LoggerConfig loggerConfig;
9598
private final PollingStrategy pollingStrategy;
9699
private final Duration checkpointDelay;
100+
private final PluginRunner pluginRunner;
97101

98102
private DurableConfig(Builder builder) {
99103
this.durableExecutionClient = Objects.requireNonNullElseGet(
@@ -104,6 +108,7 @@ private DurableConfig(Builder builder) {
104108
this.loggerConfig = Objects.requireNonNullElseGet(builder.loggerConfig, LoggerConfig::defaults);
105109
this.pollingStrategy = Objects.requireNonNullElse(builder.pollingStrategy, PollingStrategies.Presets.DEFAULT);
106110
this.checkpointDelay = Objects.requireNonNullElseGet(builder.checkpointDelay, () -> Duration.ofSeconds(0));
111+
this.pluginRunner = builder.plugins != null ? new PluginRunner(builder.plugins) : PluginRunner.noOp();
107112

108113
validateConfiguration();
109114
}
@@ -180,6 +185,16 @@ public Duration getCheckpointDelay() {
180185
return checkpointDelay;
181186
}
182187

188+
/**
189+
* Gets the plugin runner that dispatches lifecycle events to registered plugins.
190+
*
191+
* @return PluginRunner instance (never null — returns no-op runner if no plugins configured)
192+
* @experimental This method is experimental and may be changed or removed in future releases.
193+
*/
194+
public PluginRunner getPluginRunner() {
195+
return pluginRunner;
196+
}
197+
183198
public void validateConfiguration() {
184199
if (getDurableExecutionClient() == null) {
185200
throw new IllegalStateException("DurableExecutionClient configuration failed");
@@ -274,6 +289,7 @@ public static final class Builder {
274289
private LoggerConfig loggerConfig;
275290
private PollingStrategy pollingStrategy;
276291
private Duration checkpointDelay;
292+
private List<DurableInstrumentationPlugin> plugins;
277293

278294
public Builder() {}
279295

@@ -383,6 +399,32 @@ public Builder withCheckpointDelay(Duration duration) {
383399
return this;
384400
}
385401

402+
/**
403+
* Sets instrumentation plugins for observability and tracing.
404+
*
405+
* <p>Plugins receive lifecycle callbacks at key points during durable execution, enabling integration with
406+
* tracing systems (e.g., OpenTelemetry, X-Ray), custom metrics, and logging enrichment.
407+
*
408+
* <p>Multiple plugins can be provided and will be called in order. Plugin errors are swallowed to prevent
409+
* instrumentation from affecting execution correctness.
410+
*
411+
* <p>Example:
412+
*
413+
* <pre>{@code
414+
* DurableConfig.builder()
415+
* .withPlugins(List.of(new OpenTelemetryDurablePlugin()))
416+
* .build();
417+
* }</pre>
418+
*
419+
* @param plugins list of instrumentation plugins
420+
* @return This builder
421+
* @experimental This method is experimental and may be changed or removed in future releases.
422+
*/
423+
public Builder withPlugins(List<DurableInstrumentationPlugin> plugins) {
424+
this.plugins = plugins;
425+
return this;
426+
}
427+
386428
/**
387429
* Builds the DurableConfig instance.
388430
*
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Information provided when an operation attempt ends.
9+
*
10+
* @param id operation ID
11+
* @param name human-readable operation name (may be null)
12+
* @param type operation type
13+
* @param subType operation sub-type (may be null)
14+
* @param parentId parent operation ID (null for root-level operations)
15+
* @param startTimestamp when the attempt started
16+
* @param endTimestamp when the attempt ended
17+
* @param attempt 1-based attempt number
18+
* @param outcome the attempt outcome (SUCCEEDED, FAILED, or RETRYING)
19+
* @param error non-null if the attempt failed
20+
* @param nextAttemptDelaySeconds non-null if outcome is RETRYING
21+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
22+
*/
23+
public record AttemptEndInfo(
24+
String id,
25+
String name,
26+
String type,
27+
String subType,
28+
String parentId,
29+
Instant startTimestamp,
30+
Instant endTimestamp,
31+
int attempt,
32+
AttemptOutcome outcome,
33+
Throwable error,
34+
Integer nextAttemptDelaySeconds) {}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Attempt-level information for step retry hooks.
9+
*
10+
* @param id operation ID
11+
* @param name human-readable operation name (may be null)
12+
* @param type operation type
13+
* @param subType operation sub-type (may be null)
14+
* @param parentId parent operation ID (null for root-level operations)
15+
* @param startTimestamp when the attempt started
16+
* @param attempt 1-based attempt number
17+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
18+
*/
19+
public record AttemptInfo(
20+
String id, String name, String type, String subType, String parentId, Instant startTimestamp, int attempt) {}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Possible outcomes for an operation attempt.
7+
*
8+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
9+
*/
10+
public enum AttemptOutcome {
11+
SUCCEEDED,
12+
FAILED,
13+
RETRYING
14+
}
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.util.Collections;
6+
import java.util.Map;
7+
8+
/**
9+
* Plugin interface for instrumenting durable execution lifecycle events.
10+
*
11+
* <p>Implement this interface to integrate observability tools (OpenTelemetry, Datadog, etc.) with the durable
12+
* execution SDK. The SDK calls these hooks at key lifecycle points without requiring modifications to core SDK code.
13+
*
14+
* <p>All methods have default no-op implementations, allowing plugins to override only the hooks they need.
15+
*
16+
* <p>Plugin errors are isolated — exceptions thrown by plugin methods are caught and logged but never disrupt SDK
17+
* execution.
18+
*
19+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
20+
*/
21+
public interface DurableInstrumentationPlugin {
22+
23+
// ─── Execution-level hooks ───────────────────────────────────────────
24+
25+
/**
26+
* Called once when an execution first starts (not on replay invocations). Use for sampling decisions or
27+
* execution-level span creation.
28+
*/
29+
default void onExecutionStart(InvocationInfo info) {}
30+
31+
/**
32+
* Called when an execution reaches a terminal state (succeeded or failed). Use for writing summary records or
33+
* flushing final data.
34+
*
35+
* <p>This hook is awaited — the SDK blocks until it returns before sending the response.
36+
*/
37+
default void onExecutionEnd(ExecutionEndInfo info) {}
38+
39+
// ─── Invocation-level hooks ──────────────────────────────────────────
40+
41+
/**
42+
* Called at the start of each Lambda invocation. Use to set up per-invocation state (trace ID, invocation span).
43+
*/
44+
default void onInvocationStart(InvocationInfo info) {}
45+
46+
/**
47+
* Called at the end of each Lambda invocation. Use to flush spans/metrics before Lambda freezes.
48+
*
49+
* <p>This hook is awaited — the SDK blocks until it returns. This is the only safe flush point before Lambda
50+
* freezes the execution environment.
51+
*/
52+
default void onInvocationEnd(InvocationInfo info) {}
53+
54+
// ─── Operation-level hooks ───────────────────────────────────────────
55+
56+
/**
57+
* Called when an operation starts for the first time (not on replay). Use to record when an operation genuinely
58+
* began.
59+
*/
60+
default void onOperationFirstStart(OperationInfo info) {}
61+
62+
/** Called when an operation starts (including replay). Use for logging/metrics that want replay visibility. */
63+
default void onOperationStart(OperationInfo info) {}
64+
65+
/**
66+
* Called when an operation completes for the first time (not on replay). The OTel plugin creates the operation span
67+
* here with backfilled start/end timestamps.
68+
*/
69+
default void onOperationFirstEnd(OperationEndInfo info) {}
70+
71+
// ─── Operation Attempt-level hooks ───────────────────────────────────
72+
73+
/** Called before each attempt of a retryable operation (step, waitForCondition). Use to start an attempt span. */
74+
default void onOperationAttemptStart(AttemptInfo info) {}
75+
76+
/**
77+
* Called after each attempt completes (succeeded, failed, or retrying). Use to end the attempt span and record
78+
* outcome/errors.
79+
*/
80+
default void onOperationAttemptEnd(AttemptEndInfo info) {}
81+
82+
// ─── Utility hooks ───────────────────────────────────────────────────
83+
84+
/**
85+
* Called when a checkpoint response contains operation status changes. Use for real-time dashboards or metrics on
86+
* operation state transitions.
87+
*/
88+
default void onOperationChange(OperationChangeInfo info) {}
89+
90+
/**
91+
* Returns additional key-value pairs to include in SDK log context via MDC. Called on each log emission. Return
92+
* empty map for no enrichment.
93+
*
94+
* <p>Implementations must be thread-safe as this may be called from any thread.
95+
*/
96+
default Map<String, String> enrichLogContext() {
97+
return Collections.emptyMap();
98+
}
99+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.util.Map;
6+
import software.amazon.awssdk.services.lambda.model.Operation;
7+
8+
/**
9+
* Information provided when a durable execution ends.
10+
*
11+
* @param requestId the Lambda request ID
12+
* @param executionArn the durable execution ARN
13+
* @param status the execution outcome (SUCCEEDED or FAILED)
14+
* @param executionResult the result if succeeded (may be null)
15+
* @param executionError the error if failed (may be null)
16+
* @param executionInput the original execution input
17+
* @param operations all operations in the execution (final state)
18+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
19+
*/
20+
public record ExecutionEndInfo(
21+
String requestId,
22+
String executionArn,
23+
ExecutionStatus status,
24+
Object executionResult,
25+
Throwable executionError,
26+
Object executionInput,
27+
Map<String, Operation> operations) {}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Terminal status of a durable execution.
7+
*
8+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
9+
*/
10+
public enum ExecutionStatus {
11+
SUCCEEDED,
12+
FAILED
13+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
/**
6+
* Invocation-level information available to plugin hooks.
7+
*
8+
* @param requestId the Lambda request ID for this invocation
9+
* @param executionArn the durable execution ARN
10+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
11+
*/
12+
public record InvocationInfo(String requestId, String executionArn) {}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.util.Map;
6+
import software.amazon.awssdk.services.lambda.model.Operation;
7+
8+
/**
9+
* Information about operation state changes from a checkpoint response.
10+
*
11+
* @param requestId the Lambda request ID
12+
* @param executionArn the durable execution ARN
13+
* @param updatedOperations operations whose status changed in this checkpoint response
14+
* @param operations all operations in the execution (current state)
15+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
16+
*/
17+
public record OperationChangeInfo(
18+
String requestId,
19+
String executionArn,
20+
Map<String, Operation> updatedOperations,
21+
Map<String, Operation> operations) {}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package software.amazon.lambda.durable.plugin;
4+
5+
import java.time.Instant;
6+
7+
/**
8+
* Extended operation information for operation end events.
9+
*
10+
* @param id operation ID
11+
* @param name human-readable operation name (may be null)
12+
* @param type operation type
13+
* @param subType operation sub-type (may be null)
14+
* @param parentId parent operation ID (null for root-level operations)
15+
* @param startTimestamp when the operation started
16+
* @param endTimestamp when the operation ended
17+
* @param error non-null if the operation failed
18+
* <p><b>Preview API:</b> This API is experimental and may be changed or removed in future releases.
19+
*/
20+
public record OperationEndInfo(
21+
String id,
22+
String name,
23+
String type,
24+
String subType,
25+
String parentId,
26+
Instant startTimestamp,
27+
Instant endTimestamp,
28+
Throwable error) {}

0 commit comments

Comments
 (0)