Skip to content

Commit 38ea00b

Browse files
committed
feat(error-handling): Support specific type error reconstruction from checkpoints.
1 parent fe12ac7 commit 38ea00b

13 files changed

Lines changed: 432 additions & 47 deletions

File tree

examples/src/main/java/com/amazonaws/lambda/durable/examples/ErrorHandlingExample.java

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,12 @@
2525
* <p>Note: {@code NonDeterministicExecutionException} is thrown by the SDK when code changes between executions (e.g.,
2626
* step order/names changed). It should be fixed in code, not caught.
2727
*/
28-
public class ErrorHandlingExample extends DurableHandler<String, String> {
28+
public class ErrorHandlingExample extends DurableHandler<Object, String> {
2929

3030
private static final Logger logger = LoggerFactory.getLogger(ErrorHandlingExample.class);
3131

3232
@Override
33-
public String handleRequest(String input, DurableContext context) {
33+
public String handleRequest(Object input, DurableContext context) {
3434
// Example 1: Catching StepFailedException with fallback logic
3535
String primaryResult;
3636
try {
@@ -44,7 +44,13 @@ public String handleRequest(String input, DurableContext context) {
4444
.retryStrategy(RetryStrategies.Presets.NO_RETRY)
4545
.build());
4646
} catch (StepFailedException e) {
47-
logger.warn("Primary service failed, using fallback: {}", e.getMessage());
47+
// This block should not run because we attempt to reconstruct the original exception type from the
48+
// checkpoint
49+
logger.warn("StepFailedException: Primary service failed, using fallback: {}", e.getMessage());
50+
primaryResult =
51+
context.step("call-fallback-service", String.class, () -> "fallback-result-step-failed-exception");
52+
} catch (RuntimeException e) {
53+
logger.warn("RuntimeException: Primary service failed, using fallback: {}", e.getMessage());
4854
primaryResult = context.step("call-fallback-service", String.class, () -> "fallback-result");
4955
}
5056

examples/src/test/java/com/amazonaws/lambda/durable/examples/CloudBasedIntegrationTest.java

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -223,4 +223,18 @@ void testCustomConfigExample() {
223223
assertTrue(stepResult.contains("user_age"));
224224
assertTrue(stepResult.contains("email_address"));
225225
}
226+
227+
@Test
228+
void testErrorHandlingExample() {
229+
var runner = CloudDurableTestRunner.create(arn("error-handling-example"), String.class, String.class);
230+
var result = runner.run("test-input");
231+
232+
assertEquals(ExecutionStatus.SUCCEEDED, result.getStatus());
233+
234+
var finalResult = result.getResult(String.class);
235+
assertNotNull(finalResult);
236+
assertTrue(finalResult.startsWith("Completed: "));
237+
assertTrue(finalResult.contains("fallback-result"));
238+
assertTrue(finalResult.contains("payment-"));
239+
}
226240
}

examples/src/test/java/com/amazonaws/lambda/durable/examples/ErrorHandlingExampleTest.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ class ErrorHandlingExampleTest {
1313
@Test
1414
void testErrorHandlingWithFallback() {
1515
var handler = new ErrorHandlingExample();
16-
var runner = LocalDurableTestRunner.create(String.class, handler);
16+
var runner = LocalDurableTestRunner.create(Object.class, handler);
1717

1818
var result = runner.run("test-input");
1919

@@ -24,7 +24,7 @@ void testErrorHandlingWithFallback() {
2424
@Test
2525
void testPaymentStepCompletes() {
2626
var handler = new ErrorHandlingExample();
27-
var runner = LocalDurableTestRunner.create(String.class, handler);
27+
var runner = LocalDurableTestRunner.create(Object.class, handler);
2828

2929
var result = runner.run("order-123");
3030

@@ -36,7 +36,7 @@ void testPaymentStepCompletes() {
3636
@Test
3737
void testPaymentStepInterruptedRecovery() {
3838
var handler = new ErrorHandlingExample();
39-
var runner = LocalDurableTestRunner.create(String.class, handler);
39+
var runner = LocalDurableTestRunner.create(Object.class, handler);
4040

4141
// First run: both steps complete normally
4242
var result1 = runner.run("order-456");

examples/template.yaml

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -208,6 +208,28 @@ Resources:
208208
DockerContext: ../
209209
DockerTag: durable-examples
210210

211+
ErrorHandlingExampleFunction:
212+
Type: AWS::Serverless::Function
213+
Properties:
214+
PackageType: Image
215+
FunctionName: error-handling-example
216+
ImageConfig:
217+
Command: ["com.amazonaws.lambda.durable.examples.ErrorHandlingExample::handleRequest"]
218+
DurableConfig:
219+
ExecutionTimeout: 300
220+
RetentionPeriodInDays: 7
221+
Policies:
222+
- Statement:
223+
- Effect: Allow
224+
Action:
225+
- lambda:CheckpointDurableExecutions
226+
- lambda:GetDurableExecutionState
227+
Resource: !Sub "arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:error-handling-example"
228+
Metadata:
229+
Dockerfile: examples/Dockerfile
230+
DockerContext: ../
231+
DockerTag: durable-examples
232+
211233
Outputs:
212234
SimpleStepExampleFunction:
213235
Description: Simple Step Example Function ARN
@@ -280,3 +302,11 @@ Outputs:
280302
LoggingExampleFunctionName:
281303
Description: Logging Example Function Name
282304
Value: !Ref LoggingExampleFunction
305+
306+
ErrorHandlingExampleFunction:
307+
Description: Error Handling Example Function ARN
308+
Value: !GetAtt ErrorHandlingExampleFunction.Arn
309+
310+
ErrorHandlingExampleFunctionName:
311+
Description: Error Handling Example Function Name
312+
Value: !Ref ErrorHandlingExampleFunction

sdk-integration-tests/src/test/java/com/amazonaws/lambda/durable/ExceptionIntegrationTest.java

Lines changed: 103 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,10 @@
22
// SPDX-License-Identifier: Apache-2.0
33
package com.amazonaws.lambda.durable;
44

5-
import static org.junit.jupiter.api.Assertions.*;
5+
import static org.junit.jupiter.api.Assertions.assertEquals;
6+
import static org.junit.jupiter.api.Assertions.assertNotNull;
7+
import static org.junit.jupiter.api.Assertions.assertTrue;
68

7-
import com.amazonaws.lambda.durable.exception.StepFailedException;
89
import com.amazonaws.lambda.durable.exception.StepInterruptedException;
910
import com.amazonaws.lambda.durable.model.ExecutionStatus;
1011
import com.amazonaws.lambda.durable.retry.RetryStrategies;
@@ -47,7 +48,7 @@ void testStepFailedExceptionCanBeCaughtWithFallback() {
4748
StepConfig.builder()
4849
.retryStrategy(RetryStrategies.Presets.NO_RETRY)
4950
.build());
50-
} catch (StepFailedException e) {
51+
} catch (RuntimeException e) {
5152
return ctx.step("fallback", String.class, () -> "fallback-result");
5253
}
5354
});
@@ -58,6 +59,90 @@ void testStepFailedExceptionCanBeCaughtWithFallback() {
5859
assertEquals("fallback-result", result.getResult(String.class));
5960
}
6061

62+
@Test
63+
void testOriginalExceptionTypeIsPreserved() {
64+
var runner = LocalDurableTestRunner.create(String.class, (input, ctx) -> {
65+
ctx.step(
66+
"throws-illegal-arg",
67+
String.class,
68+
() -> {
69+
throw new IllegalArgumentException("Invalid parameter");
70+
},
71+
StepConfig.builder()
72+
.retryStrategy(RetryStrategies.Presets.NO_RETRY)
73+
.build());
74+
return "should-not-reach";
75+
});
76+
77+
// First run - exception is thrown and checkpointed
78+
var result = runner.run("test");
79+
assertEquals(ExecutionStatus.FAILED, result.getStatus());
80+
81+
// Verify the operation failed with the correct exception type
82+
var failedOp = result.getOperation("throws-illegal-arg");
83+
assertNotNull(failedOp);
84+
var error = failedOp.getError();
85+
assertNotNull(error);
86+
assertEquals("java.lang.IllegalArgumentException", error.errorType());
87+
assertEquals("Invalid parameter", error.errorMessage());
88+
}
89+
90+
@Test
91+
void testOriginalExceptionTypeCanBeCaughtSpecifically() {
92+
var runner = LocalDurableTestRunner.create(String.class, (input, ctx) -> {
93+
try {
94+
return ctx.step(
95+
"throws-illegal-state",
96+
String.class,
97+
() -> {
98+
throw new IllegalStateException("Invalid state");
99+
},
100+
StepConfig.builder()
101+
.retryStrategy(RetryStrategies.Presets.NO_RETRY)
102+
.build());
103+
} catch (IllegalStateException e) {
104+
// Catch specific exception type
105+
return ctx.step("handle-illegal-state", String.class, () -> "recovered-from-illegal-state");
106+
} catch (Exception e) {
107+
// This block should NOT run: the IllegalStateException is handled by the more specific catch above
108+
return ctx.step("handle-illegal-arg", String.class, () -> "recovered-from-exception");
109+
}
110+
});
111+
112+
var result = runner.runUntilComplete("test");
113+
114+
assertEquals(ExecutionStatus.SUCCEEDED, result.getStatus());
115+
assertEquals("recovered-from-illegal-state", result.getResult(String.class));
116+
}
117+
118+
@Test
119+
void testCustomExceptionTypeIsPreserved() {
120+
var runner = LocalDurableTestRunner.create(String.class, (input, ctx) -> {
121+
ctx.step(
122+
"throws-custom",
123+
String.class,
124+
() -> {
125+
throw new CustomBusinessException("Business rule violated", 42);
126+
},
127+
StepConfig.builder()
128+
.retryStrategy(RetryStrategies.Presets.NO_RETRY)
129+
.build());
130+
return "should-not-reach";
131+
});
132+
133+
var result = runner.runUntilComplete("test");
134+
135+
assertEquals(ExecutionStatus.FAILED, result.getStatus());
136+
137+
// Verify the operation failed with the correct exception type
138+
var failedOp = result.getOperation("throws-custom");
139+
assertNotNull(failedOp);
140+
var error = failedOp.getError();
141+
assertNotNull(error);
142+
assertTrue(error.errorType().contains("CustomBusinessException"));
143+
assertEquals("Business rule violated", error.errorMessage());
144+
}
145+
61146
@Test
62147
void testStepInterruptedExceptionForAtMostOnceAfterCheckpointLoss() {
63148
var executionCount = new AtomicInteger(0);
@@ -87,6 +172,7 @@ void testStepInterruptedExceptionForAtMostOnceAfterCheckpointLoss() {
87172

88173
assertEquals(ExecutionStatus.FAILED, result.getStatus());
89174
assertEquals(1, executionCount.get()); // Should NOT have re-executed
175+
assertEquals(result.getError().get().errorType(), StepInterruptedException.class.getName());
90176
}
91177

92178
@Test
@@ -144,4 +230,18 @@ void testNonDeterministicExceptionOnStepNameChange() {
144230

145231
assertEquals(ExecutionStatus.FAILED, result.getStatus());
146232
}
233+
234+
// Custom exception for testing exception preservation
235+
public static class CustomBusinessException extends RuntimeException {
236+
private final int errorCode;
237+
238+
public CustomBusinessException(String message, int errorCode) {
239+
super(message);
240+
this.errorCode = errorCode;
241+
}
242+
243+
public int getErrorCode() {
244+
return errorCode;
245+
}
246+
}
147247
}

sdk/src/main/java/com/amazonaws/lambda/durable/DurableExecutor.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ public static <I, O> DurableExecutionOutput execute(
102102
} catch (Exception e) {
103103
Throwable cause = e.getCause() != null ? e.getCause() : e;
104104
logger.debug("Execution failed: {}", cause.getMessage());
105-
return DurableExecutionOutput.failure(cause);
105+
return DurableExecutionOutput.failure(cause, serDes);
106106
}
107107
}
108108

@@ -138,7 +138,7 @@ public static <I, O> DurableExecutionOutput execute(
138138
return DurableExecutionOutput.success(outputPayload);
139139
} catch (Exception e) {
140140
Throwable cause = e.getCause() != null ? e.getCause() : e;
141-
return DurableExecutionOutput.failure(cause);
141+
return DurableExecutionOutput.failure(cause, serDes);
142142
} finally {
143143
// We shutdown the execution to make sure remaining checkpoint calls in the queue are drained
144144
executionManager.shutdown();

sdk/src/main/java/com/amazonaws/lambda/durable/model/DurableExecutionOutput.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
package com.amazonaws.lambda.durable.model;
44

55
import com.amazonaws.lambda.durable.exception.StepFailedException;
6+
import com.amazonaws.lambda.durable.serde.SerDes;
67
import software.amazon.awssdk.services.lambda.model.ErrorObject;
78

89
public record DurableExecutionOutput(ExecutionStatus status, String result, ErrorObject error) {
@@ -14,12 +15,12 @@ public static DurableExecutionOutput pending() {
1415
return new DurableExecutionOutput(ExecutionStatus.PENDING, null, null);
1516
}
1617

17-
public static DurableExecutionOutput failure(Throwable e) {
18+
public static DurableExecutionOutput failure(Throwable e, SerDes serDes) {
1819
var errorObject = ErrorObject.builder()
19-
.errorType(e.getClass().getSimpleName())
20+
.errorType(e.getClass().getName())
2021
.errorMessage(e.getMessage())
2122
.stackTrace(StepFailedException.serializeStackTrace(e.getStackTrace()))
22-
// TODO: Add errorData object once we support polymorphic object mappers
23+
.errorData(serDes.serialize(e))
2324
.build();
2425
return new DurableExecutionOutput(ExecutionStatus.FAILED, null, errorObject);
2526
}

sdk/src/main/java/com/amazonaws/lambda/durable/operation/StepOperation.java

Lines changed: 27 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,15 @@
55
import com.amazonaws.lambda.durable.StepConfig;
66
import com.amazonaws.lambda.durable.StepSemantics;
77
import com.amazonaws.lambda.durable.TypeToken;
8+
import com.amazonaws.lambda.durable.exception.SerDesException;
89
import com.amazonaws.lambda.durable.exception.StepFailedException;
910
import com.amazonaws.lambda.durable.exception.StepInterruptedException;
1011
import com.amazonaws.lambda.durable.execution.ExecutionManager;
1112
import com.amazonaws.lambda.durable.execution.ExecutionPhase;
1213
import com.amazonaws.lambda.durable.execution.ThreadType;
1314
import com.amazonaws.lambda.durable.logging.DurableLogger;
1415
import com.amazonaws.lambda.durable.serde.SerDes;
16+
import com.amazonaws.lambda.durable.util.SneakyThrow;
1517
import java.time.Duration;
1618
import java.time.Instant;
1719
import java.util.concurrent.CompletableFuture;
@@ -234,9 +236,9 @@ private void handleStepError(Throwable e, int attempt) {
234236

235237
private void handleStepFailure(Throwable error, int attempt) {
236238
var errorObject = ErrorObject.builder()
237-
.errorType(error.getClass().getSimpleName())
239+
.errorType(error.getClass().getName())
238240
.errorMessage(error.getMessage())
239-
// TODO: Add errorData object once we support polymorphic object mappers
241+
.errorData(serDes.serialize(error))
240242
.stackTrace(StepFailedException.serializeStackTrace(error.getStackTrace()))
241243
.build();
242244

@@ -344,33 +346,40 @@ public T get() {
344346
return serDes.deserialize(result, resultType);
345347
}
346348
} else {
347-
// It failed so there's some kind of throwable. If we're using a serDes with
348-
// type info, deserialize and rethrow the original
349-
// throwable. Otherwise, throw a new StepFailedException that includes info
350-
// about the original throwable.
351-
352-
// TODO: Enable this feature after introducing polymorphic object mapper
353-
// support.
354-
// String errorData = op.stepDetails().error().errorData();
355-
// if (errorData != null && serDes.supportsIncludingTypeInfo()) {
356-
// SneakyThrow.sneakyThrow((Throwable)
357-
// serDes.deserializeWithTypeInfo(errorData));
358-
// }
359-
360349
var errorType = op.stepDetails().error().errorType();
361350

362351
// Throw StepInterruptedException directly for AT_MOST_ONCE interrupted steps
363-
// Todo: Change once errorData object is implemented
364-
if ("StepInterruptedException".equals(errorType)) {
352+
if (StepInterruptedException.class.getName().equals(errorType)) {
365353
throw new StepInterruptedException(operationId, name);
366354
}
367355

356+
// Attempt to reconstruct and throw the original exception
357+
try {
358+
Class<?> exceptionClass = Class.forName(errorType);
359+
if (Throwable.class.isAssignableFrom(exceptionClass)) {
360+
Throwable original = (Throwable) serDes.deserialize(
361+
op.stepDetails().error().errorData(), exceptionClass.asSubclass(Throwable.class));
362+
363+
if (original != null) {
364+
original.setStackTrace(StepFailedException.deserializeStackTrace(
365+
op.stepDetails().error().stackTrace()));
366+
SneakyThrow.sneakyThrow(original);
367+
}
368+
}
369+
} catch (ClassNotFoundException e) {
370+
logger.warn(
371+
"Cannot re-construct original exception type. Falling back to generic StepFailedException.");
372+
} catch (SerDesException e) {
373+
logger.warn(
374+
"Cannot deserialize original exception data. Falling back to generic StepFailedException.", e);
375+
}
376+
377+
// Fallback: wrap in StepFailedException
368378
throw new StepFailedException(
369379
String.format(
370380
"Step failed with error of type %s. Message: %s",
371381
errorType, op.stepDetails().error().errorMessage()),
372382
null,
373-
// Preserve original stack trace
374383
StepFailedException.deserializeStackTrace(
375384
op.stepDetails().error().stackTrace()));
376385
}

sdk/src/test/java/com/amazonaws/lambda/durable/DurableExecutionTest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ void testExecuteFailure() {
114114

115115
assertEquals(ExecutionStatus.FAILED, output.status());
116116
assertNotNull(output.error());
117-
assertEquals("RuntimeException", output.error().errorType());
117+
assertEquals("java.lang.RuntimeException", output.error().errorType());
118118
assertEquals("Test error", output.error().errorMessage());
119119
}
120120

0 commit comments

Comments
 (0)