|
| 1 | +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | +package software.amazon.lambda.durable.util; |
| 4 | + |
| 5 | +import java.time.Duration; |
| 6 | +import java.util.Objects; |
| 7 | +import software.amazon.lambda.durable.DurableContext; |
| 8 | +import software.amazon.lambda.durable.TypeToken; |
| 9 | +import software.amazon.lambda.durable.config.RetryOperationConfig; |
| 10 | +import software.amazon.lambda.durable.exception.UnrecoverableDurableExecutionException; |
| 11 | +import software.amazon.lambda.durable.execution.SuspendExecutionException; |
| 12 | +import software.amazon.lambda.durable.retry.RetryDecision; |
| 13 | + |
| 14 | +/** |
| 15 | + * Replay-safe retry loop for any durable operation. |
| 16 | + * |
| 17 | + * <p>Provides the same retry-with-backoff pattern that {@code context.step()} has built in, but for operations that |
| 18 | + * cannot live inside a step ({@code waitForCallback}, {@code invoke}, {@code waitForCondition}, etc.). |
| 19 | + * |
| 20 | + * <p>Every side-effect in the loop is a durable operation, so the loop is replay-safe by construction. On replay, |
| 21 | + * completed operations return cached results instantly and the loop fast-forwards to the current attempt. |
| 22 | + * |
| 23 | + * <h2>Usage — callback retry</h2> |
| 24 | + * |
| 25 | + * <pre>{@code |
| 26 | + * var result = RetryOperationHelper.retryOperation( |
| 27 | + * context, |
| 28 | + * "approval", |
| 29 | + * (ctx, attempt) -> ctx.waitForCallback( |
| 30 | + * "approval-" + attempt, |
| 31 | + * String.class, |
| 32 | + * (callbackId, stepCtx) -> sendApprovalEmail(approverEmail, callbackId) |
| 33 | + * ), |
| 34 | + * RetryOperationConfig.builder() |
| 35 | + * .retryStrategy(RetryStrategies.exponentialBackoff( |
| 36 | + * 3, Duration.ofSeconds(2), Duration.ofSeconds(30), 2.0, JitterStrategy.FULL)) |
| 37 | + * .build() |
| 38 | + * ); |
| 39 | + * }</pre> |
| 40 | + * |
| 41 | + * <h2>Usage — invoke retry (anonymous form)</h2> |
| 42 | + * |
| 43 | + * <pre>{@code |
| 44 | + * var result = RetryOperationHelper.retryOperation( |
| 45 | + * context, |
| 46 | + * (ctx, attempt) -> ctx.invoke( |
| 47 | + * "charge-" + attempt, paymentFnArn, new ChargeRequest(orderId), String.class), |
| 48 | + * RetryOperationConfig.builder() |
| 49 | + * .retryStrategy((err, att) -> att < 3 |
| 50 | + * ? RetryDecision.retry(Duration.ofSeconds(1)) |
| 51 | + * : RetryDecision.fail()) |
| 52 | + * .build() |
| 53 | + * ); |
| 54 | + * }</pre> |
| 55 | + */ |
| 56 | +public final class RetryOperationHelper { |
| 57 | + |
| 58 | + private static final Duration DEFAULT_BACKOFF_DELAY = Duration.ofSeconds(1); |
| 59 | + private static final String BACKOFF_SUFFIX = "-backoff-"; |
| 60 | + private static final String ANONYMOUS_BACKOFF_PREFIX = "retry-backoff-"; |
| 61 | + |
| 62 | + private RetryOperationHelper() { |
| 63 | + // utility class |
| 64 | + } |
| 65 | + |
| 66 | + /** |
| 67 | + * Named form — wraps the retry loop in {@code runInChildContext} by default so all attempts are grouped under a |
| 68 | + * single named operation in execution history. |
| 69 | + * |
| 70 | + * <p>The child-context wrapping can be disabled via |
| 71 | + * {@link RetryOperationConfig.Builder#wrapInChildContext(boolean)}. |
| 72 | + * |
| 73 | + * @param <T> the result type |
| 74 | + * @param context the durable context |
| 75 | + * @param name operation name (used for child context and backoff wait names) |
| 76 | + * @param operation the retryable operation — receives the context and 1-based attempt number |
| 77 | + * @param config retry configuration including the retry strategy |
| 78 | + * @return the operation result |
| 79 | + */ |
| 80 | + @SuppressWarnings("unchecked") |
| 81 | + public static <T> T retryOperation( |
| 82 | + DurableContext context, String name, RetryableOperation<T> operation, RetryOperationConfig config) { |
| 83 | + Objects.requireNonNull(context, "context cannot be null"); |
| 84 | + Objects.requireNonNull(name, "name cannot be null"); |
| 85 | + Objects.requireNonNull(operation, "operation cannot be null"); |
| 86 | + Objects.requireNonNull(config, "config cannot be null"); |
| 87 | + |
| 88 | + if (config.wrapInChildContext()) { |
| 89 | + return (T) context.runInChildContext( |
| 90 | + name, new TypeToken<Object>() {}, childCtx -> executeRetryLoop(childCtx, name, operation, config)); |
| 91 | + } |
| 92 | + return executeRetryLoop(context, name, operation, config); |
| 93 | + } |
| 94 | + |
| 95 | + /** |
| 96 | + * Anonymous form — runs the retry loop directly in the caller's context. No child-context wrapping is applied |
| 97 | + * regardless of the {@code wrapInChildContext} config setting. |
| 98 | + * |
| 99 | + * @param <T> the result type |
| 100 | + * @param context the durable context |
| 101 | + * @param operation the retryable operation — receives the context and 1-based attempt number |
| 102 | + * @param config retry configuration including the retry strategy |
| 103 | + * @return the operation result |
| 104 | + */ |
| 105 | + public static <T> T retryOperation( |
| 106 | + DurableContext context, RetryableOperation<T> operation, RetryOperationConfig config) { |
| 107 | + Objects.requireNonNull(context, "context cannot be null"); |
| 108 | + Objects.requireNonNull(operation, "operation cannot be null"); |
| 109 | + Objects.requireNonNull(config, "config cannot be null"); |
| 110 | + |
| 111 | + return executeRetryLoop(context, null, operation, config); |
| 112 | + } |
| 113 | + |
| 114 | + /** |
| 115 | + * Core retry loop. Replay-safe because every side-effect is a durable operation: the user's operation calls durable |
| 116 | + * primitives, and backoff uses {@code context.wait()}. |
| 117 | + * |
| 118 | + * <p>{@link SuspendExecutionException} and {@link UnrecoverableDurableExecutionException} are never retried — they |
| 119 | + * are internal SDK control flow signals that must propagate immediately. |
| 120 | + */ |
| 121 | + private static <T> T executeRetryLoop( |
| 122 | + DurableContext context, String name, RetryableOperation<T> operation, RetryOperationConfig config) { |
| 123 | + var attempt = 1; |
| 124 | + while (true) { |
| 125 | + try { |
| 126 | + return operation.execute(context, attempt); |
| 127 | + } catch (SuspendExecutionException | UnrecoverableDurableExecutionException e) { |
| 128 | + // Internal SDK control flow — never retry, always propagate |
| 129 | + throw e; |
| 130 | + } catch (Exception e) { |
| 131 | + RetryDecision decision = config.retryStrategy().makeRetryDecision(e, attempt); |
| 132 | + if (!decision.shouldRetry()) { |
| 133 | + throw e; |
| 134 | + } |
| 135 | + |
| 136 | + var delay = decision.delay().isZero() ? DEFAULT_BACKOFF_DELAY : decision.delay(); |
| 137 | + var waitName = name != null ? name + BACKOFF_SUFFIX + attempt : ANONYMOUS_BACKOFF_PREFIX + attempt; |
| 138 | + context.wait(waitName, delay); |
| 139 | + attempt++; |
| 140 | + } |
| 141 | + } |
| 142 | + } |
| 143 | +} |
0 commit comments