Skip to content

Commit e62ad29

Browse files
committed
parity: Add jitter policies on retry
Adds jitter policies for retry. The jitter is added on top of the computed delay:
- none: no jitter. Total delay is exactly the computed delay.
- full: jitter is a random number in [0, delay]. Total delay is in [delay, delay*2].
- half: jitter is a random number in [0.5 x delay, delay]. Total delay is in [delay*1.5, delay*2].
1 parent 9c47087 commit e62ad29

1 file changed

Lines changed: 34 additions & 8 deletions

File tree

src/aws_durable_execution_sdk_python/retries.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
from enum import StrEnum
56
import random
67
import re
78
import sys
@@ -10,9 +11,40 @@
1011

1112
if TYPE_CHECKING:
1213
from collections.abc import Callable
14+
from aws_durable_execution_sdk_python.types import JitterStrategy
1315

1416
# Type alias for values that may be supplied as either an int or a float.
Numeric = int | float
1517

18+
# region Jitter
19+
20+
class JitterStrategy(StrEnum):
21+
"""
22+
Jitter strategies are used to introduce noise when attempting to retry
23+
an invoke. We introduce noise to prevent a thundering-herd effect where
24+
a group of accesses (e.g. invokes) happen at once.
25+
26+
Jitter is meant to be used to spread operations across time.
27+
28+
members:
29+
:NONE: No jitter; use the exact calculated delay
30+
:FULL: Full jitter; random delay between 0 and calculated delay
31+
:HALF: Half jitter; random delay between 0.5x and 1.0x of the calculated delay
32+
"""
33+
NONE = "NONE"
34+
FULL = "FULL"
35+
HALF = "HALF"
36+
37+
def compute_jitter(self, delay) -> int:
38+
match self:
39+
case JitterStrategy.NONE:
40+
return 0
41+
case JitterStrategy.FULL:
42+
return random.random() * delay # noqa: S311
43+
case JitterStrategy.HALF:
44+
return (random.random() * 0.5 + 0.5) # noqa: S311
45+
46+
# endregion Jitter
47+
1648

1749
@dataclass
1850
class RetryDecision:
@@ -38,7 +70,7 @@ class RetryStrategyConfig:
3870
initial_delay_seconds: int = 5
3971
max_delay_seconds: int = 300 # 5 minutes
4072
backoff_rate: Numeric = 2.0
41-
jitter_seconds: Numeric = 1.0
73+
jitter_strategy: JitterStrategy = field(default=JitterStrategy.FULL)
4274
retryable_errors: list[str | re.Pattern] = field(
4375
default_factory=lambda: [re.compile(r".*")]
4476
)
@@ -77,9 +109,7 @@ def retry_strategy(error: Exception, attempts_made: int) -> RetryDecision:
77109
config.initial_delay_seconds * (config.backoff_rate ** (attempts_made - 1)),
78110
config.max_delay_seconds,
79111
)
80-
81-
# Add jitter (random not for cryptographic purposes, hence noqa)
82-
jitter = (random.random() * 2 - 1) * config.jitter_seconds # noqa: S311
112+
jitter = config.jitter_strategy.compute_jitter(delay)
83113
final_delay = max(1, delay + jitter)
84114

85115
return RetryDecision.retry(round(final_delay))
@@ -104,7 +134,6 @@ def default(cls) -> Callable[[Exception, int], RetryDecision]:
104134
initial_delay_seconds=5,
105135
max_delay_seconds=60,
106136
backoff_rate=2,
107-
jitter_seconds=1,
108137
)
109138
)
110139

@@ -114,7 +143,6 @@ def transient(cls) -> Callable[[Exception, int], RetryDecision]:
114143
return create_retry_strategy(
115144
RetryStrategyConfig(
116145
max_attempts=3,
117-
initial_delay_seconds=1,
118146
backoff_rate=2,
119147
jitter_seconds=0.5,
120148
)
@@ -129,7 +157,6 @@ def resource_availability(cls) -> Callable[[Exception, int], RetryDecision]:
129157
initial_delay_seconds=5,
130158
max_delay_seconds=300,
131159
backoff_rate=2,
132-
jitter_seconds=1,
133160
)
134161
)
135162

@@ -142,6 +169,5 @@ def critical(cls) -> Callable[[Exception, int], RetryDecision]:
142169
initial_delay_seconds=1,
143170
max_delay_seconds=60,
144171
backoff_rate=1.5,
145-
jitter_seconds=0.3,
146172
)
147173
)

0 commit comments

Comments
 (0)