Skip to content

Commit 03dc429

Browse files
feat: adding circuit breaker feature
1 parent 683ba5f commit 03dc429

29 files changed

Lines changed: 2389 additions & 0 deletions

File tree

aws_lambda_powertools/shared/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,6 @@
7676

7777
# Idempotency constants
7878
IDEMPOTENCY_DISABLED_ENV: str = "POWERTOOLS_IDEMPOTENCY_DISABLED"
79+
80+
# Circuit breaker constants
81+
CIRCUIT_BREAKER_DISABLED_ENV: str = "POWERTOOLS_CIRCUIT_BREAKER_DISABLED"
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
"""
2+
Circuit Breaker utility for protecting unhealthy downstream dependencies.
3+
4+
!!! warning "Alpha / experimental"
5+
This utility is published under the `_alpha` namespace while we collect
6+
feedback. The public API may change in a backwards-incompatible way before it
7+
is promoted to GA. Pin your version and follow the tracking discussion before
8+
relying on it in production.
9+
"""
10+
11+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.circuit_breaker import circuit_breaker
12+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.config import CircuitBreakerConfig
13+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.exceptions import (
14+
CircuitBreakerConfigError,
15+
CircuitBreakerError,
16+
CircuitBreakerOpenError,
17+
CircuitBreakerPersistenceError,
18+
)
19+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.states import (
20+
CircuitInfo,
21+
CircuitState,
22+
CircuitTransition,
23+
)
24+
25+
__all__ = (
26+
"circuit_breaker",
27+
"CircuitBreakerConfig",
28+
"CircuitInfo",
29+
"CircuitState",
30+
"CircuitTransition",
31+
"CircuitBreakerError",
32+
"CircuitBreakerOpenError",
33+
"CircuitBreakerConfigError",
34+
"CircuitBreakerPersistenceError",
35+
)
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
"""
2+
Orchestrator for the Circuit Breaker utility.
3+
4+
:class:`CircuitBreakerHandler` owns the state machine and the per-environment failure
5+
counter; the persistence layer owns the shared truth. This split keeps the healthy
6+
path write-free: failures are counted locally and only persisted on a state transition.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
import datetime
12+
import logging
13+
import uuid
14+
from typing import TYPE_CHECKING, Any
15+
16+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.exceptions import CircuitBreakerOpenError
17+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.states import CircuitState, CircuitTransition
18+
19+
if TYPE_CHECKING:
20+
from collections.abc import Callable
21+
22+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.config import CircuitBreakerConfig
23+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.persistence.base import (
24+
CircuitBreakerPersistenceLayer,
25+
)
26+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.states import CircuitInfo
27+
28+
logger = logging.getLogger(__name__)

# Per-environment, per-circuit consecutive counters. Module-level so they survive across
# invocations within the same execution environment, the same way idempotency caches do.
# Keys are circuit names; values are the current consecutive count.
_LOCAL_FAILURES: dict[str, int] = {}
_LOCAL_SUCCESSES: dict[str, int] = {}

# Stable per-environment identifier used to claim the half-open probe lock.
# Generated once at import time, so every handler created in this process shares it.
_ENVIRONMENT_ID = uuid.uuid4().hex


class CircuitBreakerHandler:
    """
    Drive a single protected call through the circuit breaker state machine.

    A new handler is created per invocation by the decorator. It reads the shared state,
    routes the call (run, short-circuit, or probe), and records the outcome.

    Failure and success counters live in module-level dictionaries keyed by circuit
    name, so they are shared by every handler created in the same process. The
    persistence store is only written on a state transition.

    Parameters
    ----------
    function : Callable
        The protected function.
    name : str
        Circuit name.
    config : CircuitBreakerConfig
        Circuit configuration.
    persistence_store : CircuitBreakerPersistenceLayer
        Shared state store. It is configured with ``config`` and ``name`` during
        construction.
    on_circuit_open : Callable | None
        Callback invoked with the protected call's own ``*args``/``**kwargs`` plus a
        trailing ``circuit`` keyword argument when the circuit is open. If ``None``, an
        open circuit raises :class:`CircuitBreakerOpenError`.
    on_transition : Callable | None
        Callback invoked with a single :class:`CircuitTransition` whenever this handler
        changes the circuit state. Exceptions it raises are logged and ignored.
    function_args : tuple
        Positional arguments the protected function was called with.
    function_kwargs : dict
        Keyword arguments the protected function was called with.
    """

    def __init__(
        self,
        function: Callable,
        name: str,
        config: CircuitBreakerConfig,
        persistence_store: CircuitBreakerPersistenceLayer,
        on_circuit_open: Callable | None = None,
        on_transition: Callable | None = None,
        function_args: tuple | None = None,
        function_kwargs: dict | None = None,
    ):
        self.function = function
        self.name = name
        self.config = config
        self.on_circuit_open = on_circuit_open
        self.on_transition = on_transition
        self.fn_args = function_args or ()
        self.fn_kwargs = function_kwargs or {}

        # Configure before storing so a store that rejects the configuration fails
        # construction instead of leaving a half-initialised handler behind.
        persistence_store.configure(config=config, circuit_name=name)
        self.persistence_store = persistence_store

    def handle(self) -> Any:
        """
        Evaluate the circuit and route the call.

        Returns
        -------
        Any
            The protected function's result when the call runs, or the
            ``on_circuit_open`` callback's return value when the circuit is open.

        Raises
        ------
        CircuitBreakerOpenError
            If the circuit is open and no callback is registered.
        """
        record = self.persistence_store.get_state(self.name)

        if record.state == CircuitState.CLOSED:
            return self._call_closed()

        if record.state == CircuitState.OPEN:
            # Read the clock once: the fallback for a missing ``opened_at`` and the
            # recovery comparison must agree, otherwise two reads straddling a second
            # boundary could make a short recovery window look elapsed.
            now = self._now()
            # ``opened_at`` may legitimately be 0 (epoch); treat only None as missing.
            opened_at = record.opened_at if record.opened_at is not None else now
            if now >= opened_at + self.config.recovery_timeout:
                # Recovery window elapsed: try to become the single prober.
                if self.persistence_store.try_acquire_half_open(self.name, _ENVIRONMENT_ID, opened_at):
                    # A freshly claimed probe cycle starts counting from zero so a stale
                    # partial count from an earlier cycle cannot close the circuit early.
                    _LOCAL_SUCCESSES[self.name] = 0
                    self._notify(CircuitState.OPEN, CircuitState.HALF_OPEN, opened_at=opened_at)
                    return self._call_probe()
            return self._open_response(record.to_circuit_info())

        # HALF_OPEN: only the environment that owns the probe lock runs.
        if record.half_open_owner == _ENVIRONMENT_ID:
            return self._call_probe()
        return self._open_response(record.to_circuit_info())

    def _call_closed(self) -> Any:
        """
        Run the protected call while the circuit is closed, tracking failures.

        A success resets the local failure counter. An exception that
        ``config.counts_as_failure`` accepts increments it; once the counter reaches
        ``config.failure_threshold`` the circuit is persisted as open and the
        transition hook fires. The original exception is always re-raised.

        Note that ``save_open`` runs while the original exception is being handled, so
        an error raised by the persistence store propagates in its place.
        """
        try:
            result = self.function(*self.fn_args, **self.fn_kwargs)
        except Exception as exc:
            # Exceptions the configuration does not classify as failures pass through
            # without touching the counter.
            if not self.config.counts_as_failure(exc):
                raise
            failures = _LOCAL_FAILURES.get(self.name, 0) + 1
            _LOCAL_FAILURES[self.name] = failures
            if failures >= self.config.failure_threshold:
                logger.debug("Circuit '%s' tripping CLOSED to OPEN after %d failures.", self.name, failures)
                opened_at = self._now()
                self.persistence_store.save_open(self.name, failure_count=failures, opened_at=opened_at)
                _LOCAL_FAILURES[self.name] = 0
                self._notify(CircuitState.CLOSED, CircuitState.OPEN, opened_at=opened_at)
            raise
        else:
            _LOCAL_FAILURES[self.name] = 0
            return result

    def _call_probe(self) -> Any:
        """
        Run a probe during half-open, closing or reopening based on the outcome.

        A counted failure reopens the circuit immediately and re-raises. A success
        increments the local success counter; once it reaches
        ``config.success_threshold`` the circuit is persisted as closed and both local
        counters are cleared.
        """
        try:
            result = self.function(*self.fn_args, **self.fn_kwargs)
        except Exception as exc:
            if not self.config.counts_as_failure(exc):
                raise
            logger.debug("Circuit '%s' probe failed; reopening.", self.name)
            opened_at = self._now()
            self.persistence_store.save_reopen(self.name, opened_at=opened_at)
            _LOCAL_SUCCESSES[self.name] = 0
            self._notify(CircuitState.HALF_OPEN, CircuitState.OPEN, opened_at=opened_at)
            raise
        else:
            successes = _LOCAL_SUCCESSES.get(self.name, 0) + 1
            _LOCAL_SUCCESSES[self.name] = successes
            if successes >= self.config.success_threshold:
                logger.debug("Circuit '%s' closing after %d probe successes.", self.name, successes)
                self.persistence_store.save_closed(self.name)
                _LOCAL_SUCCESSES[self.name] = 0
                _LOCAL_FAILURES[self.name] = 0
                self._notify(CircuitState.HALF_OPEN, CircuitState.CLOSED)
            return result

    def _open_response(self, circuit: CircuitInfo) -> Any:
        """
        Produce the response for an open circuit: callback result or raise.

        Parameters
        ----------
        circuit : CircuitInfo
            Snapshot of the circuit handed to the callback or attached to the error.

        Raises
        ------
        CircuitBreakerOpenError
            If no ``on_circuit_open`` callback is registered.
        """
        if self.on_circuit_open is not None:
            # Forward the protected call's arguments unchanged: positional stay positional,
            # keyword stay keyword. The circuit snapshot is passed as a keyword argument so
            # it does not depend on argument position or dict ordering.
            # NOTE: a protected call that itself receives a ``circuit=`` keyword would
            # collide with this argument and raise ``TypeError``.
            return self.on_circuit_open(*self.fn_args, **self.fn_kwargs, circuit=circuit)
        raise CircuitBreakerOpenError(
            f"Circuit '{self.name}' is open.",
            circuit=circuit,
        )

    def _notify(self, from_state: CircuitState, to_state: CircuitState, opened_at: int | None = None) -> None:
        """
        Fire the ``on_transition`` hook for a state change.

        Called only on real transitions, never on the hot path. Any exception the hook
        raises is swallowed and logged: observability must never break the protected call.
        """
        if self.on_transition is None:
            return
        try:
            self.on_transition(
                CircuitTransition(
                    circuit_name=self.name,
                    from_state=from_state,
                    to_state=to_state,
                    opened_at=opened_at,
                ),
            )
        except Exception:
            logger.warning("on_transition hook for circuit '%s' raised; ignoring.", self.name, exc_info=True)

    @staticmethod
    def _now() -> int:
        """Current unix timestamp in whole seconds."""
        # Timezone-aware "now" maps to the epoch directly, without interpreting a naive
        # value as local time.
        return int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
"""
2+
Primary interface for the Circuit Breaker utility.
3+
"""
4+
5+
from __future__ import annotations
6+
7+
import functools
8+
import logging
9+
import os
10+
import warnings
11+
from typing import TYPE_CHECKING, Any
12+
13+
from aws_lambda_powertools.shared import constants
14+
from aws_lambda_powertools.shared.functions import strtobool
15+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.base import CircuitBreakerHandler
16+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.config import CircuitBreakerConfig
17+
from aws_lambda_powertools.warnings import PowertoolsUserWarning
18+
19+
if TYPE_CHECKING:
20+
from collections.abc import Callable
21+
22+
from aws_lambda_powertools.utilities.circuit_breaker_alpha.persistence.base import (
23+
CircuitBreakerPersistenceLayer,
24+
)
25+
26+
logger = logging.getLogger(__name__)
27+
28+
29+
def circuit_breaker(
    name: str,
    persistence_store: CircuitBreakerPersistenceLayer,
    on_circuit_open: Callable | None = None,
    on_transition: Callable | None = None,
    config: CircuitBreakerConfig | None = None,
) -> Callable:
    """
    Guard a function that calls a failure-prone downstream dependency with a circuit breaker.

    Decorate the function that performs the downstream call rather than the whole Lambda
    handler, so that a tripped circuit reflects one dependency and not unrelated logic.

    Every call builds a fresh ``CircuitBreakerHandler`` and delegates to its ``handle``
    method. While the circuit is open the protected function is skipped: a registered
    ``on_circuit_open`` callback supplies the result, otherwise
    :class:`CircuitBreakerOpenError` is raised.

    Setting the environment variable named by ``constants.CIRCUIT_BREAKER_DISABLED_ENV``
    to a truthy value bypasses the circuit entirely; each bypassed call emits a
    ``PowertoolsUserWarning`` and invokes the function directly.

    Parameters
    ----------
    name : str
        Unique circuit name. Circuits are independent per name; a function that talks
        to several backends should use one circuit for each.
    persistence_store : CircuitBreakerPersistenceLayer
        Shared state store (for example ``CircuitBreakerDynamoDBPersistence``).
    on_circuit_open : Callable | None
        Invoked when the circuit is open, receiving the protected function's own
        arguments (positional stay positional, keyword stay keyword) plus a trailing
        ``circuit`` keyword argument holding a ``CircuitInfo``. Whatever it returns is
        returned to the caller. When ``None``, an open circuit raises
        ``CircuitBreakerOpenError``.
    on_transition : Callable | None
        Invoked with a single ``CircuitTransition`` each time the circuit changes state
        (open, probe, close, reopen). It only fires on transitions, never on the
        per-invocation hot path, which makes it a safe place to emit a CloudWatch
        metric. Exceptions it raises are logged and swallowed so that observability
        never breaks the protected call.
    config : CircuitBreakerConfig | None
        Tunables. ``CircuitBreakerConfig()`` is used when omitted.

    Returns
    -------
    Callable
        A decorator that wraps the target function.

    Example
    -------
    **Protect a payment backend, buffering rejected requests**

        from aws_lambda_powertools.utilities.circuit_breaker_alpha import circuit_breaker, CircuitInfo
        from aws_lambda_powertools.utilities.circuit_breaker_alpha.persistence import (
            CircuitBreakerDynamoDBPersistence,
        )

        persistence = CircuitBreakerDynamoDBPersistence(table_name="CircuitBreakerState")

        def buffer(order: dict, circuit: CircuitInfo):
            sqs.send_message(QueueUrl=url, MessageBody=json.dumps(order))

        @circuit_breaker(name="payment-backend", persistence_store=persistence, on_circuit_open=buffer)
        def charge(order: dict) -> dict:
            return payment_api.charge(order)
    """
    # Resolved once at decoration time and shared by every call of the wrapped function.
    effective_config = config if config else CircuitBreakerConfig()

    def decorator(function: Callable) -> Callable:
        def guarded_call(*args, **kwargs) -> Any:
            # The switch is read on every call, so it can be toggled without re-importing.
            disabled = strtobool(os.getenv(constants.CIRCUIT_BREAKER_DISABLED_ENV, "false"))

            if not disabled:
                return CircuitBreakerHandler(
                    function=function,
                    name=name,
                    config=effective_config,
                    persistence_store=persistence_store,
                    on_circuit_open=on_circuit_open,
                    on_transition=on_transition,
                    function_args=args,
                    function_kwargs=kwargs,
                ).handle()

            # Disabled (development only): warn, then call straight through.
            warnings.warn(
                message="Disabling the circuit breaker is intended for development environments only "
                "and should not be used in production.",
                category=PowertoolsUserWarning,
                stacklevel=2,
            )
            return function(*args, **kwargs)

        # Copy the wrapped function's metadata (name, docstring, ``__wrapped__``).
        return functools.wraps(function)(guarded_call)

    return decorator

0 commit comments

Comments
 (0)