Skip to content

Commit 4661496

Browse files
mrm9084Copilot
andauthored
App Configuration - Startup Retry (#44873)
* Adding StartupOptions * Adding Tests * Update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fixing checks * Update README.md * review items * review updates * remove is failoverable change * Update test_startup_retry.py * Fixing formatting * remove 404 logic * Update _azureappconfigurationproviderasync.py * remove broad exception * Updated startup_timeout check * rename of backoff items * Update test_startup_retry.py * Update _azureappconfigurationproviderbase.py * Update test_startup_retry.py --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 26d86ba commit 4661496

6 files changed

Lines changed: 531 additions & 17 deletions

File tree

sdk/appconfiguration/azure-appconfiguration-provider/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
### Features Added
66

7+
- Added startup retry: if the initial load fails, the provider retries until the startup timeout is reached. The default timeout is 100 seconds and can be configured via the `startup_timeout` keyword argument of the `load` method.
78
- Adds support for adding `audience` to the kwargs for `load` allowing it to specify the audience for the request.
89

910
### Breaking Changes

sdk/appconfiguration/azure-appconfiguration-provider/azure/appconfiguration/provider/_azureappconfigurationprovider.py

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# license information.
55
# -------------------------------------------------------------------------
66
import datetime
7+
import time
78
import logging
89
from typing import (
910
Any,
@@ -27,13 +28,15 @@
2728
from ._constants import (
2829
FEATURE_MANAGEMENT_KEY,
2930
FEATURE_FLAG_KEY,
31+
DEFAULT_STARTUP_TIMEOUT,
3032
SNAPSHOT_REF_CONTENT_TYPE,
3133
)
3234
from ._azureappconfigurationproviderbase import (
3335
AzureAppConfigurationProviderBase,
3436
delay_failure,
3537
process_load_parameters,
3638
sdk_allowed_kwargs,
39+
_get_startup_backoff,
3740
)
3841
from ._client_manager import ConfigurationClientManager, _ConfigurationClientWrapper as ConfigurationClient
3942
from ._user_agent import USER_AGENT
@@ -60,6 +63,7 @@ def load( # pylint: disable=docstring-keyword-should-match-keyword-only
6063
feature_flag_enabled: bool = False,
6164
feature_flag_selectors: Optional[List[SettingSelector]] = None,
6265
feature_flag_refresh_enabled: bool = False,
66+
startup_timeout: int = DEFAULT_STARTUP_TIMEOUT,
6367
**kwargs,
6468
) -> "AzureAppConfigurationProvider":
6569
"""
@@ -104,6 +108,9 @@ def load( # pylint: disable=docstring-keyword-should-match-keyword-only
104108
:keyword configuration_mapper: Optional function to map configuration settings. Enables transformation of
105109
configurations before they are added to the provider.
106110
:paramtype configuration_mapper: Optional[Callable[[ConfigurationSetting], None]]
111+
:keyword startup_timeout: The amount of time in seconds allowed to load data from Azure App Configuration on
112+
startup. The default value is 100 seconds.
113+
:paramtype startup_timeout: int
107114
"""
108115

109116

@@ -124,6 +131,7 @@ def load( # pylint: disable=docstring-keyword-should-match-keyword-only
124131
feature_flag_enabled: bool = False,
125132
feature_flag_selectors: Optional[List[SettingSelector]] = None,
126133
feature_flag_refresh_enabled: bool = False,
134+
startup_timeout: int = DEFAULT_STARTUP_TIMEOUT,
127135
**kwargs,
128136
) -> "AzureAppConfigurationProvider":
129137
"""
@@ -170,6 +178,9 @@ def load( # pylint: disable=docstring-keyword-should-match-keyword-only
170178
:keyword configuration_mapper: Optional function to map configuration settings. Enables transformation of
171179
configurations before they are added to the provider.
172180
:paramtype configuration_mapper: Optional[Callable[[ConfigurationSetting], None]]
181+
:keyword startup_timeout: The amount of time in seconds allowed to load data from Azure App Configuration on
182+
startup. The default value is 100 seconds.
183+
:paramtype startup_timeout: int
173184
"""
174185

175186

@@ -184,6 +195,7 @@ def load(*args, **kwargs) -> "AzureAppConfigurationProvider":
184195
params["endpoint"],
185196
params["credential"],
186197
uses_key_vault=params["uses_key_vault"],
198+
startup_timeout=params["startup_timeout"],
187199
**params["kwargs"],
188200
)
189201
kwargs = sdk_allowed_kwargs(params["kwargs"])
@@ -234,6 +246,8 @@ def __init__(self, **kwargs: Any) -> None:
234246
min_backoff: int = min(kwargs.pop("min_backoff", 30), interval)
235247
max_backoff: int = min(kwargs.pop("max_backoff", 600), interval)
236248

249+
self._startup_timeout: int = kwargs.pop("startup_timeout", DEFAULT_STARTUP_TIMEOUT)
250+
237251
self._replica_client_manager = ConfigurationClientManager(
238252
connection_string=kwargs.pop("connection_string"),
239253
endpoint=kwargs.pop("endpoint"),
@@ -363,16 +377,47 @@ def refresh(self, **kwargs) -> None:
363377
self._refresh_lock.release()
364378

365379
def _load_all(self, **kwargs: Any) -> None:
    """
    Load the configuration, retrying with backoff until it succeeds or the startup timeout is reached.

    Each iteration calls ``_try_initialize``. When that fails, the next delay is obtained from
    ``_get_startup_backoff`` based on the time elapsed since this method started. If the delay
    would run past ``self._startup_timeout``, the method gives up instead of sleeping.

    :raises TimeoutError: If the next retry delay exceeds the remaining startup timeout. The second
        exception argument is the list of exceptions collected from the failed attempts.
    """
    # Use a monotonic clock: the wall clock can jump (NTP sync, DST, manual changes), which would
    # make the elapsed-time measurement, and therefore the timeout, unreliable.
    startup_start_time = time.monotonic()
    exponential_backoff_attempts = 0
    startup_exceptions: List[Exception] = []

    while True:
        # Try to initialize from all available clients
        if self._try_initialize(startup_exceptions, **kwargs):
            return  # Successfully loaded

        # Calculate delay before next retry attempt
        elapsed_seconds = time.monotonic() - startup_start_time
        delay, is_exponential_backoff = _get_startup_backoff(elapsed_seconds, exponential_backoff_attempts)

        # Only delays taken from the exponential phase advance the exponential counter
        if is_exponential_backoff:
            exponential_backoff_attempts += 1

        # Give up rather than sleep past the end of the startup window
        remaining_timeout = self._startup_timeout - elapsed_seconds
        if delay > remaining_timeout:
            raise TimeoutError(
                "The provider timed out while attempting to load.",
                startup_exceptions,
            )

        if delay > 0:
            time.sleep(delay)
406+
407+
def _try_initialize(self, startup_exceptions: List[Exception], **kwargs: Any) -> bool:
408+
"""
409+
Try to initialize the provider from all available clients.
410+
411+
:param startup_exceptions: List to collect exceptions from failed attempts.
412+
:type startup_exceptions: List[Exception]
413+
:return: True if initialization succeeded, False otherwise.
414+
:rtype: bool
415+
"""
366416
self._replica_client_manager.refresh_clients()
367417
self._replica_client_manager.find_active_clients()
368418
is_failover_request = False
369419
replica_count = self._replica_client_manager.get_client_count() - 1
370420

371-
error_message = """
372-
Failed to load configuration settings. No Azure App Configuration stores successfully loaded from.
373-
"""
374-
exception: Exception = RuntimeError(error_message)
375-
376421
while client := self._replica_client_manager.get_next_active_client():
377422
headers = self._update_correlation_context_header(
378423
kwargs.pop("headers", {}),
@@ -415,13 +460,15 @@ def _load_all(self, **kwargs: Any) -> None:
415460
with self._update_lock:
416461
self._watched_settings = watched_settings
417462
self._dict = processed_settings
418-
return
463+
return True
419464
except AzureError as e:
420-
exception = e
421465
logger.warning("Failed to load configurations from endpoint %s.\n %s", client.endpoint, e.message)
422466
self._replica_client_manager.backoff(client)
423467
is_failover_request = True
424-
raise exception
468+
469+
startup_exceptions.append(e)
470+
471+
return False
425472

426473
def _expand_snapshot_references(
427474
self, configuration_settings: List[ConfigurationSetting], client: ConfigurationClient

sdk/appconfiguration/azure-appconfiguration-provider/azure/appconfiguration/provider/_azureappconfigurationproviderbase.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import json
99
import time
1010
import datetime
11+
import random
1112
from threading import Lock
1213
import logging
1314
from typing import (
@@ -41,6 +42,11 @@
4142
APP_CONFIG_AICC_MIME_PROFILE,
4243
FEATURE_MANAGEMENT_KEY,
4344
FEATURE_FLAG_KEY,
45+
DEFAULT_STARTUP_TIMEOUT,
46+
MAX_STARTUP_BACKOFF_DURATION,
47+
MIN_STARTUP_EXPONENTIAL_BACKOFF_DURATION,
48+
JITTER_RATIO,
49+
STARTUP_BACKOFF_INTERVALS,
4450
)
4551
from ._refresh_timer import _RefreshTimer
4652
from ._request_tracing_context import _RequestTracingContext
@@ -122,12 +128,18 @@ def process_load_parameters(*args, **kwargs: Any) -> Dict[str, Any]:
122128
or kwargs.get("uses_key_vault", False)
123129
)
124130

131+
# Get startup timeout
132+
startup_timeout = kwargs.pop("startup_timeout", DEFAULT_STARTUP_TIMEOUT)
133+
if startup_timeout < 0:
134+
raise ValueError("Startup timeout must be greater than or equal to 0 seconds.")
135+
125136
return {
126137
"endpoint": endpoint,
127138
"credential": credential,
128139
"connection_string": connection_string,
129140
"uses_key_vault": uses_key_vault,
130141
"start_time": start_time,
142+
"startup_timeout": startup_timeout,
131143
"kwargs": kwargs,
132144
}
133145

@@ -199,6 +211,71 @@ def sdk_allowed_kwargs(kwargs):
199211
return {k: v for k, v in kwargs.items() if k in allowed_kwargs}
200212

201213

214+
def _jitter(duration: float, ratio: float = JITTER_RATIO) -> float:
    """
    Randomly perturb a duration by up to ``ratio`` of its value in either direction.

    :param duration: The base duration in seconds.
    :type duration: float
    :param ratio: The maximum fractional deviation, from 0 to 1. Defaults to ``JITTER_RATIO``.
    :type ratio: float
    :return: ``duration`` unchanged when ``ratio`` is 0, otherwise ``duration`` scaled by a random
        factor between ``1 - ratio`` and ``1 + ratio``.
    :rtype: float
    :raises ValueError: If ``ratio`` is greater than 1 or less than 0.
    """
    if ratio > 1 or ratio < 0:
        raise ValueError("Jitter ratio must be between 0 and 1.")
    if ratio == 0:
        return duration
    # random.random() * 2 - 1 maps the unit interval onto [-1, 1) before scaling by the ratio
    offset = ratio * (random.random() * 2 - 1)
    scale = 1 + offset
    return duration * scale
231+
232+
233+
def _get_startup_backoff(elapsed_seconds: float, attempts: int) -> Tuple[float, bool]:
    """
    Pick the delay to wait before the next startup retry.

    The first entry of ``STARTUP_BACKOFF_INTERVALS`` whose threshold is greater than
    ``elapsed_seconds`` supplies a fixed delay. When no entry matches, the delay comes from
    ``_calculate_backoff_duration``.

    :param elapsed_seconds: The time elapsed since startup began, in seconds.
    :type elapsed_seconds: float
    :param attempts: The attempt counter forwarded unchanged to ``_calculate_backoff_duration``
        when no fixed interval applies.
    :type attempts: int
    :return: A tuple of the backoff duration in seconds and a flag that is ``True`` only when the
        duration came from ``_calculate_backoff_duration`` rather than a fixed interval.
    :rtype: Tuple[float, bool]
    """
    no_match = object()
    # Lazy lookup: stops at the first interval whose threshold has not been reached yet
    fixed_delay = next(
        (backoff for threshold, backoff in STARTUP_BACKOFF_INTERVALS if elapsed_seconds < threshold),
        no_match,
    )
    if fixed_delay is no_match:
        return _calculate_backoff_duration(attempts), True
    return fixed_delay, False
249+
250+
251+
def _calculate_backoff_duration(attempts: int) -> float:
    """
    Calculate the exponential backoff duration for a startup retry.

    :param attempts: The number of exponential backoff delays already used (0-based). It is
        converted to a 1-based attempt number internally.
    :type attempts: int
    :return: ``MIN_STARTUP_EXPONENTIAL_BACKOFF_DURATION`` (without jitter) for the first attempt.
        For later attempts, the minimum duration doubled once per additional attempt, capped at
        ``MAX_STARTUP_BACKOFF_DURATION``, with jitter applied.
    :rtype: float
    :raises ValueError: If ``attempts`` is negative.
    """
    attempt_number = attempts + 1  # callers pass a 0-based count
    if attempt_number < 1:
        raise ValueError("Number of attempts must be at least 1.")

    if attempt_number == 1:
        return MIN_STARTUP_EXPONENTIAL_BACKOFF_DURATION

    # Exponential backoff: min * 2^(attempt_number - 1).
    # Python integers cannot overflow, so the cap on the exponent only keeps the intermediate
    # value small for very large attempt counts; the result is clamped to the maximum anyway.
    exponent = min(attempt_number - 1, 63)
    calculated = min(
        MIN_STARTUP_EXPONENTIAL_BACKOFF_DURATION * (1 << exponent),
        MAX_STARTUP_BACKOFF_DURATION,
    )

    return _jitter(calculated, JITTER_RATIO)
277+
278+
202279
class AzureAppConfigurationProviderBase(Mapping[str, Union[str, JSON]]): # pylint: disable=too-many-instance-attributes
203280
"""
204281
Provides a dictionary-like interface to Azure App Configuration settings. Enables loading of sets of configuration

sdk/appconfiguration/azure-appconfiguration-provider/azure/appconfiguration/provider/_constants.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,32 @@
3737
# ------------------------------------------------------------------------
3838
APP_CONFIG_AI_MIME_PROFILE = "https://azconfig.io/mime-profiles/ai/"
3939
APP_CONFIG_AICC_MIME_PROFILE = "https://azconfig.io/mime-profiles/ai/chat-completion"
40-
SNAPSHOT_REF_CONTENT_TYPE = 'application/json; profile="https://azconfig.io/mime-profiles/snapshot-ref"; charset=utf-8'
40+
41+
# =============================================================================
42+
# Startup Retry Constants
43+
# =============================================================================
44+
# Timeout
45+
DEFAULT_STARTUP_TIMEOUT = 100 # seconds
46+
47+
# Backoff durations
48+
MIN_STARTUP_EXPONENTIAL_BACKOFF_DURATION = 30 # seconds - minimum backoff after fixed window
49+
MAX_STARTUP_BACKOFF_DURATION = 600 # seconds (10 minutes) - caps exponential backoff
50+
51+
# Jitter ratio for randomizing backoff durations (+/- 25% variation)
52+
JITTER_RATIO = 0.25
53+
54+
# Fixed backoff intervals: (elapsed_time_threshold, backoff_duration)
55+
# Defines fixed backoff durations based on how long startup has been attempting
56+
STARTUP_BACKOFF_INTERVALS = [
57+
(100, 5), # 0-100 seconds elapsed: 5 second backoff
58+
(200, 10), # 100-200 seconds elapsed: 10 second backoff
59+
(600, MIN_STARTUP_EXPONENTIAL_BACKOFF_DURATION), # 200-600 seconds elapsed: 30 second backoff
60+
]
4161

4262
# ------------------------------------------------------------------------
4363
# Snapshot Reference Constants
4464
# ------------------------------------------------------------------------
65+
SNAPSHOT_REF_CONTENT_TYPE = 'application/json; profile="https://azconfig.io/mime-profiles/snapshot-ref"; charset=utf-8'
4566
SNAPSHOT_NAME_FIELD = "snapshot_name"
4667

4768
# ------------------------------------------------------------------------

0 commit comments

Comments
 (0)