Skip to content

Commit 96616f1

Browse files
committed
LCORE-1599: Add retry with backoff to check_llama_stack_version
Signed-off-by: Savitoj Singh <savsingh@redhat.com>
1 parent 1ee31fc commit 96616f1

2 files changed

Lines changed: 68 additions & 11 deletions

File tree

src/utils/llama_stack_version.py

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
"""Check if the Llama Stack version is supported by the LCS."""
22

3+
import asyncio
34
import re
45

6+
from llama_stack_client import APIConnectionError
57
from llama_stack_client._client import AsyncLlamaStackClient
68
from semver import Version
79

@@ -13,33 +15,61 @@
1315

1416
logger = get_logger(__name__)
1517

18+
# Retry settings for waiting on Llama Stack readiness during startup.
# When LCS runs as a sidecar alongside Llama Stack, both containers start
# concurrently and Llama Stack may not be ready when LCS attempts its
# first version check.
#
# _DEFAULT_MAX_RETRIES is the default for the ``max_retries`` parameter of
# check_llama_stack_version: the total number of connection attempts made
# before the connection error is propagated to the caller.
_DEFAULT_MAX_RETRIES = 5
# _DEFAULT_RETRY_DELAY is the default for ``retry_delay``: the fixed pause,
# in seconds, passed to asyncio.sleep() between two consecutive attempts.
_DEFAULT_RETRY_DELAY = 2
24+
1625

1726
class InvalidLlamaStackVersionException(Exception):
    """Llama Stack version is not valid.

    Documented by check_llama_stack_version as the error raised when the
    detected version is outside the supported range or cannot be parsed.
    """
1928

2029

2130
async def check_llama_stack_version(
    client: AsyncLlamaStackClient,
    max_retries: int = _DEFAULT_MAX_RETRIES,
    retry_delay: int = _DEFAULT_RETRY_DELAY,
) -> None:
    """
    Verify the connected Llama Stack's version is within the supported range.

    This coroutine fetches the Llama Stack version from the provided client
    and validates it against the configured minimal and maximal supported
    versions. Connection attempts are retried with a fixed delay to handle
    the case where Llama Stack is still starting up (e.g., when running as
    a sidecar in the same pod).

    Only connection failures are retried. A version that was fetched
    successfully but fails validation is reported immediately.

    Args:
        client: The async Llama Stack client.
        max_retries: Maximum number of connection attempts before giving up.
            Values below 1 are treated as 1, so the version is always
            checked at least once.
        retry_delay: Delay in seconds between retry attempts.

    Raises:
        APIConnectionError: If Llama Stack is unreachable after all retries.
        InvalidLlamaStackVersionException: If the detected version is outside
            the supported range or cannot be parsed.
    """
    # Always make at least one attempt. With a non-positive ``max_retries``
    # an empty ``range()`` would skip the loop entirely and the function
    # would return as though the version had been validated.
    attempts = max(1, max_retries)

    for attempt in range(1, attempts + 1):
        try:
            # Keep the try body limited to the network call so that only
            # connection failures can trigger a retry.
            version_info = await client.inspect.version()
        except APIConnectionError:
            if attempt == attempts:
                # Out of attempts: surface the original connection error.
                raise
            logger.warning(
                "Llama Stack not ready (attempt %d/%d), retrying in %ds...",
                attempt,
                attempts,
                retry_delay,
            )
            await asyncio.sleep(retry_delay)
        else:
            compare_versions(
                version_info.version,
                MINIMAL_SUPPORTED_LLAMA_STACK_VERSION,
                MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION,
            )
            return
4373

4474

4575
def compare_versions(version_info: str, minimal: str, maximal: str) -> None:

tests/unit/utils/test_llama_stack_version.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Any
44

55
import pytest
6+
from llama_stack_client import APIConnectionError
67
from llama_stack_client.types import VersionInfo
78
from pytest_mock import MockerFixture
89
from pytest_subtests import SubTests
@@ -115,3 +116,29 @@ async def test_check_llama_stack_version_too_big_version(
115116
with subtests.test(msg="Increased all numbers"):
116117
bigger_version = max_version.bump_major().bump_minor().bump_patch()
117118
await _check_version_must_fail(mock_client, bigger_version)
119+
120+
121+
@pytest.mark.asyncio
async def test_check_llama_stack_version_retries_on_connection_error(
    mocker: MockerFixture,
) -> None:
    """Verify that connection failures are retried until a call succeeds.

    The mocked client raises APIConnectionError on its first two version
    requests and returns a version on the third. The check must finish
    without raising, having slept once after each failed attempt.
    """
    # Patch the sleep used by the module under test so the test does not wait.
    sleep_mock = mocker.patch("utils.llama_stack_version.asyncio.sleep")

    connection_failures = [
        APIConnectionError(request=mocker.MagicMock()) for _ in range(2)
    ]
    eventual_response = VersionInfo(version=MINIMAL_SUPPORTED_LLAMA_STACK_VERSION)

    client = mocker.AsyncMock()
    client.inspect.version.side_effect = [*connection_failures, eventual_response]

    await check_llama_stack_version(client, max_retries=5, retry_delay=1)

    # Two failed attempts plus the one that succeeded.
    assert client.inspect.version.call_count == 3
    # One pause after each failed attempt, none after the success.
    assert sleep_mock.call_count == 2
140+
141+
142+
143+
144+

0 commit comments

Comments
 (0)