Skip to content

Commit 409cdc2

Browse files
authored
Merge pull request #1409 from savitojs/savitojs/LCORE-1599-retry-llama-stack-version-check
LCORE-1599: Add retry with backoff to check_llama_stack_version
2 parents 56ce243 + 5b25619 commit 409cdc2

2 files changed

Lines changed: 85 additions & 12 deletions

File tree

src/utils/llama_stack_version.py

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
"""Check if the Llama Stack version is supported by the LCS."""
22

3+
import asyncio
34
import re
45

5-
from llama_stack_client._client import AsyncLlamaStackClient
6+
from llama_stack_client import APIConnectionError, AsyncLlamaStackClient
67
from semver import Version
78

89
from constants import (
@@ -13,33 +14,64 @@
1314

1415
logger = get_logger(__name__)
1516

17+
# Retry settings for waiting on Llama Stack readiness during startup.
18+
# When LCS runs as a sidecar alongside Llama Stack, both containers start
19+
# concurrently and Llama Stack may not be ready when LCS attempts its
20+
# first version check.
21+
_DEFAULT_MAX_RETRIES = 5
22+
_DEFAULT_RETRY_DELAY = 2
23+
1624

1725
class InvalidLlamaStackVersionException(Exception):
    """Signal that the Llama Stack version failed validation.

    Per the version-check docstring in this module, it is raised when the
    detected version is outside the supported range or cannot be parsed.
    """
1927

2028

2129
async def check_llama_stack_version(
    client: AsyncLlamaStackClient,
    max_retries: int = _DEFAULT_MAX_RETRIES,
    retry_delay: float = _DEFAULT_RETRY_DELAY,
) -> None:
    """
    Verify the connected Llama Stack's version is within the supported range.

    This coroutine fetches the Llama Stack version from the provided client
    and validates it against the configured minimal and maximal supported
    versions. Connection attempts are retried with a fixed delay to handle
    the case where Llama Stack is still starting up (e.g., when running as
    a sidecar in the same pod).

    Only connection failures are retried; a version that was fetched
    successfully but fails validation is reported immediately.

    Args:
        client: The async Llama Stack client.
        max_retries: Maximum number of connection attempts before giving up.
            Must be at least 1.
        retry_delay: Delay in seconds between retry attempts. Fractional
            values are accepted.

    Raises:
        ValueError: If max_retries is smaller than 1.
        APIConnectionError: If Llama Stack is unreachable after all retries.
        InvalidLlamaStackVersionException: If the detected version is outside
            the supported range or cannot be parsed.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be >= 1")

    for attempt in range(1, max_retries + 1):
        # Keep the try body limited to the network call so that only
        # connection problems are ever treated as "not ready yet".
        try:
            version_info = await client.inspect.version()
        except APIConnectionError:
            if attempt == max_retries:
                # Out of attempts: surface the original connection error.
                raise
            # %s (not %d) so a fractional delay is not logged as truncated.
            logger.warning(
                "Llama Stack not ready (attempt %d/%d), retrying in %ss...",
                attempt,
                max_retries,
                retry_delay,
            )
            await asyncio.sleep(retry_delay)
        else:
            compare_versions(
                version_info.version,
                MINIMAL_SUPPORTED_LLAMA_STACK_VERSION,
                MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION,
            )
            return
4375

4476

4577
def compare_versions(version_info: str, minimal: str, maximal: str) -> None:

tests/unit/utils/test_llama_stack_version.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Any
44

55
import pytest
6+
from llama_stack_client import APIConnectionError
67
from llama_stack_client.types import VersionInfo
78
from pytest_mock import MockerFixture
89
from pytest_subtests import SubTests
@@ -115,3 +116,43 @@ async def test_check_llama_stack_version_too_big_version(
115116
with subtests.test(msg="Increased all numbers"):
116117
bigger_version = max_version.bump_major().bump_minor().bump_patch()
117118
await _check_version_must_fail(mock_client, bigger_version)
119+
120+
121+
@pytest.mark.asyncio
async def test_check_llama_stack_version_retries_on_connection_error(
    mocker: MockerFixture,
) -> None:
    """Test that check_llama_stack_version retries on APIConnectionError."""
    mock_client = mocker.AsyncMock()
    # Patch sleep so the test does not actually wait between attempts.
    mock_sleep = mocker.patch("utils.llama_stack_version.asyncio.sleep")

    # Fail twice with connection error, then succeed
    mock_client.inspect.version.side_effect = [
        APIConnectionError(request=mocker.MagicMock()),
        APIConnectionError(request=mocker.MagicMock()),
        VersionInfo(version=MINIMAL_SUPPORTED_LLAMA_STACK_VERSION),
    ]

    await check_llama_stack_version(mock_client, max_retries=5, retry_delay=1)

    assert mock_client.inspect.version.call_count == 3
    assert mock_sleep.call_count == 2
    # Each pause between attempts must use the configured retry_delay.
    assert [call.args for call in mock_sleep.call_args_list] == [(1,), (1,)]
140+
141+
142+
@pytest.mark.asyncio
async def test_check_llama_stack_version_raises_after_max_retries(
    mocker: MockerFixture,
) -> None:
    """Test that check_llama_stack_version raises after all retries are exhausted."""
    mock_client = mocker.AsyncMock()
    # Patch sleep so the test does not actually wait between attempts.
    mock_sleep = mocker.patch("utils.llama_stack_version.asyncio.sleep")

    # Every attempt fails with a connection error.
    mock_client.inspect.version.side_effect = APIConnectionError(
        request=mocker.MagicMock()
    )

    with pytest.raises(APIConnectionError):
        await check_llama_stack_version(mock_client, max_retries=3, retry_delay=1)

    assert mock_client.inspect.version.call_count == 3
    # No sleep follows the final failed attempt, hence one fewer than attempts.
    assert mock_sleep.call_count == 2
    # Each pause between attempts must use the configured retry_delay.
    assert [call.args for call in mock_sleep.call_args_list] == [(1,), (1,)]

0 commit comments

Comments
 (0)