|
1 | 1 | """Check if the Llama Stack version is supported by the LCS.""" |
2 | 2 |
|
| 3 | +import asyncio |
3 | 4 | import re |
4 | 5 |
|
| 6 | +from llama_stack_client import APIConnectionError |
5 | 7 | from llama_stack_client._client import AsyncLlamaStackClient |
6 | 8 | from semver import Version |
7 | 9 |
|
|
13 | 15 |
|
logger = get_logger(__name__)

# Retry settings used while waiting for Llama Stack to become ready at startup.
# When LCS runs as a sidecar alongside Llama Stack, both containers start
# concurrently, so Llama Stack may not be accepting connections yet when LCS
# performs its first version check.
#
# _DEFAULT_MAX_RETRIES: total number of connection attempts before giving up.
# _DEFAULT_RETRY_DELAY: fixed pause between attempts, in seconds.
_DEFAULT_MAX_RETRIES = 5
_DEFAULT_RETRY_DELAY = 2
| 24 | + |
16 | 25 |
|
class InvalidLlamaStackVersionException(Exception):
    """Signal that the reported Llama Stack version is not acceptable."""
19 | 28 |
|
20 | 29 |
|
async def check_llama_stack_version(
    client: AsyncLlamaStackClient,
    max_retries: int = _DEFAULT_MAX_RETRIES,
    retry_delay: float = _DEFAULT_RETRY_DELAY,
) -> None:
    """
    Verify the connected Llama Stack's version is within the supported range.

    This coroutine fetches the Llama Stack version from the provided client
    and validates it against the configured minimal and maximal supported
    versions. Connection attempts are retried with a fixed delay to handle
    the case where Llama Stack is still starting up (e.g., when running as
    a sidecar in the same pod).

    Only connection failures are retried. A version that was fetched
    successfully but is out of range fails immediately, because waiting
    cannot change the outcome.

    Args:
        client: The async Llama Stack client.
        max_retries: Maximum number of connection attempts before giving up.
            Values below 1 are treated as 1 so the version check is never
            skipped.
        retry_delay: Delay in seconds between retry attempts. Fractional
            values are accepted.

    Raises:
        APIConnectionError: If Llama Stack is unreachable after all retries.
        InvalidLlamaStackVersionException: If the detected version is outside
            the supported range or cannot be parsed.
    """
    # Always try at least once: with an empty range the loop body would never
    # run and the function would return as if the version had been validated.
    attempts = max(1, max_retries)

    for attempt in range(1, attempts + 1):
        try:
            # Keep the try body limited to the network call so that only
            # connection failures are candidates for a retry.
            version_info = await client.inspect.version()
        except APIConnectionError:
            if attempt == attempts:
                # Out of attempts: surface the original connection error.
                raise
            logger.warning(
                "Llama Stack not ready (attempt %d/%d), retrying in %gs...",
                attempt,
                attempts,
                retry_delay,
            )
            await asyncio.sleep(retry_delay)
        else:
            compare_versions(
                version_info.version,
                MINIMAL_SUPPORTED_LLAMA_STACK_VERSION,
                MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION,
            )
            return
43 | 73 |
|
44 | 74 |
|
45 | 75 | def compare_versions(version_info: str, minimal: str, maximal: str) -> None: |
|
0 commit comments