|
1 | 1 | """Check if the Llama Stack version is supported by the LCS.""" |
2 | 2 |
|
| 3 | +import asyncio |
3 | 4 | import re |
4 | 5 |
|
5 | | -from llama_stack_client._client import AsyncLlamaStackClient |
| 6 | +from llama_stack_client import APIConnectionError, AsyncLlamaStackClient |
6 | 7 | from semver import Version |
7 | 8 |
|
8 | 9 | from constants import ( |
|
13 | 14 |
|
14 | 15 | logger = get_logger(__name__) |
15 | 16 |
|
| 17 | +# Retry settings for waiting on Llama Stack readiness during startup. |
| 18 | +# When LCS runs as a sidecar alongside Llama Stack, both containers start |
| 19 | +# concurrently and Llama Stack may not be ready when LCS attempts its |
| 20 | +# first version check. |
| 21 | +_DEFAULT_MAX_RETRIES = 5 |
| 22 | +_DEFAULT_RETRY_DELAY = 2 |
| 23 | + |
16 | 24 |
|
class InvalidLlamaStackVersionException(Exception):
    """Signal that the Llama Stack version is not valid."""
19 | 27 |
|
20 | 28 |
|
async def check_llama_stack_version(
    client: AsyncLlamaStackClient,
    max_retries: int = _DEFAULT_MAX_RETRIES,
    retry_delay: int = _DEFAULT_RETRY_DELAY,
) -> None:
    """
    Check that the connected Llama Stack reports a supported version.

    The version is requested through ``client.inspect.version()`` and handed
    to ``compare_versions`` together with the configured minimal and maximal
    supported versions. Only connection failures are retried: after an
    ``APIConnectionError`` the coroutine sleeps for ``retry_delay`` seconds
    and tries again, which covers the case where Llama Stack is still
    starting up (e.g., when running as a sidecar in the same pod). Any other
    exception propagates immediately.

    Args:
        client: The async Llama Stack client.
        max_retries: Total number of connection attempts (not additional
            retries) made before giving up. Must be at least 1.
        retry_delay: Delay in seconds between consecutive attempts.

    Raises:
        ValueError: If ``max_retries`` is lower than 1.
        APIConnectionError: If Llama Stack is still unreachable on the final
            attempt.
        InvalidLlamaStackVersionException: If the detected version is outside
            the supported range or cannot be parsed.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be >= 1")

    # Attempts are numbered from 1 so the counter can be logged unchanged.
    for attempt_number in range(1, max_retries + 1):
        try:
            reported = await client.inspect.version()
            compare_versions(
                reported.version,
                MINIMAL_SUPPORTED_LLAMA_STACK_VERSION,
                MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION,
            )
        except APIConnectionError:
            # Out of attempts: let the connection error reach the caller.
            if attempt_number == max_retries:
                raise
            logger.warning(
                "Llama Stack not ready (attempt %d/%d), retrying in %ds...",
                attempt_number,
                max_retries,
                retry_delay,
            )
            await asyncio.sleep(retry_delay)
        else:
            # Version fetched and accepted; nothing left to do.
            return
43 | 75 |
|
44 | 76 |
|
45 | 77 | def compare_versions(version_info: str, minimal: str, maximal: str) -> None: |
|
0 commit comments