|
5 | 5 |
|
6 | 6 | import logging |
7 | 7 | import asyncio |
| 8 | +import os |
8 | 9 | from datetime import datetime |
9 | 10 | from typing import Dict, Any, List |
10 | 11 | from fastapi import APIRouter, HTTPException, status |
@@ -382,12 +383,12 @@ async def services_health(): |
382 | 383 |
|
383 | 384 | services_status = [] |
384 | 385 |
|
385 | | - async def check_service_endpoint(name: str, url: str, timeout: float = 5.0) -> Dict[str, Any]: |
| 386 | + async def check_service_endpoint(name: str, url: str, timeout: float = 5.0, headers: Dict[str, str] = None) -> Dict[str, Any]: |
386 | 387 | """Helper to check a service health endpoint""" |
387 | 388 | try: |
388 | 389 | start = datetime.utcnow() |
389 | 390 | async with httpx.AsyncClient(timeout=timeout) as client: |
390 | | - response = await client.get(url) |
| 391 | + response = await client.get(url, headers=headers) |
391 | 392 | latency_ms = (datetime.utcnow() - start).total_seconds() * 1000 |
392 | 393 |
|
393 | 394 | is_healthy = response.status_code in [200, 201] |
@@ -417,9 +418,29 @@ async def check_service_endpoint(name: str, url: str, timeout: float = 5.0) -> D |
417 | 418 | "http://uchub-keycloak:8080/health/ready" |
418 | 419 | ) |
419 | 420 |
|
| 421 | + # LiteLLM: try several candidate base URLs, since the env var may contain a hostname that does not resolve |
| 422 | + litellm_urls = [ |
| 423 | + os.getenv("LITELLM_PROXY_URL", "").rstrip("/"), |
| 424 | + "http://unicorn-litellm-wilmer:4000", |
| 425 | + "http://litellm:4000", |
| 426 | + ] |
| 427 | + # Pick the first URL whose host resolves; if none does, keep the default assigned below |
| 428 | + litellm_url = "http://unicorn-litellm-wilmer:4000" |
| 429 | + for url in litellm_urls: |
| 430 | + if url: |
| 431 | + try: |
| 432 | + import urllib.parse |
| 433 | + host = urllib.parse.urlparse(url).hostname |
| 434 | + if host: |
| 435 | + import socket |
| 436 | + socket.gethostbyname(host) |
| 437 | + litellm_url = url |
| 438 | + break |
| 439 | + except (socket.gaierror, Exception): |
| 440 | + continue |
420 | 441 | litellm_check = check_service_endpoint( |
421 | 442 | "LiteLLM", |
422 | | - "http://localhost:4000/health" |
| 443 | + f"{litellm_url}/" |
423 | 444 | ) |
424 | 445 |
|
425 | 446 | claude_agents_check = check_service_endpoint( |
|
0 commit comments