Skip to content

Commit 6c1fb29

Browse files
committed
health endpoint
1 parent 9335893 commit 6c1fb29

6 files changed

Lines changed: 94 additions & 3 deletions

File tree

code-interpreter/app/main.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
from app.api.routes import router as api_router
1313
from app.app_configs import EXECUTOR_BACKEND, HOST, PORT, PYTHON_EXECUTOR_DOCKER_IMAGE
14+
from app.models.schemas import HealthResponse
15+
from app.services.executor_factory import get_executor
1416

1517
# Configure logging
1618
logging.basicConfig(
@@ -101,8 +103,11 @@ def create_app() -> FastAPI:
101103
)
102104

103105
@app.get("/health")
def health() -> HealthResponse:
    """Report whether the configured executor backend is operational.

    Asks the active executor for its health check result and copies the
    resulting status and message into the API response model.
    """
    executor = get_executor()
    check = executor.check_health()
    return HealthResponse(message=check.message, status=check.status)
110+
106111

107112
app.include_router(api_router, prefix="/v1")
108113
return app

code-interpreter/app/models/schemas.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,11 @@ class ListFilesResponse(BaseModel):
115115
default_factory=list,
116116
description="List of all stored files with their metadata.",
117117
)
118+
119+
120+
# ── Health check models ──────────────────────────────────────────────
121+
122+
123+
class HealthResponse(BaseModel):
    """Response body returned by the service health endpoint."""

    # Field descriptions are surfaced in the generated API schema, matching
    # how the other response models in this module document their fields.
    status: Literal["ok", "error"] = Field(
        ...,
        description='"ok" when the executor backend health check passed, "error" otherwise.',
    )
    message: StrictStr | None = Field(
        default=None,
        description="Optional detail explaining the reported status.",
    )

code-interpreter/app/services/executor_base.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ class StreamResult:
9898
StreamEvent = StreamChunk | StreamResult
9999

100100

101+
@dataclass(frozen=True, slots=True)
102+
class HealthCheck:
103+
"""Result of an executor health check."""
104+
105+
status: Literal["ok", "error"]
106+
message: str | None = None
107+
108+
101109
class ExecutorProtocol(Protocol):
102110
def execute_python(
103111
self,
@@ -114,6 +122,13 @@ def execute_python(
114122

115123

116124
class BaseExecutor(ABC):
125+
def check_health(self) -> HealthCheck:
    """Report the health of the executor backend.

    This base implementation performs no probing and always reports "ok".
    Backends that can actually fail should override it with their own checks.
    """
    healthy = HealthCheck(status="ok")
    return healthy
131+
117132
@abstractmethod
118133
def execute_python(
119134
self,

code-interpreter/app/services/executor_docker.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
BaseExecutor,
2525
EntryKind,
2626
ExecutionResult,
27+
HealthCheck,
2728
StreamChunk,
2829
StreamEvent,
2930
StreamResult,
@@ -49,6 +50,51 @@ def __init__(self) -> None:
4950
self.image = PYTHON_EXECUTOR_DOCKER_IMAGE
5051
self.run_args = PYTHON_EXECUTOR_DOCKER_RUN_ARGS
5152

53+
def check_health(self) -> HealthCheck:
    """Verify Docker daemon is reachable and the executor image is available.

    Runs two short CLI probes (5 second timeout each):

    1. ``docker version`` requesting the server version, which fails when
       the daemon cannot be reached.
    2. ``docker image inspect`` on the configured executor image, which
       fails when the image is not present locally.

    Returns:
        ``HealthCheck(status="ok")`` when both probes succeed, otherwise a
        ``HealthCheck`` with ``status="error"`` and a message describing the
        first probe that failed. Failures to launch the Docker binary are
        reported as errors rather than raised.
    """
    # Check Docker daemon connectivity
    try:
        result = subprocess.run(
            [self.docker_binary, "version", "--format", "{{.Server.Version}}"],
            capture_output=True,
            timeout=5,
            check=False,
        )
    except FileNotFoundError:
        return HealthCheck(status="error", message="Docker binary not found")
    except subprocess.TimeoutExpired:
        return HealthCheck(status="error", message="Docker daemon not responding")
    except OSError as exc:
        # e.g. the binary exists but is not executable; report it instead of
        # letting the health endpoint fail with an unhandled exception.
        return HealthCheck(
            status="error",
            message=f"Failed to run Docker binary: {exc}",
        )

    if result.returncode != 0:
        stderr = result.stderr.decode("utf-8", errors="replace").strip()
        return HealthCheck(
            status="error",
            message=f"Docker daemon not reachable: {stderr}",
        )

    # Check executor image is available locally.
    # Only default to ":latest" when the configured image has neither a tag
    # nor a digest; appending it unconditionally would turn "img:v1" into
    # the invalid reference "img:v1:latest". The tag check looks at the last
    # path component only, so a registry port ("host:5000/img") is not
    # mistaken for a tag.
    last_component = self.image.rsplit("/", 1)[-1]
    has_tag_or_digest = ":" in last_component or "@" in self.image
    image_with_tag = self.image if has_tag_or_digest else f"{self.image}:latest"
    try:
        img_result = subprocess.run(
            [self.docker_binary, "image", "inspect", image_with_tag],
            capture_output=True,
            timeout=5,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return HealthCheck(
            status="error",
            message=f"Timeout checking image {image_with_tag}",
        )
    except OSError as exc:
        # The binary can disappear or become unusable between the two probes.
        return HealthCheck(
            status="error",
            message=f"Failed to run Docker binary: {exc}",
        )

    if img_result.returncode != 0:
        return HealthCheck(
            status="error",
            message=f"Executor image {image_with_tag} not available locally",
        )

    return HealthCheck(status="ok")
97+
5298
def _resolve_docker_binary(self) -> str:
5399
candidate = PYTHON_EXECUTOR_DOCKER_BIN
54100
docker_path = which(candidate)

code-interpreter/app/services/executor_kubernetes.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
BaseExecutor,
3232
EntryKind,
3333
ExecutionResult,
34+
HealthCheck,
3435
WorkspaceEntry,
3536
wrap_last_line_interactive,
3637
)
@@ -76,6 +77,22 @@ def __init__(self) -> None:
7677
self.image = KUBERNETES_EXECUTOR_IMAGE
7778
self.service_account = KUBERNETES_EXECUTOR_SERVICE_ACCOUNT
7879

80+
def check_health(self) -> HealthCheck:
    """Verify Kubernetes API is reachable and the namespace is accessible.

    Issues a single ``read_namespace`` call for the configured namespace.

    Returns:
        ``HealthCheck(status="ok")`` when the call succeeds. An
        ``ApiException`` is reported as an error carrying the namespace and
        the exception's ``reason``; any other exception (including a request
        timeout) is reported as the API being unreachable.
    """
    try:
        # Bound the request so an unresponsive API server cannot block the
        # health endpoint indefinitely.
        self.v1.read_namespace(name=self.namespace, _request_timeout=5)
    except ApiException as e:
        return HealthCheck(
            status="error",
            message=f"Kubernetes API error (namespace={self.namespace}): {e.reason}",
        )
    except Exception as e:
        # Deliberately broad: a health probe must report failure, not raise.
        return HealthCheck(
            status="error",
            message=f"Kubernetes API not reachable: {e}",
        )
    return HealthCheck(status="ok")
95+
7996
def _create_pod_manifest(
8097
self,
8198
pod_name: str,

code-interpreter/tests/e2e/test_basic_flow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def test_execute_endpoint_basic_flow() -> None:
1818
pytest.fail(f"Failed to reach Code Interpreter service at {BASE_URL}: {exc!s}")
1919

2020
assert health_response.status_code == 200, health_response.text
21-
assert health_response.json() == {"status": "ok"}
21+
assert health_response.json()["status"] == "ok"
2222

2323
execute_payload: dict[str, Any] = {
2424
"code": "print('hello from e2e')",

0 commit comments

Comments
 (0)