Skip to content

Commit 88a2f34

Browse files
fregataaclaude
andcommitted
feat(BA-5985): allow deployments without health_check via deployment_revisions.health_check_enabled flag
Persist a denormalized boolean on deployment_revisions indicating whether the revision's model_definition declares service.health_check, computed at insert time from ModelDefinition.is_health_check_enabled() and backfilled for existing rows. Manager skips HTTP probing entirely for revisions that omit the block — routes stay NOT_CHECKED for life and still register with AppProxy so traffic flows. - Schema + ORM column + Alembic backfill (handles JSON null vs SQL NULL) - The boolean is the SQL-level filter; the in-memory RouteData carries the resolved ModelHealthCheck (or None) joined eagerly from deployment_revisions.model_definition, so no second per-revision fetch is needed when initialising health records - Filter framework: RouteHealthCheckFilter dataclass; handler.health_check_filter() abstract classmethod; RouteTargetStatuses gains traffic_status - Observer-dedicated repository entry: get_routes_for_health_observation() - HealthCheckRouteHandler: skip revisions with health_check disabled - AppProxySyncRouteHandler: include revisions with health_check disabled (HEALTHY OR disabled, plus traffic_status=ACTIVE) so unconfigured routes still register - RouteData.session_data (replaces session_id field; session_id kept as property) carries SessionStatus so sync_appproxy can validate live sessions - sync_appproxy uses input routes' replica_host/replica_port directly (same pattern as register_routes_now); removes the second route+kernel re-query via fetch_route_connection_infos and the related condition helper - Skip routes whose session is not RUNNING/CREATING; error on missing session_data or replica info Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b055006 commit 88a2f34

31 files changed

Lines changed: 454 additions & 275 deletions

changes/11534.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow model deployments to omit `service.health_check`; the manager skips HTTP probing for such revisions, and their routes are registered with AppProxy and receive traffic as soon as they reach `RUNNING`.

src/ai/backend/common/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,10 @@ def health_check_config(self) -> ModelHealthCheck | None:
489489
return model.service.health_check
490490
return None
491491

492+
def is_health_check_enabled(self) -> bool:
493+
"""Whether any model in this definition declares a health_check block."""
494+
return self.health_check_config() is not None
495+
492496
def with_args_appended(self, args: list[str]) -> ModelDefinition:
493497
"""Return a copy with ``args`` appended to each model's
494498
``service.start_command`` as separate argv tokens.

src/ai/backend/manager/data/deployment/types.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,29 @@ class DeploymentTargetStatuses:
250250

251251
@dataclass(frozen=True)
252252
class RouteTargetStatuses:
253-
"""Target statuses for route handler filtering (lifecycle x health)."""
253+
"""Target statuses for route handler filtering (lifecycle x health x traffic).
254+
255+
``traffic_status=None`` skips the filter; otherwise the row's
256+
``traffic_status`` must match.
257+
"""
254258

255259
lifecycle: list[RouteStatus]
256260
health: list[RouteHealthStatus]
261+
traffic_status: RouteTrafficStatus | None = None
262+
263+
264+
@dataclass(frozen=True)
265+
class RouteHealthCheckFilter:
266+
"""Revision-level ``health_check_enabled`` gating for route queries.
267+
268+
- ``health_check_required=None``: no filter on the flag.
269+
- ``health_check_required=True/False``: AND with the flag.
270+
- ``include_health_check_disabled=True``: OR-include rows where the flag
271+
is ``False``, regardless of the row's ``health_status``.
272+
"""
273+
274+
health_check_required: bool | None = None
275+
include_health_check_disabled: bool = False
257276

258277

259278
@dataclass(frozen=True)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""add health_check_enabled to deployment_revisions
2+
3+
Revision ID: d1afd38a32ee
4+
Revises: fc249eccd0b2
5+
Create Date: 2026-05-09
6+
7+
Denormalized boolean of whether the revision's ``model_definition``
8+
declares a ``service.health_check`` block. Backfilled by scanning
9+
existing JSONB rows.
10+
11+
"""
12+
13+
import sqlalchemy as sa
14+
from alembic import op
15+
16+
# revision identifiers, used by Alembic.
17+
# Part of: 26.5.0 (main)
18+
revision = "d1afd38a32ee"
19+
down_revision = "fc249eccd0b2"
20+
branch_labels = None
21+
depends_on = None
22+
23+
24+
def upgrade() -> None:
25+
op.add_column(
26+
"deployment_revisions",
27+
sa.Column(
28+
"health_check_enabled",
29+
sa.Boolean(),
30+
nullable=False,
31+
server_default=sa.false(),
32+
),
33+
)
34+
35+
# ``->`` yields SQL NULL when the key is absent, whereas an explicit
36+
# JSON ``null`` (which ``PydanticColumn.process_bind_param`` produces
37+
# for ``health_check=None``) is a JSONB 'null' value, not SQL NULL.
38+
# ``IS NOT NULL`` excludes the former; ``jsonb_typeof(...) != 'null'`` excludes the latter.
39+
op.execute(
40+
sa.text(
41+
"""
42+
UPDATE deployment_revisions
43+
SET health_check_enabled = TRUE
44+
WHERE model_definition IS NOT NULL
45+
AND EXISTS (
46+
SELECT 1
47+
FROM jsonb_array_elements(
48+
COALESCE(model_definition->'models', '[]'::jsonb)
49+
) AS m
50+
WHERE m->'service'->'health_check' IS NOT NULL
51+
AND jsonb_typeof(m->'service'->'health_check') != 'null'
52+
)
53+
"""
54+
)
55+
)
56+
57+
58+
def downgrade() -> None:
59+
op.drop_column("deployment_revisions", "health_check_enabled")

src/ai/backend/manager/models/deployment_revision/row.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,14 @@ class DeploymentRevisionRow(Base): # type: ignore[misc]
139139
model_definition: Mapped[ModelDefinition | None] = mapped_column(
140140
"model_definition", PydanticColumn(ModelDefinition), nullable=True
141141
)
142+
# Set at insert time from ``model_definition.is_health_check_enabled()``.
143+
health_check_enabled: Mapped[bool] = mapped_column(
144+
"health_check_enabled",
145+
sa.Boolean(),
146+
nullable=False,
147+
default=False,
148+
server_default=sa.false(),
149+
)
142150

143151
# Resource configuration
144152
resource_group: Mapped[str] = mapped_column(

src/ai/backend/manager/repositories/deployment/creators/revision.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ def build_row(self) -> DeploymentRevisionRow:
7373
model_mount_destination=self.model_mount_destination,
7474
model_definition_path=self.model_definition_path,
7575
model_definition=self.model_definition,
76+
health_check_enabled=bool(
77+
self.model_definition and self.model_definition.is_health_check_enabled()
78+
),
7679
resource_group=self.resource_group,
7780
resource_opts=self.resource_opts,
7881
cluster_mode=self.cluster_mode,

0 commit comments

Comments
 (0)