ModelEngine-Group · wuyuanfr · Jun 25, 2026 · Jun 25, 2026 · Jun 26, 2026 · Jun 26, 2026
@@ -86,6 +86,43 @@
 _CAPACITY_WARNING_LOCK = threading.Lock()
 
 
+# W11 spec line 710: emitted every time _resolve_input_budget resolves a row
+# whose dispatch-time capability_profile_version is non-null (i.e. the W1
+# exact catalog lookup succeeded). Combined with
+# model_capacity_suggestion_accept_total at save time, it gives the SLO ratio
+# "95% of accepted catalog suggestions produce the expected runtime profile".
+# Guarded so a missing OpenTelemetry runtime never breaks agent startup.
+try:
+    from opentelemetry import metrics as _otel_metrics
+
+    _capacity_dispatch_meter = _otel_metrics.get_meter(__name__)
+    _capacity_dispatch_profile_hit_total = _capacity_dispatch_meter.create_counter(
+        name="model_capacity_suggestion_dispatch_profile_hit_total",
+        description=(
+            "Count of agent dispatches where the resolved W1 capacity "
+            "snapshot reports a non-null capability_profile_version "
+            "(i.e. the runtime profile match succeeded). Labelled by "
+            "provider."
+        ),
+        unit="dispatches",
+    )
+except Exception:  # pragma: no cover - OTel is optional at runtime
+    _capacity_dispatch_profile_hit_total = None
+
+
+def _record_dispatch_profile_hit(provider: Optional[str]) -> None:
+    """Emit dispatch_profile_hit_total for one successful runtime profile match."""
+    if _capacity_dispatch_profile_hit_total is None:
+        return
+    try:
+        _capacity_dispatch_profile_hit_total.add(
+            1,
+            {"provider": (provider or "unknown").lower()},
+        )
+    except Exception:  # pragma: no cover - never break agent run for telemetry
+        pass
+
+
 def _operator_overrides_from_model_info(model_info: Optional[dict]) -> dict:
     """Extract the W1 operator-override fields from a model_record_t row."""
     if not isinstance(model_info, dict):
@@ -102,7 +139,7 @@ def _dominant_capacity_source(field_sources: dict) -> Optional[str]:
     values = [value for value in field_sources.values() if value]
     if not values:
         return None
-    for preferred in ("operator", "profile", "provider_candidate", "legacy", "unknown"):
+    for preferred in ("operator", "profile", "provider_candidate", "legacy", "default", "unknown"):
         if preferred in values:
             return preferred
     return values[0]
@@ -224,6 +261,8 @@ def _resolve_input_budget(
             snapshot.capability_profile_version,
             snapshot.fingerprint,
         )
+        if snapshot.capability_profile_version:
+            _record_dispatch_profile_hit(provider)
         return (
             snapshot.provider_input_limit_tokens,
             _capacity_snapshot_for_monitoring(snapshot),

@@ -55,6 +55,8 @@
     list_llm_models_for_tenant,
     list_models_for_admin,
     get_capacity_coverage,
+    pop_capacity_accept_signal,
+    _record_capacity_suggestion_accept,
 )
 from utils.auth_utils import get_current_user_id
 
@@ -114,6 +116,9 @@ def _capacity_suggestion_for_model_request(request: ModelRequest):
     except ValueError as exc:
         logger.debug("Capacity suggestion unavailable for connectivity request: %s", exc)
         return None
+    except Exception as exc:
+        logger.warning("Capacity suggestion failed during connectivity request: %s", exc)
+        return None
 
 
 @router.post("/create")
@@ -133,9 +138,14 @@ async def create_model(request: ModelRequest, authorization: Optional[str] = Hea
     try:
         user_id, tenant_id = get_current_user_id(authorization)
         model_data = request.model_dump()
+        accept_signal = pop_capacity_accept_signal(model_data)
         logger.debug(
             f"Start to create model, user_id: {user_id}, tenant_id: {tenant_id}")
         await create_model_for_tenant(user_id, tenant_id, model_data)
+        if accept_signal is not None:
+            _record_capacity_suggestion_accept(
+                accept_signal["match_kind"], request.model_factory
+            )
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Model created successfully"
         })
@@ -242,7 +252,18 @@ async def batch_create_models(request: BatchCreateModelsRequest, authorization:
     try:
         user_id, tenant_id = get_current_user_id(authorization)
         batch_model_config = request.model_dump()
+        # Strip W11 accept-signal fields off every model entry before the
+        # batch reaches the service/DB layer. Same audit-only contract as
+        # the single-create path: pop now, emit the SLO counter on success.
+        accept_signals = [
+            signal
+            for model in batch_model_config.get("models", [])
+            if (signal := pop_capacity_accept_signal(model)) is not None
+        ]
         await batch_create_models_for_tenant(user_id, tenant_id, batch_model_config)
+        provider = batch_model_config.get("provider")
+        for signal in accept_signals:
+            _record_capacity_suggestion_accept(signal["match_kind"], provider)
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Batch create models successfully"
         })
@@ -298,7 +319,12 @@ async def update_single_model(
     """
     try:
         user_id, tenant_id = get_current_user_id(authorization)
+        accept_signal = pop_capacity_accept_signal(request)
         await update_single_model_for_tenant(user_id, tenant_id, display_name, request)
+        if accept_signal is not None:
+            _record_capacity_suggestion_accept(
+                accept_signal["match_kind"], request.get("model_factory")
+            )
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Model updated successfully"
         })
@@ -530,7 +556,18 @@ async def manage_create_model(
             f"Start to create model for tenant, user_id: {user_id}, target_tenant_id: {request.tenant_id}")
 
         model_data = request.model_dump(exclude={'tenant_id'})
+        # Strip W11 accept-signal fields before the dict reaches the
+        # service (which calls create_model_record -> SQLAlchemy insert).
+        # Without the pop, the fields would fall through to .values() and
+        # raise "Unconsumed column names"; without the recorder call,
+        # operator-accepted suggestions saved by SU/asset-owner via
+        # /manage/* would silently miss the accept_total SLO numerator.
+        accept_signal = pop_capacity_accept_signal(model_data)
         await create_model_for_tenant(user_id, request.tenant_id, model_data)
+        if accept_signal is not None:
+            _record_capacity_suggestion_accept(
+                accept_signal["match_kind"], request.model_factory
+            )
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Model created successfully",
             "data": {"tenant_id": request.tenant_id}
@@ -567,9 +604,16 @@ async def manage_update_model(
             f"current_display_name: {request.current_display_name}")
 
         model_data = request.model_dump(exclude={'tenant_id', 'current_display_name'}, exclude_unset=True)
+        # Same audit-only contract as /manage/create above: pop before
+        # the dict reaches update_model_record, emit after persist.
+        accept_signal = pop_capacity_accept_signal(model_data)
         await update_single_model_for_tenant(
             user_id, request.tenant_id, request.current_display_name, model_data
         )
+        if accept_signal is not None:
+            _record_capacity_suggestion_accept(
+                accept_signal["match_kind"], request.model_factory
+            )
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Model updated successfully",
             "data": {"tenant_id": request.tenant_id}
@@ -651,7 +695,17 @@ async def manage_batch_create_models(
             f"provider: {request.provider}, type: {request.type}, models count: {len(request.models)}")
 
         batch_model_config = request.model_dump()
+        # Mirror /provider/batch_create: pop W11 accept-signal fields per
+        # model before the dict reaches the service/DB layer; emit the SLO
+        # counter only after the batch persist call succeeds.
+        accept_signals = [
+            signal
+            for model in batch_model_config.get("models", [])
+            if (signal := pop_capacity_accept_signal(model)) is not None
+        ]
         await batch_create_models_for_tenant(user_id, request.tenant_id, batch_model_config)
+        for signal in accept_signals:
+            _record_capacity_suggestion_accept(signal["match_kind"], request.provider)
         return JSONResponse(status_code=HTTPStatus.OK, content={
             "message": "Batch create models successfully",
             "data": {