Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
68498c6
fix(model_management): preserve connectivity success when capacity su…
wuyuanfr Jun 25, 2026
f555fda
refactor(w11): collapse Add/Edit capacity-suggestion controls
wuyuanfr Jun 25, 2026
f0e82d3
feat(w11): backend SLO instrumentation + cross-tenant capacity-covera…
wuyuanfr Jun 26, 2026
e442a55
feat(w11): V1.5 bare-capacity tag + preset selector + permission helper
wuyuanfr Jun 26, 2026
e0ef307
fix(w11): unify ModelEditDialog state-per-model via key remount + aut…
wuyuanfr Jun 26, 2026
00c8c62
feat(w11): V1.5 bare-capacity surfaces + dual legacy hint + accept-si…
wuyuanfr Jun 26, 2026
775b0c8
fix(w11): compact bare-capacity UI — icon+tooltip in model selector, …
wuyuanfr Jun 26, 2026
d6165cb
fix(w11): close remaining spec gaps — bare-capacity badge in model li…
wuyuanfr Jun 26, 2026
f65f859
fix(w11): remove obsolete deprecatedMaxTokens warning from ModelEditD…
wuyuanfr Jun 26, 2026
04b4bc0
fix(w11): backfill bare LLM/VLM rows with safe capacity defaults
wuyuanfr Jun 27, 2026
3d13339
fix(i18n): rename 'catalog suggestion' to 'capacity suggestion' in co…
wuyuanfr Jun 27, 2026
f785f82
feat(w11): expand capability catalog to 66 entries with SiliconFlow m…
wuyuanfr Jun 27, 2026
8b0497c
feat(w11): auto-backfill capacity from catalog on startup
wuyuanfr Jun 27, 2026
2a9cbcb
fix(w11): plug 3 production bugs in V1.5 capacity-suggestion accept-s…
wuyuanfr Jun 27, 2026
e8aacc2
refactor(w11): replace startup backfill with SQL generator
wuyuanfr Jun 27, 2026
a9550fa
fix(w11): add reserve <= max_output safety guard to backfill SQL
wuyuanfr Jun 27, 2026
9d1547e
fix(w11): use capacity_source='unknown' for safe-default backfill rows
wuyuanfr Jun 27, 2026
0b94407
feat(w11): add capacity_source='default' for system-default backfill …
wuyuanfr Jun 27, 2026
31fc590
refactor(w11): remove Phase 3 max_tokens reconcile from backfill SQL
wuyuanfr Jun 27, 2026
4baf92b
fix(sdk): remove reverse max_tokens backfill from ModelConfig validator
wuyuanfr Jun 27, 2026
46f59b7
Merge remote-tracking branch 'upstream/develop' into feature/w11-capa…
wuyuanfr Jun 27, 2026
b7d6b2f
chore(sql): remove superseded v2.2.0_0617 capacity data fix migration
wuyuanfr Jun 27, 2026
5e08815
fix(w11): Phase 1b now upgrades capacity_source 'default' to 'profile'
wuyuanfr Jun 27, 2026
fafdcfd
refactor(w11): use PL/pgSQL constants in generated backfill SQL
wuyuanfr Jun 27, 2026
9769a65
fix(test): use model_factory instead of provider in accept signal test
wuyuanfr Jun 27, 2026
212a0dd
fix(catalog): use 'silicon' provider for SiliconFlow-hosted DeepSeek …
wuyuanfr Jun 27, 2026
1c3f21a
fix(w11): use rsplit in _split_repo_name to match backend split logic
wuyuanfr Jun 27, 2026
290506d
fix(sdk): prevent legacy max_tokens semantic drift in ModelConfig val…
wuyuanfr Jun 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion backend/agents/create_agent_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,43 @@
_CAPACITY_WARNING_LOCK = threading.Lock()


# W11 spec (line 710): this counter is bumped each time _resolve_input_budget
# resolves a row whose dispatch-time capability_profile_version is non-null,
# meaning the W1 exact catalog lookup succeeded. Paired with the save-time
# model_capacity_suggestion_accept_total counter it yields the SLO ratio
# "95% of accepted catalog suggestions produce the expected runtime profile".
# The whole setup sits inside try/except so that a missing OpenTelemetry
# runtime never breaks agent startup; on failure the counter is left as None.
try:
    from opentelemetry import metrics as _otel_metrics

    _capacity_dispatch_meter = _otel_metrics.get_meter(__name__)
    _capacity_dispatch_profile_hit_total = _capacity_dispatch_meter.create_counter(
        name="model_capacity_suggestion_dispatch_profile_hit_total",
        unit="dispatches",
        description=(
            "Count of agent dispatches where the resolved W1 capacity snapshot "
            "reports a non-null capability_profile_version (i.e. the runtime "
            "profile match succeeded). Labelled by provider."
        ),
    )
except Exception:  # pragma: no cover - OTel is optional at runtime
    _capacity_dispatch_profile_hit_total = None


def _record_dispatch_profile_hit(provider: Optional[str]) -> None:
    """Bump dispatch_profile_hit_total once for a successful runtime profile match.

    Args:
        provider: Value used for the ``provider`` label. A falsy value is
            reported as ``"unknown"``; the label is always lower-cased.

    Does nothing when the module-level counter is ``None``. Any exception
    raised while building the label or recording the measurement is
    swallowed on purpose so telemetry can never break an agent run.
    """
    counter = _capacity_dispatch_profile_hit_total
    if counter is not None:
        try:
            provider_label = (provider or "unknown").lower()
            counter.add(1, {"provider": provider_label})
        except Exception:  # pragma: no cover - never break agent run for telemetry
            pass


def _operator_overrides_from_model_info(model_info: Optional[dict]) -> dict:
"""Extract the W1 operator-override fields from a model_record_t row."""
if not isinstance(model_info, dict):
Expand All @@ -102,7 +139,7 @@ def _dominant_capacity_source(field_sources: dict) -> Optional[str]:
values = [value for value in field_sources.values() if value]
if not values:
return None
for preferred in ("operator", "profile", "provider_candidate", "legacy", "unknown"):
for preferred in ("operator", "profile", "provider_candidate", "legacy", "default", "unknown"):
if preferred in values:
return preferred
return values[0]
Expand Down Expand Up @@ -224,6 +261,8 @@ def _resolve_input_budget(
snapshot.capability_profile_version,
snapshot.fingerprint,
)
if snapshot.capability_profile_version:
_record_dispatch_profile_hit(provider)
return (
snapshot.provider_input_limit_tokens,
_capacity_snapshot_for_monitoring(snapshot),
Expand Down
54 changes: 54 additions & 0 deletions backend/apps/model_managment_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@
list_llm_models_for_tenant,
list_models_for_admin,
get_capacity_coverage,
pop_capacity_accept_signal,
_record_capacity_suggestion_accept,
)
from utils.auth_utils import get_current_user_id

Expand Down Expand Up @@ -114,6 +116,9 @@ def _capacity_suggestion_for_model_request(request: ModelRequest):
except ValueError as exc:
logger.debug("Capacity suggestion unavailable for connectivity request: %s", exc)
return None
except Exception as exc:
logger.warning("Capacity suggestion failed during connectivity request: %s", exc)
return None


@router.post("/create")
Expand All @@ -133,9 +138,14 @@ async def create_model(request: ModelRequest, authorization: Optional[str] = Hea
try:
user_id, tenant_id = get_current_user_id(authorization)
model_data = request.model_dump()
accept_signal = pop_capacity_accept_signal(model_data)
logger.debug(
f"Start to create model, user_id: {user_id}, tenant_id: {tenant_id}")
await create_model_for_tenant(user_id, tenant_id, model_data)
if accept_signal is not None:
_record_capacity_suggestion_accept(
accept_signal["match_kind"], request.model_factory
)
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Model created successfully"
})
Expand Down Expand Up @@ -242,7 +252,18 @@ async def batch_create_models(request: BatchCreateModelsRequest, authorization:
try:
user_id, tenant_id = get_current_user_id(authorization)
batch_model_config = request.model_dump()
# Strip W11 accept-signal fields off every model entry before the
# batch reaches the service/DB layer. Same audit-only contract as
# the single-create path: pop now, emit the SLO counter on success.
accept_signals = [
signal
for model in batch_model_config.get("models", [])
if (signal := pop_capacity_accept_signal(model)) is not None
]
await batch_create_models_for_tenant(user_id, tenant_id, batch_model_config)
provider = batch_model_config.get("provider")
for signal in accept_signals:
_record_capacity_suggestion_accept(signal["match_kind"], provider)
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Batch create models successfully"
})
Expand Down Expand Up @@ -298,7 +319,12 @@ async def update_single_model(
"""
try:
user_id, tenant_id = get_current_user_id(authorization)
accept_signal = pop_capacity_accept_signal(request)
await update_single_model_for_tenant(user_id, tenant_id, display_name, request)
if accept_signal is not None:
_record_capacity_suggestion_accept(
accept_signal["match_kind"], request.get("model_factory")
)
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Model updated successfully"
})
Expand Down Expand Up @@ -530,7 +556,18 @@ async def manage_create_model(
f"Start to create model for tenant, user_id: {user_id}, target_tenant_id: {request.tenant_id}")

model_data = request.model_dump(exclude={'tenant_id'})
# Strip W11 accept-signal fields before the dict reaches the
# service (which calls create_model_record -> SQLAlchemy insert).
# Without the pop, the fields would fall through to .values() and
# raise "Unconsumed column names"; without the recorder call,
# operator-accepted suggestions saved by SU/asset-owner via
# /manage/* would silently miss the accept_total SLO numerator.
accept_signal = pop_capacity_accept_signal(model_data)
await create_model_for_tenant(user_id, request.tenant_id, model_data)
if accept_signal is not None:
_record_capacity_suggestion_accept(
accept_signal["match_kind"], request.model_factory
)
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Model created successfully",
"data": {"tenant_id": request.tenant_id}
Expand Down Expand Up @@ -567,9 +604,16 @@ async def manage_update_model(
f"current_display_name: {request.current_display_name}")

model_data = request.model_dump(exclude={'tenant_id', 'current_display_name'}, exclude_unset=True)
# Same audit-only contract as /manage/create above: pop before
# the dict reaches update_model_record, emit after persist.
accept_signal = pop_capacity_accept_signal(model_data)
await update_single_model_for_tenant(
user_id, request.tenant_id, request.current_display_name, model_data
)
if accept_signal is not None:
_record_capacity_suggestion_accept(
accept_signal["match_kind"], request.model_factory
)
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Model updated successfully",
"data": {"tenant_id": request.tenant_id}
Expand Down Expand Up @@ -651,7 +695,17 @@ async def manage_batch_create_models(
f"provider: {request.provider}, type: {request.type}, models count: {len(request.models)}")

batch_model_config = request.model_dump()
# Mirror /provider/batch_create: pop W11 accept-signal fields per
# model before the dict reaches the service/DB layer; emit the SLO
# counter only after the batch persist call succeeds.
accept_signals = [
signal
for model in batch_model_config.get("models", [])
if (signal := pop_capacity_accept_signal(model)) is not None
]
await batch_create_models_for_tenant(user_id, request.tenant_id, batch_model_config)
for signal in accept_signals:
_record_capacity_suggestion_accept(signal["match_kind"], request.provider)
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Batch create models successfully",
"data": {
Expand Down
Loading
Loading