Skip to content

Commit cdbf424

Browse files
Make UC model discovery the default; remove --enable-uc
`ucode configure` found only the opus Claude model on workspaces where the legacy /ai-gateway/anthropic/v1/models listing dropped sonnet/haiku once system.ai endpoints existed. UC model-services discovery found all three but was hidden behind --enable-uc (and had no fallback).

Now discovery is UC-first and best-effort: try model-services first, then fall back to the legacy listing for each family that UC returns nothing for. The --enable-uc flag, UCODE_ENABLE_UC env var, and uc_enabled helper are removed, and curated system.ai.* MCP services are surfaced by default.

The model-services endpoint still returns HTTP 499 without a bounded page_size, so pagination (page_size=100, follow next_page_token, light per-page retry) is retained.

Co-authored-by: Isaac
1 parent 7cd9009 commit cdbf424

6 files changed

Lines changed: 124 additions & 321 deletions

File tree

src/ucode/cli.py

Lines changed: 14 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
normalize_workspace_url,
4747
resolve_pat_token,
4848
run_databricks_login,
49-
uc_enabled,
5049
)
5150
from ucode.mcp import (
5251
MCP_CLIENTS,
@@ -195,8 +194,6 @@ def configure_shared_state(
195194
profile: str | None = None,
196195
tools: list[str] | None = None,
197196
force_login: bool = False,
198-
enable_uc: bool | None = None,
199-
reset_uc: bool = False,
200197
use_pat: bool | None = None,
201198
) -> dict:
202199
"""Log into Databricks, enforce AI Gateway v2, fetch model lists, persist state.
@@ -211,28 +208,10 @@ def configure_shared_state(
211208
``--profile`` to every CLI invocation so ambiguous `~/.databrickscfg`
212209
entries (e.g. DEFAULT and a named profile both pointing at the same host)
213210
don't error out. If ``None``, we resolve it from the host after login.
214-
``enable_uc`` is the resolved CLI flag (`--enable-uc`): when not None
215-
it overrides both the env var and the persisted state.
216-
``reset_uc`` is True only on the explicit ``ucode configure`` flow.
217211
"""
218212
workspace = normalize_workspace_url(workspace)
219213
prior_state = load_state()
220214
previous_workspace = prior_state.get("workspace")
221-
# Precedence: explicit CLI flag > env var > (configure: reset to False;
222-
# launch: target workspace's persisted state). Use *target* state on the
223-
# launch path so the flag is sticky per-workspace and doesn't leak
224-
# across workspace switches.
225-
# TODO: when this flips uc_enabled True->False, prune any
226-
# `system.ai.*` MCP services from state["mcp_servers"] (and their
227-
# cross-tool registrations). Today they linger as orphans pointing at
228-
# /ai-gateway/mcp-services/* until the user re-runs `configure mcp`
229-
# or switches workspaces.
230-
if enable_uc is None:
231-
if reset_uc:
232-
enable_uc = uc_enabled(default=False)
233-
else:
234-
target_ws_state = load_full_state().get("workspaces", {}).get(workspace) or {}
235-
enable_uc = uc_enabled(default=bool(target_ws_state.get("uc_enabled")))
236215
if use_pat is None:
237216
use_pat = bool(prior_state.get("use_pat")) and previous_workspace == workspace
238217
fetch_all = tools is None
@@ -278,25 +257,23 @@ def configure_shared_state(
278257
claude_models = {}
279258
gemini_models = []
280259
codex_models = []
281-
if enable_uc:
282-
# Opt-in: one UC model-services call yields all families as
283-
# `system.ai.<model-name>` ids, bucketed by name. The single reason is
284-
# shared across the families that were requested.
285-
with spinner("Fetching available models (model services)..."):
286-
ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token)
260+
# UC-first, best-effort: one UC model-services call yields all families as
261+
# `system.ai.<model-name>` ids, bucketed by name. If a family comes back
262+
# empty (workspace without UC model-services, or the listing failed), fall
263+
# back to the per-family AI Gateway listing for that family only.
264+
with spinner("Fetching available models..."):
265+
ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token)
287266
if want_claude:
288267
claude_models, claude_reason = ms_claude, ms_reason
268+
if not claude_models:
269+
claude_models, claude_reason = discover_claude_models(workspace, token)
289270
if want_gemini:
290271
gemini_models, gemini_reason = ms_gemini, ms_reason
272+
if not gemini_models:
273+
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
291274
if want_codex:
292275
codex_models, codex_reason = ms_codex, ms_reason
293-
else:
294-
with spinner("Fetching available models..."):
295-
if want_claude:
296-
claude_models, claude_reason = discover_claude_models(workspace, token)
297-
if want_gemini:
298-
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
299-
if want_codex:
276+
if not codex_models:
300277
codex_models, codex_reason = discover_codex_models(workspace, token)
301278
opencode_models: dict[str, list[str]] = {}
302279
if claude_models:
@@ -311,9 +288,8 @@ def configure_shared_state(
311288
state["profile"] = profile
312289
else:
313290
state.pop("profile", None)
314-
# Persist the resolved flag so subsequent launches stay on the same
315-
# discovery path without the env var or CLI flag being re-passed.
316-
state["uc_enabled"] = enable_uc
291+
# UC discovery is now always-on; drop any flag persisted by older versions.
292+
state.pop("uc_enabled", None)
317293
# Persist the auth mode so launches rebuild the same (PAT-based) agent
318294
# auth command; an explicit re-configure without --use-pat clears it.
319295
if use_pat:
@@ -349,8 +325,6 @@ def _configure_shared_workspace_states(
349325
tools: list[str] | None,
350326
*,
351327
force_login: bool,
352-
enable_uc: bool | None = None,
353-
reset_uc: bool = False,
354328
use_pat: bool = False,
355329
) -> list[dict]:
356330
if not workspaces:
@@ -363,8 +337,6 @@ def _configure_shared_workspace_states(
363337
profile=profile,
364338
tools=tools,
365339
force_login=force_login,
366-
enable_uc=enable_uc,
367-
reset_uc=reset_uc,
368340
use_pat=use_pat,
369341
)
370342
)
@@ -377,8 +349,6 @@ def configure_workspace_command(
377349
workspaces: list[tuple[str, str | None]] | None = None,
378350
*,
379351
prompt_optional_updates: bool = True,
380-
enable_uc: bool | None = None,
381-
reset_uc: bool = False,
382352
use_pat: bool = False,
383353
skip_validate: bool = False,
384354
) -> int:
@@ -392,8 +362,6 @@ def configure_workspace_command(
392362
workspace_entries,
393363
[tool],
394364
force_login=True,
395-
enable_uc=enable_uc,
396-
reset_uc=reset_uc,
397365
use_pat=use_pat,
398366
)
399367
state = states[0]
@@ -429,8 +397,6 @@ def configure_workspace_command(
429397
workspace_entries,
430398
selected_tools,
431399
force_login=True,
432-
enable_uc=enable_uc,
433-
reset_uc=reset_uc,
434400
use_pat=use_pat,
435401
)
436402
state = states[0]
@@ -827,18 +793,6 @@ def configure(
827793
"'low' prints terse single-line status instead.",
828794
),
829795
] = "normal",
830-
enable_uc: Annotated[
831-
bool,
832-
typer.Option(
833-
"--enable-uc",
834-
help="Discover models via UC `model-services` (`system.ai.<model>`) and "
835-
"surface curated `system.ai.*` MCP services. Equivalent to setting "
836-
"UCODE_ENABLE_UC=1 for this configure run. The value is persisted so "
837-
"subsequent `ucode <agent>` launches stay on the same discovery path; "
838-
"re-run `ucode configure` without the flag (and without "
839-
"UCODE_ENABLE_UC=1 in the env) to turn UC discovery back off.",
840-
),
841-
] = False,
842796
) -> None:
843797
"""Configure workspace URL and AI Gateway."""
844798
if ctx.invoked_subcommand is not None:
@@ -849,10 +803,6 @@ def configure(
849803
set_dry_run(dry_run)
850804
set_verbosity(verbose)
851805
prompt_optional_updates = not skip_upgrade
852-
flag_enable_uc: bool | None = True if enable_uc else None
853-
# Explicit `ucode configure` is a clean slate: when the user omits both
854-
# `--enable-uc` and `UCODE_ENABLE_UC`, persisted `uc_enabled=true` from
855-
# a prior run is reset to false.
856806
try:
857807
install_databricks_cli()
858808
if agent is not None and agents is not None:
@@ -883,15 +833,11 @@ def configure(
883833
prompt_optional_updates=prompt_optional_updates,
884834
)
885835
if workspace_entries is None:
886-
configure_workspace_command(
887-
tool, enable_uc=flag_enable_uc, reset_uc=True, **skip_kwargs
888-
)
836+
configure_workspace_command(tool, **skip_kwargs)
889837
else:
890838
configure_workspace_command(
891839
tool,
892840
workspaces=workspace_entries,
893-
enable_uc=flag_enable_uc,
894-
reset_uc=True,
895841
**skip_kwargs,
896842
)
897843
elif agents is not None:
@@ -900,17 +846,13 @@ def configure(
900846
configure_workspace_command(
901847
selected_tools=selected_tools,
902848
prompt_optional_updates=prompt_optional_updates,
903-
enable_uc=flag_enable_uc,
904-
reset_uc=True,
905849
**skip_kwargs,
906850
)
907851
else:
908852
configure_workspace_command(
909853
selected_tools=selected_tools,
910854
workspaces=workspace_entries,
911855
prompt_optional_updates=prompt_optional_updates,
912-
enable_uc=flag_enable_uc,
913-
reset_uc=True,
914856
**skip_kwargs,
915857
)
916858
else:
@@ -919,16 +861,12 @@ def configure(
919861
if workspace_entries is None:
920862
configure_workspace_command(
921863
prompt_optional_updates=prompt_optional_updates,
922-
enable_uc=flag_enable_uc,
923-
reset_uc=True,
924864
**skip_kwargs,
925865
)
926866
else:
927867
configure_workspace_command(
928868
workspaces=workspace_entries,
929869
prompt_optional_updates=prompt_optional_updates,
930-
enable_uc=flag_enable_uc,
931-
reset_uc=True,
932870
**skip_kwargs,
933871
)
934872
if tracing:

src/ucode/databricks.py

Lines changed: 13 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,27 +1050,6 @@ def build_auth_shell_command(
10501050
)
10511051

10521052

1053-
def uc_enabled(default: bool = False) -> bool:
1054-
"""True when the opt-in UC-securables discovery path is enabled.
1055-
1056-
Three input precedences, callers handle the highest one first:
1057-
1. ``ucode configure --enable-uc / --no-enable-uc`` (resolved by the
1058-
CLI before this function is called and passed in via ``default``,
1059-
since it overrides everything).
1060-
2. ``UCODE_ENABLE_UC=1`` (or true/yes/on) env var.
1061-
3. The value persisted in state (sticky, also passed via ``default``).
1062-
1063-
Enabling UC discovery makes ucode:
1064-
- resolve models via UC `model-services` as `system.ai.<model-name>`
1065-
instead of the per-family AI Gateway listings
1066-
- surface curated `system.ai.*` MCP services in `ucode configure mcp`
1067-
"""
1068-
raw = os.environ.get("UCODE_ENABLE_UC")
1069-
if raw is None or not raw.strip():
1070-
return default
1071-
return raw.strip().lower() in {"1", "true", "yes", "on"}
1072-
1073-
10741053
# A model-service's `name` is `model-services/system.ai.<model-name>`; the
10751054
# part after the prefix is exactly the model string agents send (no
10761055
# `databricks-` infix — that only appears on the inner destination name).
@@ -1097,11 +1076,12 @@ def _model_service_id(service: dict) -> str | None:
10971076
return name or None
10981077

10991078

1100-
# The model-services metastore listing is slow and flaky — large pages
1101-
# routinely 504 with `Timeout listing model services under metastore`. A small
1102-
# page is far more likely to come back, and each page gets a few retries before
1103-
# we give up.
1104-
_MODEL_SERVICES_PAGE_SIZE = 10
1079+
# The model-services metastore listing REQUIRES a bounded `page_size`:
1080+
# unparameterized or large-page requests (verified against
1081+
# eng-ml-agent-platform.staging 2026-06-14) return `HTTP 499` with an empty
1082+
# body, while pages of 10–100 come back reliably. A page can still 499
1083+
# intermittently under load, so each gets a few retries before we give up.
1084+
_MODEL_SERVICES_PAGE_SIZE = 100
11051085
_MODEL_SERVICES_PAGE_RETRIES = 4
11061086

11071087

@@ -1110,9 +1090,9 @@ def _get_model_services_page(
11101090
) -> tuple[dict | list | None, str | None]:
11111091
"""GET one model-services page, retrying on failure.
11121092
1113-
The endpoint frequently 504s under load; a retry usually succeeds. Returns
1114-
the same (payload, reason) shape as ``_http_get_json`` — the last attempt's
1115-
result when all retries are exhausted."""
1093+
The endpoint intermittently 499/504s under load; a retry usually succeeds.
1094+
Returns the same (payload, reason) shape as ``_http_get_json`` — the last
1095+
attempt's result when all retries are exhausted."""
11161096
payload: dict | list | None = None
11171097
reason: str | None = None
11181098
for attempt in range(retries):
@@ -1133,11 +1113,10 @@ def list_model_services(
11331113
"""List all `system.ai.*` model ids via the UC model-services API.
11341114
11351115
Pages through ``/api/2.1/unity-catalog/model-services`` (metastore scope)
1136-
and returns the de-duplicated, sorted list of ``system.ai.<model-name>``
1137-
ids. Uses a small page size with per-page retries because the endpoint is
1138-
slow and frequently 504s. Returns (ids, reason); reason is None on success,
1139-
otherwise it describes why the list is empty (HTTP/network error or no
1140-
services).
1116+
with a bounded ``page_size`` (the endpoint 499s without one) and returns the
1117+
de-duplicated, sorted list of ``system.ai.<model-name>`` ids. Returns
1118+
(ids, reason); reason is None on success, otherwise it describes why the
1119+
list is empty (HTTP/network error or no services).
11411120
"""
11421121
hostname = workspace_hostname(workspace)
11431122
ids: list[str] = []

src/ucode/mcp.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
list_databricks_connections,
3636
list_genie_spaces,
3737
list_mcp_services,
38-
uc_enabled,
3938
workspace_hostname,
4039
)
4140
from ucode.state import load_full_state, load_state, save_state
@@ -990,17 +989,12 @@ def configure_mcp_command() -> int:
990989
"Databricks apps",
991990
lambda: discover_app_mcp_servers(workspace, profile),
992991
)
993-
# Curated `system.ai.*` MCP services live behind a separate UC API and
994-
# are gated on the same UC opt-in that enables model-services discovery
995-
# (env: UCODE_ENABLE_UC, CLI: `ucode configure --enable-uc`, persisted
996-
# in state on configure).
997-
available_mcp_service_names = (
998-
_discover_mcp_source(
999-
"MCP services",
1000-
lambda: discover_mcp_service_names(workspace, profile),
1001-
)
1002-
if uc_enabled(default=bool(state.get("uc_enabled")))
1003-
else []
992+
# Curated `system.ai.*` MCP services live behind a separate UC API. Like
993+
# the other sources this is best-effort — `_discover_mcp_source` swallows
994+
# failures and returns [] so workspaces without them just see nothing extra.
995+
available_mcp_service_names = _discover_mcp_source(
996+
"MCP services",
997+
lambda: discover_mcp_service_names(workspace, profile),
1004998
)
1005999

10061000
original_mcp_servers: list[dict] = list(state.get("mcp_servers") or [])

0 commit comments

Comments
 (0)