Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 14 additions & 76 deletions src/ucode/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
normalize_workspace_url,
resolve_pat_token,
run_databricks_login,
uc_enabled,
)
from ucode.mcp import (
MCP_CLIENTS,
Expand Down Expand Up @@ -195,8 +194,6 @@ def configure_shared_state(
profile: str | None = None,
tools: list[str] | None = None,
force_login: bool = False,
enable_uc: bool | None = None,
reset_uc: bool = False,
use_pat: bool | None = None,
) -> dict:
"""Log into Databricks, enforce AI Gateway v2, fetch model lists, persist state.
Expand All @@ -211,28 +208,10 @@ def configure_shared_state(
``--profile`` to every CLI invocation so ambiguous `~/.databrickscfg`
entries (e.g. DEFAULT and a named profile both pointing at the same host)
don't error out. If ``None``, we resolve it from the host after login.
``enable_uc`` is the resolved CLI flag (`--enable-uc`): when not None
it overrides both the env var and the persisted state.
``reset_uc`` is True only on the explicit ``ucode configure`` flow.
"""
workspace = normalize_workspace_url(workspace)
prior_state = load_state()
previous_workspace = prior_state.get("workspace")
# Precedence: explicit CLI flag > env var > (configure: reset to False;
# launch: target workspace's persisted state). Use *target* state on the
# launch path so the flag is sticky per-workspace and doesn't leak
# across workspace switches.
# TODO: when this flips uc_enabled True->False, prune any
# `system.ai.*` MCP services from state["mcp_servers"] (and their
# cross-tool registrations). Today they linger as orphans pointing at
# /ai-gateway/mcp-services/* until the user re-runs `configure mcp`
# or switches workspaces.
if enable_uc is None:
if reset_uc:
enable_uc = uc_enabled(default=False)
else:
target_ws_state = load_full_state().get("workspaces", {}).get(workspace) or {}
enable_uc = uc_enabled(default=bool(target_ws_state.get("uc_enabled")))
if use_pat is None:
use_pat = bool(prior_state.get("use_pat")) and previous_workspace == workspace
fetch_all = tools is None
Expand Down Expand Up @@ -278,25 +257,23 @@ def configure_shared_state(
claude_models = {}
gemini_models = []
codex_models = []
if enable_uc:
# Opt-in: one UC model-services call yields all families as
# `system.ai.<model-name>` ids, bucketed by name. The single reason is
# shared across the families that were requested.
with spinner("Fetching available models (model services)..."):
ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token)
# UC-first, best-effort: one UC model-services call yields all families as
# `system.ai.<model-name>` ids, bucketed by name. If a family comes back
# empty (workspace without UC model-services, or the listing failed), fall
# back to the per-family AI Gateway listing for that family only.
with spinner("Fetching available models..."):
ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token)
if want_claude:
claude_models, claude_reason = ms_claude, ms_reason
if not claude_models:
claude_models, claude_reason = discover_claude_models(workspace, token)
if want_gemini:
gemini_models, gemini_reason = ms_gemini, ms_reason
if not gemini_models:
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
if want_codex:
codex_models, codex_reason = ms_codex, ms_reason
else:
with spinner("Fetching available models..."):
if want_claude:
claude_models, claude_reason = discover_claude_models(workspace, token)
if want_gemini:
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
if want_codex:
if not codex_models:
codex_models, codex_reason = discover_codex_models(workspace, token)
opencode_models: dict[str, list[str]] = {}
if claude_models:
Expand All @@ -311,9 +288,8 @@ def configure_shared_state(
state["profile"] = profile
else:
state.pop("profile", None)
# Persist the resolved flag so subsequent launches stay on the same
# discovery path without the env var or CLI flag being re-passed.
state["uc_enabled"] = enable_uc
# UC discovery is now always-on; drop any flag persisted by older versions.
state.pop("uc_enabled", None)
# Persist the auth mode so launches rebuild the same (PAT-based) agent
# auth command; an explicit re-configure without --use-pat clears it.
if use_pat:
Expand Down Expand Up @@ -349,8 +325,6 @@ def _configure_shared_workspace_states(
tools: list[str] | None,
*,
force_login: bool,
enable_uc: bool | None = None,
reset_uc: bool = False,
use_pat: bool = False,
) -> list[dict]:
if not workspaces:
Expand All @@ -363,8 +337,6 @@ def _configure_shared_workspace_states(
profile=profile,
tools=tools,
force_login=force_login,
enable_uc=enable_uc,
reset_uc=reset_uc,
use_pat=use_pat,
)
)
Expand All @@ -377,8 +349,6 @@ def configure_workspace_command(
workspaces: list[tuple[str, str | None]] | None = None,
*,
prompt_optional_updates: bool = True,
enable_uc: bool | None = None,
reset_uc: bool = False,
use_pat: bool = False,
skip_validate: bool = False,
) -> int:
Expand All @@ -392,8 +362,6 @@ def configure_workspace_command(
workspace_entries,
[tool],
force_login=True,
enable_uc=enable_uc,
reset_uc=reset_uc,
use_pat=use_pat,
)
state = states[0]
Expand Down Expand Up @@ -429,8 +397,6 @@ def configure_workspace_command(
workspace_entries,
selected_tools,
force_login=True,
enable_uc=enable_uc,
reset_uc=reset_uc,
use_pat=use_pat,
)
state = states[0]
Expand Down Expand Up @@ -827,18 +793,6 @@ def configure(
"'low' prints terse single-line status instead.",
),
] = "normal",
enable_uc: Annotated[
bool,
typer.Option(
"--enable-uc",
help="Discover models via UC `model-services` (`system.ai.<model>`) and "
"surface curated `system.ai.*` MCP services. Equivalent to setting "
"UCODE_ENABLE_UC=1 for this configure run. The value is persisted so "
"subsequent `ucode <agent>` launches stay on the same discovery path; "
"re-run `ucode configure` without the flag (and without "
"UCODE_ENABLE_UC=1 in the env) to turn UC discovery back off.",
),
] = False,
) -> None:
"""Configure workspace URL and AI Gateway."""
if ctx.invoked_subcommand is not None:
Expand All @@ -849,10 +803,6 @@ def configure(
set_dry_run(dry_run)
set_verbosity(verbose)
prompt_optional_updates = not skip_upgrade
flag_enable_uc: bool | None = True if enable_uc else None
# Explicit `ucode configure` is a clean slate: when the user omits both
# `--enable-uc` and `UCODE_ENABLE_UC`, persisted `uc_enabled=true` from
# a prior run is reset to false.
try:
install_databricks_cli()
if agent is not None and agents is not None:
Expand Down Expand Up @@ -883,15 +833,11 @@ def configure(
prompt_optional_updates=prompt_optional_updates,
)
if workspace_entries is None:
configure_workspace_command(
tool, enable_uc=flag_enable_uc, reset_uc=True, **skip_kwargs
)
configure_workspace_command(tool, **skip_kwargs)
else:
configure_workspace_command(
tool,
workspaces=workspace_entries,
enable_uc=flag_enable_uc,
reset_uc=True,
**skip_kwargs,
)
elif agents is not None:
Expand All @@ -900,17 +846,13 @@ def configure(
configure_workspace_command(
selected_tools=selected_tools,
prompt_optional_updates=prompt_optional_updates,
enable_uc=flag_enable_uc,
reset_uc=True,
**skip_kwargs,
)
else:
configure_workspace_command(
selected_tools=selected_tools,
workspaces=workspace_entries,
prompt_optional_updates=prompt_optional_updates,
enable_uc=flag_enable_uc,
reset_uc=True,
**skip_kwargs,
)
else:
Expand All @@ -919,16 +861,12 @@ def configure(
if workspace_entries is None:
configure_workspace_command(
prompt_optional_updates=prompt_optional_updates,
enable_uc=flag_enable_uc,
reset_uc=True,
**skip_kwargs,
)
else:
configure_workspace_command(
workspaces=workspace_entries,
prompt_optional_updates=prompt_optional_updates,
enable_uc=flag_enable_uc,
reset_uc=True,
**skip_kwargs,
)
if tracing:
Expand Down
47 changes: 13 additions & 34 deletions src/ucode/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,27 +1050,6 @@ def build_auth_shell_command(
)


def uc_enabled(default: bool = False) -> bool:
    """Report whether the opt-in UC-securables discovery path is switched on.

    The decision is driven by the ``UCODE_ENABLE_UC`` environment variable:

    - unset, empty, or whitespace-only: ``default`` is returned unchanged, so
      the caller decides the fallback (e.g. ``False`` or a value previously
      persisted in state).
    - ``1``, ``true``, ``yes`` or ``on`` (case-insensitive, surrounding
      whitespace ignored): returns True.
    - any other non-blank value: returns False, regardless of ``default``.

    An explicit CLI flag is expected to be resolved by the caller before this
    function is consulted; this function itself only looks at the environment
    and ``default``.

    Per the feature's description, enabling UC discovery makes ucode:
      - resolve models via UC `model-services` as `system.ai.<model-name>`
        instead of the per-family AI Gateway listings
      - surface curated `system.ai.*` MCP services in `ucode configure mcp`
    """
    # Treat a missing variable like a blank one: both defer to the caller.
    value = os.environ.get("UCODE_ENABLE_UC", "").strip()
    if value:
        # A non-blank setting is authoritative, even when it is not truthy.
        return value.lower() in {"1", "true", "yes", "on"}
    return default


# A model-service's `name` is `model-services/system.ai.<model-name>`; the
# part after the prefix is exactly the model string agents send (no
# `databricks-` infix — that only appears on the inner destination name).
Expand All @@ -1097,11 +1076,12 @@ def _model_service_id(service: dict) -> str | None:
return name or None


# The model-services metastore listing is slow and flaky — large pages
# routinely 504 with `Timeout listing model services under metastore`. A small
# page is far more likely to come back, and each page gets a few retries before
# we give up.
_MODEL_SERVICES_PAGE_SIZE = 10
# The model-services metastore listing REQUIRES a bounded `page_size`:
# unparameterized or large-page requests (verified against
# eng-ml-agent-platform.staging 2026-06-14) return `HTTP 499` with an empty
# body, while pages of 10–100 come back reliably. A page can still 499
# intermittently under load, so each gets a few retries before we give up.
_MODEL_SERVICES_PAGE_SIZE = 100
_MODEL_SERVICES_PAGE_RETRIES = 4


Expand All @@ -1110,9 +1090,9 @@ def _get_model_services_page(
) -> tuple[dict | list | None, str | None]:
"""GET one model-services page, retrying on failure.

The endpoint frequently 504s under load; a retry usually succeeds. Returns
the same (payload, reason) shape as ``_http_get_json`` — the last attempt's
result when all retries are exhausted."""
The endpoint intermittently 499/504s under load; a retry usually succeeds.
Returns the same (payload, reason) shape as ``_http_get_json`` — the last
attempt's result when all retries are exhausted."""
payload: dict | list | None = None
reason: str | None = None
for attempt in range(retries):
Expand All @@ -1133,11 +1113,10 @@ def list_model_services(
"""List all `system.ai.*` model ids via the UC model-services API.

Pages through ``/api/2.1/unity-catalog/model-services`` (metastore scope)
and returns the de-duplicated, sorted list of ``system.ai.<model-name>``
ids. Uses a small page size with per-page retries because the endpoint is
slow and frequently 504s. Returns (ids, reason); reason is None on success,
otherwise it describes why the list is empty (HTTP/network error or no
services).
with a bounded ``page_size`` (the endpoint 499s without one) and returns the
de-duplicated, sorted list of ``system.ai.<model-name>`` ids. Returns
(ids, reason); reason is None on success, otherwise it describes why the
list is empty (HTTP/network error or no services).
"""
hostname = workspace_hostname(workspace)
ids: list[str] = []
Expand Down
18 changes: 6 additions & 12 deletions src/ucode/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
list_databricks_connections,
list_genie_spaces,
list_mcp_services,
uc_enabled,
workspace_hostname,
)
from ucode.state import load_full_state, load_state, save_state
Expand Down Expand Up @@ -990,17 +989,12 @@ def configure_mcp_command() -> int:
"Databricks apps",
lambda: discover_app_mcp_servers(workspace, profile),
)
# Curated `system.ai.*` MCP services live behind a separate UC API and
# are gated on the same UC opt-in that enables model-services discovery
# (env: UCODE_ENABLE_UC, CLI: `ucode configure --enable-uc`, persisted
# in state on configure).
available_mcp_service_names = (
_discover_mcp_source(
"MCP services",
lambda: discover_mcp_service_names(workspace, profile),
)
if uc_enabled(default=bool(state.get("uc_enabled")))
else []
# Curated `system.ai.*` MCP services live behind a separate UC API. Like
# the other sources this is best-effort — `_discover_mcp_source` swallows
# failures and returns [] so workspaces without them just see nothing extra.
available_mcp_service_names = _discover_mcp_source(
"MCP services",
lambda: discover_mcp_service_names(workspace, profile),
)

original_mcp_servers: list[dict] = list(state.get("mcp_servers") or [])
Expand Down
Loading
Loading