Skip to content

Commit 0d4dc1b

Browse files
Add UCODE_USE_UC_SECURABLES opt-in for UC model-services and MCP-services discovery (#151)
* Add UCODE_USE_MODEL_SERVICES opt-in for UC model-services discovery

  When UCODE_USE_MODEL_SERVICES is set, discover models via the Unity Catalog model-services API and address them as system.ai.<model-name> instead of the per-family AI Gateway listings. Base URLs are unchanged — only the model name differs.

  The flag is sticky: it's persisted into state at configure time so launches honor it without re-exporting the env var (an explicit env var still wins).

  Discovery uses a small page size with per-page retries because the metastore listing is slow and frequently 504s, and filters to system.ai.* so model services from other schemas don't leak into the family buckets.

  Co-authored-by: Isaac

* add discovery for system.ai mcp services with UCODE_USE_UC_SECURABLES set

* opt-in with UCODE_ENABLE_UC=1 or --enable-uc flag + e2e test

---------

Co-authored-by: Anjali Sujithan <anjali.sujithan@databricks.com>
1 parent ce52ffc commit 0d4dc1b

10 files changed

Lines changed: 924 additions & 30 deletions

File tree

src/ucode/agents/claude.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,11 @@ def _resolve_web_search_model(state: dict) -> str | None:
6262

6363

6464
WEB_SEARCH_MCP_NAME = "web_search"
65-
_CLAUDE_MODEL_RE = re.compile(r"^databricks-claude-(opus|sonnet)-(\d+)-(\d+)(.*)$")
65+
# Matches both the AI Gateway form (`databricks-claude-opus-4-8`) and the UC
66+
# model-services form (`system.ai.claude-opus-4-8`). Both prefixes are optional in the pattern, so a bare `claude-opus-4-8` matches as well.
67+
_CLAUDE_MODEL_RE = re.compile(
68+
r"^(?:system\.ai\.)?(?:databricks-)?claude-(opus|sonnet)-(\d+)-(\d+)(.*)$"
69+
)
6670

6771
# Env keys the MLflow Stop hook reads to route traces. Written into the
6872
# settings `env` block alongside the hook itself.

src/ucode/agents/codex.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,10 @@ def _openai_model_id(model: str | None) -> str | None:
255255

256256

257257
def _codex_model_id(model: str | None) -> str | None:
258+
# UC model-services ids (`system.ai.gpt-5`) route by name through the
259+
# gateway, so they must be sent verbatim — not rewritten to an OpenAI id.
260+
if model and model.startswith("system.ai."):
261+
return model
258262
if model in CODEX_OPENAI_ID_INCOMPATIBLE_MODELS:
259263
return model
260264
return _openai_model_id(model)
@@ -263,7 +267,12 @@ def _codex_model_id(model: str | None) -> str | None:
263267
def _parse_gpt(model: str | None) -> tuple[int, int | None, int | None, str] | None:
264268
if not model:
265269
return None
266-
match = _GPT_RE.fullmatch(model.split("/")[-1])
270+
# Strip the UC model-services prefix so `system.ai.gpt-5` parses for version
271+
# selection; the original id is preserved by callers that need it verbatim.
272+
tail = model.split("/")[-1]
273+
if tail.startswith("system.ai."):
274+
tail = tail[len("system.ai.") :]
275+
match = _GPT_RE.fullmatch(tail)
267276
if not match:
268277
return None
269278
major, minor, patch, suffix = match.groups()

src/ucode/cli.py

Lines changed: 103 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
discover_claude_models,
3434
discover_codex_models,
3535
discover_gemini_models,
36+
discover_model_services,
3637
ensure_ai_gateway_v2,
3738
ensure_databricks_auth,
3839
find_profile_name_for_host,
@@ -41,6 +42,7 @@
4142
install_databricks_cli,
4243
normalize_workspace_url,
4344
run_databricks_login,
45+
uc_enabled,
4446
)
4547
from ucode.mcp import (
4648
MCP_CLIENTS,
@@ -149,6 +151,8 @@ def configure_shared_state(
149151
profile: str | None = None,
150152
tools: list[str] | None = None,
151153
force_login: bool = False,
154+
enable_uc: bool | None = None,
155+
reset_uc: bool = False,
152156
) -> dict:
153157
"""Log into Databricks, enforce AI Gateway v2, fetch model lists, persist state.
154158
@@ -158,9 +162,28 @@ def configure_shared_state(
158162
``--profile`` to every CLI invocation so ambiguous `~/.databrickscfg`
159163
entries (e.g. DEFAULT and a named profile both pointing at the same host)
160164
don't error out. If ``None``, we resolve it from the host after login.
165+
``enable_uc`` is the resolved CLI flag (`--enable-uc`): when not None
166+
it overrides both the env var and the persisted state.
167+
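``reset_uc`` is True only on the explicit ``ucode configure`` flow; when it is set and ``enable_uc`` is None, the target workspace's persisted ``uc_enabled`` value is not consulted and the flag is resolved via ``uc_enabled(default=False)``.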
161168
"""
162169
workspace = normalize_workspace_url(workspace)
163-
previous_workspace = load_state().get("workspace")
170+
prior_state = load_state()
171+
previous_workspace = prior_state.get("workspace")
172+
# Precedence: explicit CLI flag > env var > (configure: reset to False;
173+
# launch: target workspace's persisted state). Use *target* state on the
174+
# launch path so the flag is sticky per-workspace and doesn't leak
175+
# across workspace switches.
176+
# TODO: when this flips uc_enabled True->False, prune any
177+
# `system.ai.*` MCP services from state["mcp_servers"] (and their
178+
# cross-tool registrations). Today they linger as orphans pointing at
179+
# /ai-gateway/mcp-services/* until the user re-runs `configure mcp`
180+
# or switches workspaces.
181+
if enable_uc is None:
182+
if reset_uc:
183+
enable_uc = uc_enabled(default=False)
184+
else:
185+
target_ws_state = load_full_state().get("workspaces", {}).get(workspace) or {}
186+
enable_uc = uc_enabled(default=bool(target_ws_state.get("uc_enabled")))
164187
fetch_all = tools is None
165188
if force_login:
166189
run_databricks_login(workspace, profile)
@@ -184,19 +207,29 @@ def configure_shared_state(
184207
claude_reason: str | None = None
185208
gemini_reason: str | None = None
186209
codex_reason: str | None = None
187-
with spinner("Fetching available models..."):
210+
claude_models = {}
211+
gemini_models = []
212+
codex_models = []
213+
if enable_uc:
214+
# Opt-in: one UC model-services call yields all families as
215+
# `system.ai.<model-name>` ids, bucketed by name. The single reason is
216+
# shared across the families that were requested.
217+
with spinner("Fetching available models (model services)..."):
218+
ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token)
188219
if want_claude:
189-
claude_models, claude_reason = discover_claude_models(workspace, token)
190-
else:
191-
claude_models = {}
220+
claude_models, claude_reason = ms_claude, ms_reason
192221
if want_gemini:
193-
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
194-
else:
195-
gemini_models = []
222+
gemini_models, gemini_reason = ms_gemini, ms_reason
196223
if want_codex:
197-
codex_models, codex_reason = discover_codex_models(workspace, token)
198-
else:
199-
codex_models = []
224+
codex_models, codex_reason = ms_codex, ms_reason
225+
else:
226+
with spinner("Fetching available models..."):
227+
if want_claude:
228+
claude_models, claude_reason = discover_claude_models(workspace, token)
229+
if want_gemini:
230+
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
231+
if want_codex:
232+
codex_models, codex_reason = discover_codex_models(workspace, token)
200233
opencode_models: dict[str, list[str]] = {}
201234
if claude_models:
202235
opencode_models["anthropic"] = list(claude_models.values())
@@ -210,6 +243,9 @@ def configure_shared_state(
210243
state["profile"] = profile
211244
else:
212245
state.pop("profile", None)
246+
# Persist the resolved flag so subsequent launches stay on the same
247+
# discovery path without the env var or CLI flag being re-passed.
248+
state["uc_enabled"] = enable_uc
213249
state["base_urls"] = build_shared_base_urls(workspace)
214250
if want_claude:
215251
state["claude_models"] = claude_models
@@ -239,13 +275,22 @@ def _configure_shared_workspace_states(
239275
tools: list[str] | None,
240276
*,
241277
force_login: bool,
278+
enable_uc: bool | None = None,
279+
reset_uc: bool = False,
242280
) -> list[dict]:
243281
if not workspaces:
244282
raise RuntimeError("At least one workspace must be provided.")
245283
states: list[dict] = []
246284
for workspace, profile in workspaces:
247285
states.append(
248-
configure_shared_state(workspace, profile=profile, tools=tools, force_login=force_login)
286+
configure_shared_state(
287+
workspace,
288+
profile=profile,
289+
tools=tools,
290+
force_login=force_login,
291+
enable_uc=enable_uc,
292+
reset_uc=reset_uc,
293+
)
249294
)
250295
return states
251296

@@ -256,14 +301,18 @@ def configure_workspace_command(
256301
workspaces: list[tuple[str, str | None]] | None = None,
257302
*,
258303
prompt_optional_updates: bool = True,
304+
enable_uc: bool | None = None,
305+
reset_uc: bool = False,
259306
) -> int:
260307
if tool is not None and selected_tools is not None:
261308
raise RuntimeError("Use either --agent or --agents, not both.")
262309

263310
workspace_entries = workspaces or [_prompt_for_configuration(tool)]
264311

265312
if tool is not None:
266-
states = _configure_shared_workspace_states(workspace_entries, [tool], force_login=True)
313+
states = _configure_shared_workspace_states(
314+
workspace_entries, [tool], force_login=True, enable_uc=enable_uc, reset_uc=reset_uc
315+
)
267316
state = states[0]
268317
state = configure_single_tool(tool, state)
269318
spec = TOOL_SPECS[tool]
@@ -290,7 +339,13 @@ def configure_workspace_command(
290339
raise RuntimeError(f"{spec['display']} validation failed — config reverted.")
291340
return 0
292341

293-
states = _configure_shared_workspace_states(workspace_entries, selected_tools, force_login=True)
342+
states = _configure_shared_workspace_states(
343+
workspace_entries,
344+
selected_tools,
345+
force_login=True,
346+
enable_uc=enable_uc,
347+
reset_uc=reset_uc,
348+
)
294349
state = states[0]
295350
save_state(state)
296351

@@ -649,6 +704,18 @@ def configure(
649704
"'low' prints terse single-line status instead.",
650705
),
651706
] = "normal",
707+
enable_uc: Annotated[
708+
bool,
709+
typer.Option(
710+
"--enable-uc",
711+
help="Discover models via UC `model-services` (`system.ai.<model>`) and "
712+
"surface curated `system.ai.*` MCP services. Equivalent to setting "
713+
"UCODE_ENABLE_UC=1 for this configure run. The value is persisted so "
714+
"subsequent `ucode <agent>` launches stay on the same discovery path; "
715+
"re-run `ucode configure` without the flag (and without "
716+
"UCODE_ENABLE_UC=1 in the env) to turn UC discovery back off.",
717+
),
718+
] = False,
652719
) -> None:
653720
"""Configure workspace URL and AI Gateway."""
654721
if ctx.invoked_subcommand is not None:
@@ -659,6 +726,10 @@ def configure(
659726
set_dry_run(dry_run)
660727
set_verbosity(verbose)
661728
prompt_optional_updates = not skip_upgrade
729+
flag_enable_uc: bool | None = True if enable_uc else None
730+
# Explicit `ucode configure` is a clean slate: when the user omits both
731+
# `--enable-uc` and `UCODE_ENABLE_UC`, persisted `uc_enabled=true` from
732+
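# a prior run is reset to false. An omitted flag is forwarded as None (not False) so that it does not count as an explicit override and the env var can still opt in.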
662733
try:
663734
install_databricks_cli()
664735
if agent is not None and agents is not None:
@@ -673,31 +744,46 @@ def configure(
673744
prompt_optional_updates=prompt_optional_updates,
674745
)
675746
if workspace_entries is None:
676-
configure_workspace_command(tool)
747+
configure_workspace_command(tool, enable_uc=flag_enable_uc, reset_uc=True)
677748
else:
678-
configure_workspace_command(tool, workspaces=workspace_entries)
749+
configure_workspace_command(
750+
tool,
751+
workspaces=workspace_entries,
752+
enable_uc=flag_enable_uc,
753+
reset_uc=True,
754+
)
679755
elif agents is not None:
680756
selected_tools = _parse_agents_option(agents)
681757
if workspace_entries is None:
682758
configure_workspace_command(
683759
selected_tools=selected_tools,
684760
prompt_optional_updates=prompt_optional_updates,
761+
enable_uc=flag_enable_uc,
762+
reset_uc=True,
685763
)
686764
else:
687765
configure_workspace_command(
688766
selected_tools=selected_tools,
689767
workspaces=workspace_entries,
690768
prompt_optional_updates=prompt_optional_updates,
769+
enable_uc=flag_enable_uc,
770+
reset_uc=True,
691771
)
692772
else:
693773
# Tool binaries are installed after the user picks which agents
694774
# they want, in configure_workspace_command.
695775
if workspace_entries is None:
696-
configure_workspace_command(prompt_optional_updates=prompt_optional_updates)
776+
configure_workspace_command(
777+
prompt_optional_updates=prompt_optional_updates,
778+
enable_uc=flag_enable_uc,
779+
reset_uc=True,
780+
)
697781
else:
698782
configure_workspace_command(
699783
workspaces=workspace_entries,
700784
prompt_optional_updates=prompt_optional_updates,
785+
enable_uc=flag_enable_uc,
786+
reset_uc=True,
701787
)
702788
if tracing:
703789
# The workspaces were just configured, so enable tracing for them

0 commit comments

Comments
 (0)