diff --git a/CLAUDE.md b/CLAUDE.md index 8630b30..7d201b4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -122,6 +122,33 @@ Do NOT suggest running `uvicorn` or `npm run dev` locally. The app depends on Da - **Python 3.11+** required (`pyproject.toml`). Uses `uv` for dependency management (`uv.lock` present). - **Root `package.json`** exists solely as a build hook for Databricks Apps — `postinstall` chains to `frontend/npm install`, `build` chains to `frontend/npm run build`. +## Agent Deployment Layer + +The `agents/` directory provides an optional agent deployment layer using `@app_agent` from `dbx-agent-app`. Each agent wraps existing domain logic from `backend/services/` and exposes it as a standalone Databricks agent with A2A discovery, MCP server, and eval support. + +See `docs/architecture-proposal.md` for the full design and implementation roadmap. + +``` +agents/ + _shared/ # Auth bridge, Lakebase pool, SP fallback + scorer/app.py # Wraps scanner.py + analyzer/app.py # Wraps analyzer.py + creator/app.py # Wraps create_agent.py + optimizer/app.py # Wraps optimizer.py + fixer/app.py # Wraps fix_agent.py + supervisor/proxy.py # Frontend proxy for agent deployment mode +agents.yaml # Multi-agent deployment config +``` + +## GenieRX Specification + +`docs/genierx-spec.md` defines the analysis and recommendation taxonomy. Key concepts: +- **Authoritative Facts** — raw data from systems of record, safe to surface directly +- **Canonical Metrics** — governed KPIs with stable definitions +- **Heuristic Signals** — derived fields with subjective thresholds; must carry caveats + +Consult the spec when working on analysis, scoring, or recommendation features. + ## Code Style - Backend: Python, Pydantic models, FastAPI routers, no class-based views diff --git a/agents/_shared/__init__.py b/agents/_shared/__init__.py new file mode 100644 index 0000000..04942cf --- /dev/null +++ b/agents/_shared/__init__.py @@ -0,0 +1,7 @@ +"""Shared utilities for Genie Workbench agents. + +Provides cross-cutting concerns that multiple agents need: +- auth_bridge: Bridge @app_agent UserContext into monolith + AI Dev Kit auth +- sp_fallback: Service principal fallback for Genie API scope errors +- lakebase_client: Shared PostgreSQL connection pool management +""" diff --git a/agents/_shared/auth_bridge.py b/agents/_shared/auth_bridge.py new file mode 100644 index 0000000..e6a8071 --- /dev/null +++ b/agents/_shared/auth_bridge.py @@ -0,0 +1,125 @@ +"""Bridge @app_agent UserContext into both monolith and AI Dev Kit auth systems. + +During migration, agent tools receive `request.user_context` from @app_agent, +but domain logic (scanner, genie_client, etc.) calls `get_workspace_client()` +from the monolith's auth module. And `databricks-tools-core` functions use +their own separate ContextVars via `set_databricks_auth()`. + +This module provides `obo_context()` — a single context manager that sets up +all three auth systems so existing domain logic works unchanged inside agents. + +Source patterns: + - backend/services/auth.py:25 (_obo_client ContextVar) + - backend/services/auth.py:33-58 (set_obo_user_token) + - databricks_tools_core/auth.py (set_databricks_auth / clear_databricks_auth) +""" + +from __future__ import annotations + +import os +from contextlib import contextmanager +from contextvars import ContextVar +from typing import Optional + +from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config + + +# Monolith-compatible ContextVar (mirrors backend/services/auth.py:25) +_obo_client: ContextVar[Optional[WorkspaceClient]] = ContextVar( + "_obo_client", default=None +) + +# Singleton SP client (lazy-initialized) +_sp_client: Optional[WorkspaceClient] = None + + +@contextmanager +def obo_context(access_token: str, host: Optional[str] = None): + """Set up OBO auth for monolith code and databricks-tools-core. + + Creates a per-request WorkspaceClient from the user's OBO token and + stores it in both the monolith ContextVar and the AI Dev Kit ContextVars. + + Usage in any agent tool:: + + @scorer.tool(description="Run IQ scan on a Genie Space") + async def scan_space(space_id: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + # All of these now work: + # - get_workspace_client() returns OBO client + # - databricks-tools-core functions use OBO token + result = scanner.calculate_score(space_id) + + For streaming generators, capture the token before yielding and + re-enter obo_context() per-yield. This matches the pattern in + backend/routers/create.py:125-198. + + Args: + access_token: The user's OBO access token. + host: Databricks workspace host. Defaults to DATABRICKS_HOST env var. + + Yields: + WorkspaceClient configured with the user's OBO token. + """ + resolved_host = host or os.environ.get("DATABRICKS_HOST", "") + + # 1. Create OBO WorkspaceClient (monolith pattern from auth.py:49-58) + # Must set auth_type="pat" and clear client_id/client_secret to prevent + # the SDK from using oauth-m2m from env vars on Databricks Apps. + cfg = Config( + host=resolved_host, + token=access_token, + auth_type="pat", + client_id=None, + client_secret=None, + ) + client = WorkspaceClient(config=cfg) + token = _obo_client.set(client) + + # 2. Set databricks-tools-core ContextVars (if available) + has_tools_core = False + try: + from databricks_tools_core.auth import ( + set_databricks_auth, + clear_databricks_auth, + ) + + set_databricks_auth(resolved_host, access_token) + has_tools_core = True + except ImportError: + pass + + try: + yield client + finally: + _obo_client.reset(token) + if has_tools_core: + clear_databricks_auth() + + +def get_workspace_client() -> WorkspaceClient: + """Drop-in replacement for backend.services.auth.get_workspace_client(). + + Returns the OBO client if inside an obo_context(), otherwise the default + singleton (SP on Databricks Apps, CLI/PAT locally). + + Domain logic can import this instead of the monolith version during + migration — the behavior is identical. + """ + obo = _obo_client.get() + if obo is not None: + return obo + return get_service_principal_client() + + +def get_service_principal_client() -> WorkspaceClient: + """Get the service principal client (bypasses OBO). + + Used for app-level operations and as fallback when the user's OBO token + lacks required scopes (e.g., Genie API before consent flow). + """ + global _sp_client + if _sp_client is None: + _sp_client = WorkspaceClient() + return _sp_client diff --git a/agents/_shared/lakebase_client.py b/agents/_shared/lakebase_client.py new file mode 100644 index 0000000..e850585 --- /dev/null +++ b/agents/_shared/lakebase_client.py @@ -0,0 +1,182 @@ +"""Shared Lakebase (PostgreSQL) connection pool management. + +Each agent initializes its own pool from its own app.yaml env vars +(LAKEBASE_HOST, LAKEBASE_INSTANCE_NAME, etc.). Schema migrations are +idempotent (IF NOT EXISTS) so agents can boot in any order. + +Domain-specific query functions (save_scan_result, get_score_history, etc.) +stay in each agent's own module — this shared client only manages the pool +lifecycle, credential generation, and DDL. + +Source: backend/services/lakebase.py (269 lines) +""" + +from __future__ import annotations + +import logging +import os +from typing import Optional + +logger = logging.getLogger(__name__) + +_pool = None +_lakebase_available = False + +# In-memory fallback (same pattern as backend/services/lakebase.py:12-17) +_memory_store: dict = { + "scans": {}, + "history": {}, + "stars": set(), + "seen": set(), + "sessions": {}, +} + + +# ── DDL statements per agent (all use IF NOT EXISTS) ────────────────────────── + +SCORER_DDL = [ + """CREATE TABLE IF NOT EXISTS scan_results ( + space_id TEXT NOT NULL, + score INTEGER NOT NULL, + maturity TEXT, + breakdown JSONB, + findings JSONB, + next_steps JSONB, + scanned_at TIMESTAMPTZ NOT NULL, + UNIQUE (space_id, scanned_at) + )""", + "CREATE TABLE IF NOT EXISTS starred_spaces (space_id TEXT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS seen_spaces (space_id TEXT PRIMARY KEY)", +] + +CREATOR_DDL = [ + """CREATE TABLE IF NOT EXISTS agent_sessions ( + session_id TEXT PRIMARY KEY, + data JSONB NOT NULL, + updated_at TIMESTAMPTZ DEFAULT NOW() + )""", +] + + +# ── Credential generation (mirrors backend/services/lakebase.py:23-59) ──────── + +def _generate_lakebase_credential() -> tuple[str, str] | None: + """Generate Lakebase OAuth credentials using the Databricks SDK.""" + instance_name = os.environ.get("LAKEBASE_INSTANCE_NAME") + if not instance_name: + return None + + try: + from agents._shared.auth_bridge import get_service_principal_client + + client = get_service_principal_client() + resp = client.api_client.do( + method="POST", + path="/api/2.0/database/credentials", + body={ + "request_id": "lakebase-pool", + "instance_names": [instance_name], + }, + ) + token = resp.get("token") + if not token: + logger.warning("Lakebase credential response missing token") + return None + + user = os.environ.get("LAKEBASE_USER") + if not user: + try: + me = client.current_user.me() + user = me.user_name + except Exception: + user = "databricks" + + logger.info("Generated Lakebase credential via SDK (user=%s)", user) + return user, token + except Exception as e: + logger.warning("Lakebase credential generation failed: %s", e) + return None + + +# ── Pool lifecycle ──────────────────────────────────────────────────────────── + +async def init_pool(ddl_statements: Optional[list[str]] = None): + """Initialize asyncpg pool and run idempotent DDL. + + Call this at agent startup (e.g., in a FastAPI lifespan handler). + + Args: + ddl_statements: SQL DDL to execute after connecting. + Use SCORER_DDL, CREATOR_DDL, or combine them. + """ + global _pool, _lakebase_available + + host = os.environ.get("LAKEBASE_HOST") + if not host: + logger.info("LAKEBASE_HOST not set — using in-memory fallback") + return + + password = os.environ.get("LAKEBASE_PASSWORD") + user = os.environ.get("LAKEBASE_USER", "postgres") + + if not password: + cred = _generate_lakebase_credential() + if cred: + user, password = cred + else: + logger.warning( + "No LAKEBASE_PASSWORD and credential generation failed " + "— using in-memory fallback" + ) + return + + try: + import asyncpg + + _pool = await asyncpg.create_pool( + host=host, + port=int(os.environ.get("LAKEBASE_PORT", "5432")), + database=os.environ.get("LAKEBASE_DATABASE", "databricks_postgres"), + user=user, + password=password, + min_size=2, + max_size=10, + command_timeout=30, + ssl="require", + ) + _lakebase_available = True + logger.info("Lakebase connection pool initialized") + + # Run idempotent DDL + if ddl_statements and _pool: + async with _pool.acquire() as conn: + for ddl in ddl_statements: + await conn.execute(ddl) + logger.info("Executed %d DDL statements", len(ddl_statements)) + + except Exception as e: + logger.warning("Lakebase unavailable: %s. Using in-memory fallback.", e) + _lakebase_available = False + + +async def close_pool(): + """Close the connection pool. Call at agent shutdown.""" + global _pool + if _pool: + await _pool.close() + _pool = None + + +async def get_pool(): + """Get the connection pool (or None if using in-memory fallback).""" + return _pool + + +def is_available() -> bool: + """Check if Lakebase is connected.""" + return _lakebase_available + + +def get_memory_store() -> dict: + """Get the in-memory fallback store (for when Lakebase is unavailable).""" + return _memory_store diff --git a/agents/_shared/sp_fallback.py b/agents/_shared/sp_fallback.py new file mode 100644 index 0000000..b0bcfad --- /dev/null +++ b/agents/_shared/sp_fallback.py @@ -0,0 +1,93 @@ +"""Service Principal fallback for Genie API scope errors. + +When OBO tokens lack the 'genie' scope (before the user consent flow is +triggered), the Genie API returns scope errors. This module extracts the +retry-with-SP pattern from the monolith into a reusable decorator and +convenience function. + +Source pattern: backend/services/genie_client.py:22-68 +""" + +from __future__ import annotations + +import functools +import logging +from typing import Callable, TypeVar + +from agents._shared.auth_bridge import ( + get_workspace_client, + get_service_principal_client, +) + +logger = logging.getLogger(__name__) + +T = TypeVar("T") + + +def _is_scope_error(e: Exception) -> bool: + """Check if exception is a missing OAuth scope error. + + Matches the same check in backend/services/genie_client.py:22-25. + """ + msg = str(e).lower() + return "scope" in msg or "insufficient_scope" in msg + + +def with_sp_fallback(func: Callable[..., T]) -> Callable[..., T]: + """Decorator: retry with SP client if OBO token lacks Genie scope. + + The decorated function must accept a ``client`` keyword argument + (a WorkspaceClient). On scope error, the function is retried with + the service principal client. + + Usage:: + + @with_sp_fallback + def get_genie_space(space_id: str, *, client=None): + client = client or get_workspace_client() + return client.api_client.do( + "GET", f"/api/2.0/genie/spaces/{space_id}" + ) + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + if _is_scope_error(e): + logger.info( + "%s: OBO scope error, retrying with service principal", + func.__name__, + ) + kwargs["client"] = get_service_principal_client() + return func(*args, **kwargs) + raise + + return wrapper + + +def genie_api_call(method: str, path: str, **kwargs): + """Make a Genie API call with automatic SP fallback. + + Convenience function for simple one-off API calls that don't need + the full decorator pattern. + + Args: + method: HTTP method (GET, POST, etc.) + path: API path (e.g., "/api/2.0/genie/spaces/{id}") + **kwargs: Forwarded to ``client.api_client.do()``. + + Returns: + API response dict. + """ + client = get_workspace_client() + try: + return client.api_client.do(method=method, path=path, **kwargs) + except Exception as e: + if _is_scope_error(e): + logger.info("Genie API %s: scope error, retrying with SP", path) + sp = get_service_principal_client() + if sp is not client: + return sp.api_client.do(method=method, path=path, **kwargs) + raise diff --git a/agents/analyzer/app.py b/agents/analyzer/app.py new file mode 100644 index 0000000..c3bdef4 --- /dev/null +++ b/agents/analyzer/app.py @@ -0,0 +1,133 @@ +"""genie-analyzer — analysis, querying, and SQL execution agent. + +Wraps: + - backend/routers/analysis.py (fetch, parse, query, SQL, benchmark compare) + - backend/services/genie_client.py (space fetching, Genie queries) + - backend/sql_executor.py (SQL warehouse execution) + +Streaming: No (all request/response) +LLM: Yes (benchmark comparison uses LLM) +""" + +from __future__ import annotations + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents._shared.auth_bridge import obo_context + + +@app_agent( + name="genie-analyzer", + description=( + "Fetches and parses Genie Space configurations, queries Genie for SQL, " + "executes SQL on warehouses, and compares benchmark results." + ), +) +async def analyzer(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to analysis tools.""" + ... + + +# ── Tools ──────────────────────────────────────────────────────────────────── + + +@analyzer.tool( + description=( + "Fetch and parse a Genie Space by ID. Returns the space " + "configuration data." + ), +) +async def fetch_space(genie_space_id: str, request: AgentRequest) -> dict: + """Wraps backend/services/genie_client.py::get_serialized_space""" + with obo_context(request.user_context.access_token): + from backend.services.genie_client import get_serialized_space + space_data = get_serialized_space(genie_space_id) + return {"genie_space_id": genie_space_id, "space_data": space_data} + + +@analyzer.tool( + description="Parse pasted Genie Space JSON from the API response.", +) +async def parse_space_json(json_content: str) -> dict: + """Wraps backend/routers/analysis.py::parse_space_json logic""" + import json + from datetime import datetime + + raw_response = json.loads(json_content) + if "serialized_space" not in raw_response: + raise ValueError("Missing 'serialized_space' field in JSON") + + serialized = raw_response["serialized_space"] + space_data = json.loads(serialized) if isinstance(serialized, str) else serialized + genie_space_id = f"pasted-{datetime.now().strftime('%Y%m%d-%H%M%S')}" + + return {"genie_space_id": genie_space_id, "space_data": space_data} + + +@analyzer.tool( + description=( + "Query a Genie Space with a natural language question. " + "Returns the generated SQL if successful." + ), +) +async def query_genie(genie_space_id: str, question: str, request: AgentRequest) -> dict: + """Wraps backend/services/genie_client.py::query_genie_for_sql""" + with obo_context(request.user_context.access_token): + from backend.services.genie_client import query_genie_for_sql + return query_genie_for_sql( + genie_space_id=genie_space_id, + question=question, + ) + + +@analyzer.tool( + description=( + "Execute a read-only SQL query on a Databricks SQL Warehouse. " + "Returns tabular results limited to 1000 rows." + ), +) +async def execute_sql(sql: str, warehouse_id: str | None = None, request: AgentRequest = None) -> dict: + """Wraps backend/sql_executor.py::execute_sql""" + with obo_context(request.user_context.access_token): + from backend.sql_executor import execute_sql as _execute + return _execute(sql=sql, warehouse_id=warehouse_id) + + +@analyzer.tool( + description=( + "Compare Genie SQL results against expected SQL results using " + "LLM-based semantic comparison. Returns match type, confidence, " + "and an auto-label suggestion." + ), +) +async def compare_results( + genie_result: dict, + expected_result: dict, + genie_sql: str | None = None, + expected_sql: str | None = None, + question: str | None = None, + request: AgentRequest = None, +) -> dict: + """Wraps backend/services/result_comparator.py::compare_results""" + import asyncio + with obo_context(request.user_context.access_token): + from backend.services.auth import run_in_context + from backend.services.result_comparator import compare_results as _compare + + result = await asyncio.get_running_loop().run_in_executor( + None, + run_in_context( + _compare, + genie_result=genie_result, + expected_result=expected_result, + genie_sql=genie_sql, + expected_sql=expected_sql, + question=question, + ), + ) + return result.model_dump() if hasattr(result, "model_dump") else result + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = analyzer.app diff --git a/agents/analyzer/app.yaml b/agents/analyzer/app.yaml new file mode 100644 index 0000000..3f82b08 --- /dev/null +++ b/agents/analyzer/app.yaml @@ -0,0 +1,8 @@ +command: ["uvicorn", "agents.analyzer.app:app", "--host", "0.0.0.0", "--port", "8000"] +env: + - name: DATABRICKS_HOST + value: "" + - name: SQL_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: LLM_MODEL + value: "databricks-claude-sonnet-4-6" diff --git a/agents/creator/app.py b/agents/creator/app.py new file mode 100644 index 0000000..2388a66 --- /dev/null +++ b/agents/creator/app.py @@ -0,0 +1,188 @@ +"""genie-creator — Conversational wizard for building new Genie Spaces. + +Wraps: + - backend/services/create_agent.py (CreateGenieAgent tool-calling loop) + - backend/services/create_agent_tools.py (16 tool implementations + dispatcher) + - backend/services/create_agent_session.py (two-tier session persistence) + - backend/services/uc_client.py (UC browsing) + - backend/prompts_create/ (dynamic prompt assembly) + - backend/genie_creator.py (Genie API write operations) + +This is the most complex agent — 16 tools, session persistence, LLM +tool-calling loop with message compaction. + +Streaming: Yes (SSE for agent chat) +LLM: Yes (tool-calling loop with Claude) +""" + +from __future__ import annotations + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents._shared.auth_bridge import obo_context +from agents.creator.schemas import GenerateConfigArgs + + +@app_agent( + name="genie-creator", + description=( + "Conversational wizard for building new Genie Spaces. Guides users " + "through requirements gathering, data source discovery, table " + "inspection, plan presentation, config generation, and space creation." + ), +) +async def creator(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to the creator workflow.""" + ... + + +# ── Helper ─────────────────────────────────────────────────────────────────── + +def _call_tool(name: str, arguments: dict, session_config: dict | None = None) -> dict: + """Dispatch to backend/services/create_agent_tools.py::handle_tool_call.""" + from backend.services.create_agent_tools import handle_tool_call + return handle_tool_call(name, arguments, session_config=session_config) + + +# ── UC Discovery Tools ────────────────────────────────────────────────────── + + +@creator.tool(description="List all Unity Catalog catalogs the user has access to.") +async def discover_catalogs(request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("discover_catalogs", {}) + + +@creator.tool(description="List schemas within a catalog.") +async def discover_schemas(catalog: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("discover_schemas", {"catalog": catalog}) + + +@creator.tool(description="List tables within a catalog.schema.") +async def discover_tables(catalog: str, schema: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("discover_tables", {"catalog": catalog, "schema": schema}) + + +# ── Table Inspection Tools ────────────────────────────────────────────────── + + +@creator.tool( + description="Get detailed table metadata: columns, types, descriptions, row count, sample rows.", +) +async def describe_table(table: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("describe_table", {"table": table}) + + +@creator.tool( + description="Profile selected columns: distinct values, null percentage, min/max.", +) +async def profile_columns(table: str, columns: list[str] | None = None, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("profile_columns", {"table": table, "columns": columns}) + + +@creator.tool( + description="Assess data quality: null rates, duplicate rates, freshness, anomalies.", +) +async def assess_data_quality(tables: list[str], request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("assess_data_quality", {"tables": tables}) + + +@creator.tool( + description="Profile table usage patterns: query frequency, common joins, active users.", +) +async def profile_table_usage(tables: list[str], request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("profile_table_usage", {"tables": tables}) + + +@creator.tool(description="Execute a test SQL query and return results (read-only, max 5 rows).") +async def test_sql(sql: str, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("test_sql", {"sql": sql}) + + +@creator.tool(description="List available SQL warehouses for the user.") +async def discover_warehouses(request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("discover_warehouses", {}) + + +# ── Config Generation Tools ───────────────────────────────────────────────── + + +@creator.tool(description="Get the Genie Space configuration JSON schema reference.") +async def get_config_schema(request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("get_config_schema", {}) + + +@creator.tool( + description=( + "Generate a complete Genie Space configuration from discovered " + "tables, inspection data, and user requirements." + ), + parameters=GenerateConfigArgs.model_json_schema(), +) +async def generate_config(request: AgentRequest = None, **kwargs) -> dict: + args = GenerateConfigArgs(**kwargs) + with obo_context(request.user_context.access_token): + return _call_tool("generate_config", args.model_dump()) + + +@creator.tool( + description="Present the space creation plan to the user for review before generating config.", + parameters=GenerateConfigArgs.model_json_schema(), +) +async def present_plan(request: AgentRequest = None, **kwargs) -> dict: + args = GenerateConfigArgs(**kwargs) + with obo_context(request.user_context.access_token): + return _call_tool("present_plan", args.model_dump()) + + +@creator.tool(description="Validate a generated configuration against the Genie Space schema.") +async def validate_config(config: dict, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("validate_config", {"config": config}) + + +@creator.tool(description="Apply incremental updates to an existing generated configuration.") +async def update_config(config: dict, updates: dict, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("update_config", {"config": config, "updates": updates}) + + +@creator.tool( + description="Create a new Genie Space in the workspace with the generated configuration.", +) +async def create_space( + display_name: str, + config: dict, + parent_path: str | None = None, + warehouse_id: str | None = None, + request: AgentRequest = None, +) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("create_space", { + "display_name": display_name, + "config": config, + "parent_path": parent_path, + "warehouse_id": warehouse_id, + }) + + +@creator.tool( + description="Update an existing Genie Space with a modified configuration.", +) +async def update_space(space_id: str, config: dict, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("update_space", {"space_id": space_id, "config": config}) + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = creator.app diff --git a/agents/creator/schemas.py b/agents/creator/schemas.py new file mode 100644 index 0000000..0e45db1 --- /dev/null +++ b/agents/creator/schemas.py @@ -0,0 +1,166 @@ +"""Pydantic models for creator agent tool schemas. + +These replace ~580 lines of hand-written JSON Schema in +backend/services/create_agent_tools.py. The models serve double duty: + +1. Generate JSON Schema for @app_agent tool registration via + ``GenerateConfigArgs.model_json_schema()`` +2. Validate + parse incoming tool arguments at runtime via + ``GenerateConfigArgs(**kwargs)`` + +Usage in agents/creator/app.py:: + + from agents.creator.schemas import GenerateConfigArgs + + @creator.tool( + description="Generate a complete Genie Space configuration", + parameters=GenerateConfigArgs.model_json_schema(), + ) + async def generate_config(**kwargs) -> dict: + args = GenerateConfigArgs(**kwargs) # Validates at runtime + ... +""" + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel, Field + + +# ── Shared nested types ────────────────────────────────────────────────────── + + +class ColumnConfig(BaseModel): + """Column-level configuration within a table.""" + + column_name: str + description: Optional[str] = None + synonyms: Optional[list[str]] = None + exclude: Optional[bool] = None + enable_matching: Optional[bool] = None + + +class SqlParameter(BaseModel): + """Parameter definition for parameterized example SQL.""" + + name: str + type_hint: str = Field( + ..., pattern="^(STRING|NUMBER|DATE|BOOLEAN)$" + ) + description: Optional[str] = None + default_value: str + + +class ExampleSql(BaseModel): + """Example SQL with natural-language question mapping.""" + + question: str + sql: str + usage_guidance: Optional[str] = None + parameters: Optional[list[SqlParameter]] = None + + +class Measure(BaseModel): + """Aggregate measure definition (SUM, AVG, COUNT, etc.).""" + + alias: str + sql: str + display_name: Optional[str] = None + synonyms: Optional[list[str]] = None + instruction: Optional[str] = None + comment: Optional[str] = None + + +class Filter(BaseModel): + """Pre-defined filter (WHERE clause snippet).""" + + display_name: str + sql: str + synonyms: Optional[list[str]] = None + instruction: Optional[str] = None + comment: Optional[str] = None + + +class Expression(BaseModel): + """Computed expression (derived column).""" + + alias: str + sql: str + display_name: Optional[str] = None + synonyms: Optional[list[str]] = None + instruction: Optional[str] = None + comment: Optional[str] = None + + +class JoinSpec(BaseModel): + """Join specification between two tables.""" + + left_table: str + left_alias: str + right_table: str + right_alias: str + left_column: str + right_column: str + relationship: str = Field( + ..., + pattern="^(MANY_TO_ONE|ONE_TO_MANY|ONE_TO_ONE|MANY_TO_MANY)$", + ) + instruction: Optional[str] = None + comment: Optional[str] = None + + +class Benchmark(BaseModel): + """Question/SQL pair for evaluation benchmarks.""" + + question: str + expected_sql: str + + +class MetricViewColumnConfig(BaseModel): + """Column configuration within a metric view.""" + + column_name: str + description: Optional[str] = None + enable_format_assistance: Optional[bool] = None + + +class MetricView(BaseModel): + """Metric view definition (curated data view).""" + + identifier: str + description: Optional[str] = None + column_configs: Optional[list[MetricViewColumnConfig]] = None + + +class TableConfig(BaseModel): + """Table-level configuration with optional column configs.""" + + identifier: str + description: Optional[str] = None + column_configs: Optional[list[ColumnConfig]] = None + + +# ── Top-level tool argument models ─────────────────────────────────────────── + + +class GenerateConfigArgs(BaseModel): + """Arguments for ``generate_config`` and ``present_plan`` tools. + + These tools share the same schema — present_plan previews what + generate_config will produce. + """ + + tables: list[TableConfig] + sample_questions: Optional[list[str]] = None + text_instructions: Optional[list[str]] = None + example_sqls: Optional[list[ExampleSql]] = Field( + None, min_length=3 + ) + measures: Optional[list[Measure]] = None + filters: Optional[list[Filter]] = None + expressions: Optional[list[Expression]] = None + join_specs: Optional[list[JoinSpec]] = None + benchmarks: Optional[list[Benchmark]] = None + generate_benchmarks: Optional[bool] = None + metric_views: Optional[list[MetricView]] = None diff --git a/agents/fixer/app.py b/agents/fixer/app.py new file mode 100644 index 0000000..edd0c15 --- /dev/null +++ b/agents/fixer/app.py @@ -0,0 +1,85 @@ +"""genie-fixer — AI fix agent for Genie Space configurations. + +Wraps: + - backend/services/fix_agent.py (FixAgent — LLM patch generation + application) + +Streaming: Yes (SSE — thinking, patch, applying, complete/error events) +LLM: Yes (fix plan generation) +""" + +from __future__ import annotations + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents._shared.auth_bridge import obo_context + + +@app_agent( + name="genie-fixer", + description=( + "AI fix agent that generates and applies targeted patches to Genie " + "Space configurations based on IQ scan findings." + ), +) +async def fixer(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to fix tools.""" + ... + + +# ── Tools ──────────────────────────────────────────────────────────────────── + + +@fixer.tool( + description=( + "Generate and apply fixes to a Genie Space based on IQ scan findings. " + "Returns a stream of progress events: thinking, patch details, " + "application status, and final result with before/after diff." + ), +) +async def generate_fixes( + space_id: str, + findings: list[str], + space_config: dict, + request: AgentRequest = None, +) -> list[dict]: + """Wraps backend/services/fix_agent.py::FixAgent.run + + Collects the streaming events into a list for agent protocol compatibility. + For SSE streaming via the supervisor proxy, use the monolith endpoint. + """ + with obo_context(request.user_context.access_token): + from backend.services.fix_agent import get_fix_agent + + agent = get_fix_agent() + events = [] + async for event in agent.run( + space_id=space_id, + findings=findings, + space_config=space_config, + ): + events.append(event) + + return events + + +@fixer.tool( + description=( + "Apply a specific config patch to a Genie Space via the Databricks API. " + "Takes a full updated config and writes it to the space." + ), +) +async def apply_patch( + space_id: str, + updated_config: dict, + request: AgentRequest = None, +) -> dict: + """Wraps backend/services/fix_agent.py::_apply_config_to_databricks""" + with obo_context(request.user_context.access_token): + from backend.services.fix_agent import _apply_config_to_databricks + await _apply_config_to_databricks(space_id, updated_config) + return {"space_id": space_id, "status": "applied"} + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = fixer.app diff --git a/agents/fixer/app.yaml b/agents/fixer/app.yaml new file mode 100644 index 0000000..0f99d55 --- /dev/null +++ b/agents/fixer/app.yaml @@ -0,0 +1,6 @@ +command: ["uvicorn", "agents.fixer.app:app", "--host", "0.0.0.0", "--port", "8000"] +env: + - name: DATABRICKS_HOST + value: "" + - name: LLM_MODEL + value: "databricks-claude-sonnet-4-6" diff --git a/agents/optimizer/app.py b/agents/optimizer/app.py new file mode 100644 index 0000000..a4b882f --- /dev/null +++ b/agents/optimizer/app.py @@ -0,0 +1,116 @@ +"""genie-optimizer — optimization suggestions from benchmark feedback. + +Wraps: + - backend/services/optimizer.py (GenieSpaceOptimizer) + - backend/routers/analysis.py (optimize, merge, create endpoints) + +Streaming: Yes (heartbeat SSE for long LLM calls) +LLM: Yes (suggestion generation) +""" + +from __future__ import annotations + +import asyncio + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents._shared.auth_bridge import obo_context + + +@app_agent( + name="genie-optimizer", + description=( + "Generates optimization suggestions for Genie Spaces based on " + "benchmark labeling feedback. Merges suggestions into config and " + "can create new optimized spaces." + ), +) +async def optimizer(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to optimization tools.""" + ... + + +# ── Tools ──────────────────────────────────────────────────────────────────── + + +@optimizer.tool( + description=( + "Generate optimization suggestions based on benchmark labeling " + "feedback. Uses LLM to analyze failure patterns and recommend " + "config changes. May take 30-90 seconds." + ), +) +async def generate_suggestions( + space_data: dict, + labeling_feedback: list[dict], + request: AgentRequest = None, +) -> dict: + """Wraps backend/services/optimizer.py::generate_optimizations""" + with obo_context(request.user_context.access_token): + from backend.services.auth import run_in_context + from backend.services.optimizer import get_optimizer + from backend.models import LabelingFeedbackItem + + feedback_items = [LabelingFeedbackItem(**f) for f in labeling_feedback] + + def _run(): + return get_optimizer().generate_optimizations( + space_data=space_data, + labeling_feedback=feedback_items, + ) + + result = await asyncio.get_running_loop().run_in_executor( + None, run_in_context(_run), + ) + return result.model_dump() + + +@optimizer.tool( + description=( + "Merge optimization suggestions into a space config. Fast operation " + "that applies field-level changes without LLM calls." + ), +) +async def merge_config( + space_data: dict, + suggestions: list[dict], + request: AgentRequest = None, +) -> dict: + """Wraps backend/services/optimizer.py::merge_config""" + with obo_context(request.user_context.access_token): + from backend.services.optimizer import get_optimizer + from backend.models import OptimizationSuggestion + + suggestion_items = [OptimizationSuggestion(**s) for s in suggestions] + result = get_optimizer().merge_config( + space_data=space_data, + suggestions=suggestion_items, + ) + return result.model_dump() + + +@optimizer.tool( + description=( + "Create a new Genie Space with an optimized configuration. " + "Requires GENIE_TARGET_DIRECTORY to be configured." + ), +) +async def create_space( + display_name: str, + merged_config: dict, + parent_path: str | None = None, + request: AgentRequest = None, +) -> dict: + """Wraps backend/genie_creator.py::create_genie_space""" + with obo_context(request.user_context.access_token): + from backend.genie_creator import create_genie_space as _create + return _create( + display_name=display_name, + merged_config=merged_config, + parent_path=parent_path, + ) + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = optimizer.app diff --git a/agents/optimizer/app.yaml b/agents/optimizer/app.yaml new file mode 100644 index 0000000..e12f39e --- /dev/null +++ b/agents/optimizer/app.yaml @@ -0,0 +1,10 @@ +command: ["uvicorn", "agents.optimizer.app:app", "--host", "0.0.0.0", "--port", "8000"] +env: + - name: DATABRICKS_HOST + value: "" + - name: SQL_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: LLM_MODEL + value: "databricks-claude-sonnet-4-6" + - name: GENIE_TARGET_DIRECTORY + value: "/Shared/" diff --git a/agents/scorer/app.py b/agents/scorer/app.py new file mode 100644 index 0000000..a9058f1 --- /dev/null +++ b/agents/scorer/app.py @@ -0,0 +1,160 @@ +"""genie-scorer — IQ scoring agent for Genie Spaces. + +Wraps: + - backend/services/scanner.py (rule-based scoring engine) + - backend/services/lakebase.py (score persistence, stars) + - backend/routers/spaces.py (list, detail endpoints) + +This agent has NO LLM dependency — pure rule-based scoring. +""" + +from __future__ import annotations + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents._shared.auth_bridge import obo_context +from agents._shared.lakebase_client import init_pool, SCORER_DDL + + +@app_agent( + name="genie-scorer", + description=( + "IQ scoring for Genie Spaces. Scans space configurations against a " + "rule-based scoring rubric (foundation, data setup, SQL assets, " + "optimization), persists scores to Lakebase, and tracks score history." + ), +) +async def scorer(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to the appropriate scoring tool.""" + ... + + +# ── Lifecycle ──────────────────────────────────────────────────────────────── + + +async def on_startup(): + """Initialize Lakebase pool with scorer-specific DDL.""" + await init_pool(SCORER_DDL) + + +# ── Tools ──────────────────────────────────────────────────────────────────── + + +@scorer.tool( + description=( + "Run an IQ scan on a Genie Space. Fetches the space configuration, " + "calculates a score (0-15) across four dimensions, and persists the " + "result to Lakebase." + ), +) +async def scan_space(space_id: str, request: AgentRequest) -> dict: + """Wraps backend/services/scanner.py::scan_space""" + with obo_context(request.user_context.access_token): + from backend.services.scanner import scan_space as _scan + return await _scan(space_id) + + +@scorer.tool( + description=( + "Get score history for a Genie Space over the last N days. " + "Returns a list of {score, maturity, scanned_at} entries." + ), +) +async def get_history(space_id: str, days: int = 30) -> list[dict]: + """Wraps backend/services/lakebase.py::get_score_history""" + from backend.services.lakebase import get_score_history + rows = await get_score_history(space_id, days=days) + return [dict(r) for r in rows] if rows else [] + + +@scorer.tool( + description="Toggle the star (bookmark) status of a Genie Space.", +) +async def toggle_star(space_id: str, starred: bool) -> dict: + """Wraps backend/services/lakebase.py::star_space""" + from backend.services.lakebase import star_space + await star_space(space_id, starred) + return {"space_id": space_id, "starred": starred} + + +@scorer.tool( + description=( + "List all Genie Spaces the user has access to, enriched with IQ " + "scores. Supports filtering by name, star status, and score range." + ), +) +async def list_spaces( + search: str | None = None, + starred_only: bool = False, + min_score: int | None = None, + max_score: int | None = None, + request: AgentRequest = None, +) -> list[dict]: + """Wraps backend/routers/spaces.py::list_spaces logic""" + with obo_context(request.user_context.access_token): + from backend.services.genie_client import list_genie_spaces + from backend.services.lakebase import get_latest_score, get_starred_spaces + from backend.services.auth import get_workspace_client + + raw_spaces = list_genie_spaces() + client = get_workspace_client() + host = (client.config.host or "").rstrip("/") + starred_ids = set(await get_starred_spaces()) + + items = [] + for space in raw_spaces: + sid = space.get("space_id", "") + title = space.get("display_name", space.get("title", "")) + + if search and search.lower() not in title.lower(): + continue + if starred_only and sid not in starred_ids: + continue + + score_data = await get_latest_score(sid) + score = score_data.get("score") if score_data else None + + if min_score is not None and (score is None or score < min_score): + continue + if max_score is not None and (score is None or score > max_score): + continue + + items.append({ + "space_id": sid, + "title": title, + "space_url": f"{host}/genie/rooms/{sid}" if host else None, + "score": score, + "maturity": score_data.get("maturity") if score_data else None, + "starred": sid in starred_ids, + }) + + return items + + +@scorer.tool( + description="Get detailed space metadata with latest scan result and star status.", +) +async def get_space_detail(space_id: str, request: AgentRequest) -> dict: + """Wraps backend/routers/spaces.py::get_space_detail logic""" + with obo_context(request.user_context.access_token): + from backend.services.genie_client import get_genie_space + from backend.services.lakebase import get_latest_score, is_space_starred + + space_info = get_genie_space(space_id) + score_data = await get_latest_score(space_id) + starred = await is_space_starred(space_id) + + return { + "space_id": space_id, + "title": space_info.get("display_name", ""), + "score": score_data.get("score") if score_data else None, + "maturity": score_data.get("maturity") if score_data else None, + "starred": starred, + "last_scanned": score_data.get("scanned_at") if score_data else None, + "scan_result": score_data, + } + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = scorer.app diff --git a/agents/supervisor/__init__.py b/agents/supervisor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agents/supervisor/proxy.py b/agents/supervisor/proxy.py new file mode 100644 index 0000000..62107de --- /dev/null +++ b/agents/supervisor/proxy.py @@ -0,0 +1,179 @@ +"""Transparent proxy from supervisor to sub-agents. + +Maps current /api/* paths to sub-agent URLs so the React SPA needs +zero changes. Handles JSON responses and SSE streaming. + +The route table is ordered — more specific paths match before general +prefixes. Each entry maps a path prefix to an environment variable +containing the sub-agent's base URL (set via agents.yaml url_env_map). + +Path → agent mapping derived from frontend/src/lib/api.ts (28 API calls). +""" + +from __future__ import annotations + +import os +import re +from typing import Optional + +import httpx +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, StreamingResponse + + +# Ordered route table: (pattern, env_var_or_None) +# More specific patterns MUST come before general prefixes. +# None means the supervisor handles the route directly (no proxy). +ROUTE_TABLE: list[tuple[str, Optional[str]]] = [ + # Specific sub-paths that override their parent prefix + ("/api/spaces/*/fix", "FIXER_URL"), # fix agent (SSE) + ("/api/genie/create", "CREATOR_URL"), # create Genie Space via API + + # General prefixes + ("/api/spaces", "SCORER_URL"), # list, scan, history, star + ("/api/space", "ANALYZER_URL"), # fetch, parse + ("/api/analyze", "ANALYZER_URL"), # section, all, stream (SSE) + ("/api/genie", "ANALYZER_URL"), # query + ("/api/sql", "ANALYZER_URL"), # execute + ("/api/optimize", "OPTIMIZER_URL"), # stream optimization (SSE) + ("/api/config", "OPTIMIZER_URL"), # merge + ("/api/create", "CREATOR_URL"), # agent chat (SSE), discover, validate, create + ("/api/checklist", "ANALYZER_URL"), # static content + ("/api/sections", "ANALYZER_URL"), # section list + + # Supervisor-owned (no proxy) + ("/api/settings", None), + ("/api/auth", None), + ("/api/admin", None), +] + +# Pre-compile glob patterns (only "/api/spaces/*/fix" currently) +_COMPILED_ROUTES: list[tuple[re.Pattern, Optional[str]]] = [] + +for pattern, env_var in ROUTE_TABLE: + if "*" in pattern: + # Convert glob "*" to regex "[^/]+" + regex = "^" + re.escape(pattern).replace(r"\*", "[^/]+") + _COMPILED_ROUTES.append((re.compile(regex), env_var)) + else: + # Simple prefix match + _COMPILED_ROUTES.append((re.compile("^" + re.escape(pattern)), env_var)) + + +def _resolve_upstream(path: str) -> Optional[str]: + """Find the upstream agent URL for a given API path. + + Returns: + Base URL string if the path should be proxied. + None if the supervisor handles it directly. + + Raises: + KeyError: If no route matches the path. + """ + for compiled_pattern, env_var in _COMPILED_ROUTES: + if compiled_pattern.match(path): + if env_var is None: + return None + url = os.environ.get(env_var) + if not url: + return None + return url.rstrip("/") + + raise KeyError(f"No route for {path}") + + +# Hop-by-hop headers that should not be forwarded +_HOP_HEADERS = frozenset({"host", "content-length", "transfer-encoding"}) + + +def mount_proxy(app: FastAPI): + """Mount the catch-all proxy route on a FastAPI app. + + This should be mounted AFTER any supervisor-owned routes + (settings, auth, admin) so they take priority. + """ + + @app.api_route( + "/api/{path:path}", + methods=["GET", "POST", "PUT", "DELETE"], + ) + async def proxy(request: Request, path: str): + full_path = f"/api/{path}" + + try: + upstream_base = _resolve_upstream(full_path) + except KeyError: + return JSONResponse( + status_code=404, + content={"detail": f"No upstream agent for {full_path}"}, + ) + + if upstream_base is None: + # Supervisor-owned route that wasn't caught by an explicit handler. + return JSONResponse( + status_code=404, + content={"detail": f"Not found: {full_path}"}, + ) + + # Forward all headers except hop-by-hop + headers = { + k: v + for k, v in request.headers.items() + if k.lower() not in _HOP_HEADERS + } + + upstream_url = f"{upstream_base}{full_path}" + if request.url.query: + upstream_url += f"?{request.url.query}" + + body = await request.body() + + # First, make a non-streaming request to check the content type + async with httpx.AsyncClient(timeout=300.0) as client: + upstream_resp = await client.request( + method=request.method, + url=upstream_url, + headers=headers, + content=body, + follow_redirects=True, + ) + + content_type = upstream_resp.headers.get("content-type", "") + + # SSE: re-issue as a streaming request and forward chunks + if "text/event-stream" in content_type: + + async def stream(): + async with httpx.AsyncClient(timeout=300.0) as sc: + async with sc.stream( + method=request.method, + url=upstream_url, + headers=headers, + content=body, + ) as sr: + async for chunk in sr.aiter_bytes(): + yield chunk + + return StreamingResponse( + stream(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "X-Accel-Buffering": "no", + }, + ) + + # JSON: pass through with status code + if content_type.startswith("application/json"): + return JSONResponse( + status_code=upstream_resp.status_code, + content=upstream_resp.json(), + headers={"X-Upstream-Agent": upstream_base}, + ) + + # Other content types: pass through as-is + return JSONResponse( + status_code=upstream_resp.status_code, + content={"raw": upstream_resp.text}, + headers={"X-Upstream-Agent": upstream_base}, + ) diff --git a/docs/architecture-proposal.md b/docs/architecture-proposal.md new file mode 100644 index 0000000..8e547b7 --- /dev/null +++ b/docs/architecture-proposal.md @@ -0,0 +1,429 @@ +# Agent Deployment Layer for Genie Workbench + +> **Status:** Proposal +> **Author:** Stuart Gano +> **Date:** 2026-03-10 + +--- + +## Summary + +The Genie Workbench now has scoring, analysis, optimization, creation, and auto-optimization all working as a Databricks App. This proposal adds an **agent deployment layer** so each capability can also be deployed as a standalone Databricks agent — enabling A2A discovery, MCP tool integration, and independent `mlflow.genai.evaluate()` testing. + +The existing backend is unchanged. The agent layer wraps existing domain logic using: + +- **`dbx-agent-app`** (`@app_agent` decorator) — auto-generates `/invocations` endpoints, agent cards, MCP servers, and health checks +- **AI Dev Kit** (`databricks-tools-core`) — optional drop-in replacements for UC browsing and SQL execution + +**What this enables:** +- Other workspace apps can discover and call Workbench capabilities via A2A protocol +- Each agent gets a free MCP server for tool integration +- Automated eval pipelines via `mlflow.genai.evaluate()` against individual agents +- Independent deployment of individual capabilities when needed + +**What this does NOT change:** +- The existing monolith deployment continues to work as-is +- The React frontend is unmodified +- All existing domain logic (scanner, analyzer, optimizer, fix agent, create agent, GSO) stays in place + +--- + +## Architecture + +The agent layer sits alongside the existing monolith. Both deployment modes work: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ EXISTING: Monolith (unchanged) │ +│ backend/main.py → routers → services → frontend/dist │ +│ Deployed via: databricks apps deploy │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ NEW: Agent Layer (additive) │ +│ Deployed via: dbx-agent-app deploy --config agents.yaml │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ genie- │ │ genie- │ │ genie- │ │ +│ │ scorer │ │ analyzer │ │ creator │ │ +│ │ wraps: │ │ wraps: │ │ wraps: │ │ +│ │ scanner │ │ analyzer │ │ create_ │ │ +│ │ .py │ │ .py │ │ agent.py │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ genie- │ │ genie- │ │supervisor│ │ +│ │ optimizer│ │ fixer │ │ React SPA│ │ +│ │ wraps: │ │ wraps: │ │ + proxy │ │ +│ │ optimizer│ │ fix_agent│ │ │ │ +│ │ .py │ │ .py │ │ │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +Each agent wraps existing domain logic and adds: +- **`@app_agent` decorator** — auto-generates `/invocations`, `/.well-known/agent.json`, `/health`, MCP server +- **OBO auth** — `request.user_context` bridges into existing auth via `obo_context()` +- **Tool definitions** — auto-generated from `@agent.tool()` decorated functions +- **Eval support** — `app_predict_fn()` bridge to `mlflow.genai.evaluate()` + +--- + +## Agent Decomposition + +### What each agent wraps + +| Agent | Wraps | Tools exposed | Lakebase? | Streaming? | LLM? | +|-------|-------|---------------|-----------|------------|------| +| **genie-scorer** | `services/scanner.py` | `scan_space`, `get_history`, `toggle_star`, `list_spaces` | Yes (scores, stars) | No | No | +| **genie-analyzer** | `services/analyzer.py`, `services/genie_client.py` | `fetch_space`, `analyze_section`, `analyze_all`, `query_genie`, `execute_sql` | No | Yes (SSE) | Yes | +| **genie-creator** | `services/create_agent.py`, `services/create_agent_tools.py` | All 16 current tools (discover_*, describe_*, profile_*, generate_config, etc.) | Yes (sessions) | Yes (SSE) | Yes | +| **genie-optimizer** | `services/optimizer.py` | `generate_suggestions`, `merge_config`, `label_benchmark` | No | No (heartbeat SSE) | Yes | +| **genie-fixer** | `services/fix_agent.py` | `generate_fixes`, `apply_patch` | No | Yes (SSE) | Yes | +| **supervisor** | Existing React SPA | Routes frontend API calls to sub-agents, serves static files | Yes (starred) | Proxy | No | + +Each agent imports from `backend/services/` — the domain logic stays where it is. The agent layer is a thin wrapper that exposes existing functions as agent tools with standard protocol support. + +### Domain logic (unchanged) + +These files contain the business logic that agents wrap. They are not modified: + +- `scanner.py` — Rule-based IQ scoring (maturity levels, dimension weights) +- `analyzer.py` — LLM checklist evaluation with session management +- `optimizer.py` — Optimization suggestion generation from labeling feedback +- `fix_agent.py` — Patch generation + application via Genie API +- `create_agent.py` — Tool-calling loop with message compaction, JSON repair, session recovery +- `create_agent_session.py` — Two-tier session persistence (memory + Lakebase) +- `prompts_create/` — Dynamic prompt assembly (9 modules: core, data_sources, requirements, plan, etc.) +- `references/schema.md` — Genie Space schema reference +- `genie_client.py` — Genie API read operations (including SP-fallback for missing OAuth scopes) +- `lakebase.py` — PostgreSQL persistence with in-memory fallback +- `auto_optimize.py` + GSO package — Auto-optimization pipeline + +--- + +## What the Agent Layer Provides + +### 1. Auto-generated tool definitions from `@agent.tool()` decorators + +Agent tools are defined as decorated functions — schemas, dispatch, and validation are auto-generated: + +```python +@creator.tool(description="List all Unity Catalog catalogs the user has access to.") +async def discover_catalogs() -> dict: + from databricks_tools_core.unity_catalog import list_catalogs + return {"catalogs": list_catalogs()} + +@creator.tool(description="List schemas within a catalog.") +async def discover_schemas(catalog: str) -> dict: + from databricks_tools_core.unity_catalog import list_schemas + return {"schemas": list_schemas(catalog)} +``` + +For tools with complex nested parameters (like `generate_config`), Pydantic models provide the schema and runtime validation in one place — see `agents/creator/schemas.py`. + +### 2. OBO auth bridging via `obo_context()` + +Agents receive the user's token via `request.user_context`. The `obo_context()` context manager bridges this into the existing `get_workspace_client()` pattern so domain logic works unchanged: + +```python +@scorer.tool(description="Run IQ scan on a Genie Space") +async def scan_space(space_id: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + # Existing scanner.py works as-is — get_workspace_client() returns OBO client + result = scanner.calculate_score(space_id) +``` + +### 3. Optional AI Dev Kit integration + +Where applicable, agents can use `databricks-tools-core` as drop-in replacements: + +| Existing service | AI Dev Kit equivalent | +|---------|-------------| +| `backend/services/uc_client.py` | `databricks_tools_core.unity_catalog` | +| SQL execution in various services | `databricks_tools_core.sql` | +| Warehouse auto-detection | `get_best_warehouse()` | + +This is optional and incremental — agents can import existing services or AI Dev Kit functions interchangeably. + +--- + +## Deployment Topology + +### agents.yaml + +```yaml +project: + name: genie-workbench + workspace_path: /Workspace/Shared/apps + +agents: + - name: scorer + source: ./agents/scorer + - name: analyzer + source: ./agents/analyzer + - name: creator + source: ./agents/creator + - name: optimizer + source: ./agents/optimizer + - name: fixer + source: ./agents/fixer + - name: supervisor + source: . + depends_on: [scorer, analyzer, creator, optimizer, fixer] + url_env_map: + scorer: SCORER_URL + analyzer: ANALYZER_URL + creator: CREATOR_URL + optimizer: OPTIMIZER_URL + fixer: FIXER_URL +``` + +Each agent deploys as its own Databricks App with: +- Its own `app.yaml` defining env vars and resource bindings +- Its own service principal (for Lakebase, LLM endpoint access) +- Auto-generated `/.well-known/agent.json` for A2A discovery +- Auto-generated MCP server for tool integration + +### Per-Agent app.yaml Example (scorer) + +```yaml +command: ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] +env: + - name: LAKEBASE_HOST + value: "" + - name: LAKEBASE_INSTANCE_NAME + value: "" +``` + +--- + +## Wire Protocol + +### Frontend → Supervisor → Sub-Agents + +The React SPA continues to hit the same API paths (`/api/spaces/*`, `/api/analyze/*`, `/api/create/*`). The supervisor proxies requests to sub-agents: + +``` +Browser → /api/spaces/scan → supervisor → genie-scorer /invocations +Browser → /api/analyze/stream → supervisor → genie-analyzer /invocations +Browser → /api/create/agent/chat → supervisor → genie-creator /invocations +Browser → /api/optimize → supervisor → genie-optimizer /invocations +Browser → /api/spaces/{id}/fix → supervisor → genie-fixer /invocations +``` + +The supervisor uses the Responses Agent protocol (or simple HTTP proxying) to forward requests. For streaming endpoints, the supervisor proxies SSE responses transparently. + +### Agent-to-Agent (A2A) Discovery + +After deployment, each agent exposes `/.well-known/agent.json`: + +```json +{ + "name": "genie-scorer", + "description": "IQ scoring for Genie Spaces", + "url": "https://genie-workbench-scorer.cloud.databricks.com", + "tools": [ + {"name": "scan_space", "description": "Run IQ scan on a Genie Space"}, + {"name": "get_history", "description": "Get score history"}, + {"name": "toggle_star", "description": "Toggle star on a space"} + ] +} +``` + +Other workspace apps can discover and call these agents using `AgentDiscovery`. + +--- + +## Implementation Roadmap + +Each phase adds a working agent. The monolith continues to serve production throughout. + +### Phase 1: Scaffolds + Architecture ← **This PR** + +- Agent scaffolds with tool signatures and source traceability +- `agents.yaml` deployment config +- Shared modules: `auth_bridge.py`, `lakebase_client.py`, `sp_fallback.py` +- This document + +### Phase 2: Wire up genie-scorer (lowest risk) + +**Why first:** No LLM calls, no streaming, no sessions — pure rule-based scoring. Validates the `@app_agent` + `obo_context()` pattern end-to-end. + +- Agent tool implementations call `backend/services/scanner.py` directly +- Deploy alongside monolith, verify via `/invocations` and MCP + +### Phase 3: Wire up genie-fixer (streaming + LLM) + +Validates SSE streaming through `@app_agent` + LLM tool calling. + +### Phase 4: Wire up genie-analyzer (streaming + LLM, multi-tool) + +Tools: `fetch_space`, `analyze_section`, `analyze_all`, `query_genie`, `execute_sql` + +### Phase 5: Wire up genie-optimizer + +Tools: `generate_suggestions`, `merge_config`, `label_benchmark` + +### Phase 6: Wire up genie-creator (most complex) + +16 tools, session persistence, complex tool-calling loop. Pydantic schemas (in `agents/creator/schemas.py`) replace hand-written JSON tool definitions. + +### Phase 7: Supervisor + frontend proxy + +Optional: if agent deployment becomes the primary mode, add a supervisor that serves the React SPA and proxies API calls to sub-agents. The frontend stays unchanged — same API paths, same behavior. + +--- + +## Eval Story + +Each agent becomes independently evaluatable via the `dbx-agent-app` bridge: + +```python +from dbx_agent_app.bridge import app_predict_fn +import mlflow + +predict = app_predict_fn("https://genie-workbench-scorer.cloud.databricks.com") +results = mlflow.genai.evaluate( + data=eval_dataset, + predict_fn=predict, + scorers=[correctness_scorer, latency_scorer], +) +``` + +This replaces the current "manual curl and check" testing with automated, repeatable evaluation pipelines for each agent independently. + +--- + +## Shared Modules + +The agent layer includes shared utilities in `agents/_shared/` that handle the integration between `@app_agent` and existing backend services. + +### 1. Auth Bridge → `agents/_shared/auth_bridge.py` + +`obo_context()` is a context manager that bridges `@app_agent`'s `request.user_context` into the existing `get_workspace_client()` pattern, plus `databricks-tools-core` ContextVars. This lets agent tools call existing domain logic without modification: + +```python +from agents._shared.auth_bridge import obo_context + +@scorer.tool(description="Run IQ scan on a Genie Space") +async def scan_space(space_id: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + # All of these now work: + # - monolith's get_workspace_client() returns OBO client + # - databricks-tools-core functions use OBO token + result = scanner.calculate_score(space_id) +``` + +For streaming generators, capture the token before the generator starts and re-enter `obo_context()` per-yield (same pattern as `backend/routers/create.py:125-198`). + +### 2. Complex Tool Schemas → `agents/creator/schemas.py` + +For tools with deeply nested parameters (like `generate_config` with 11 params across 4-5 nesting levels), Pydantic models provide the JSON Schema and runtime validation in ~80 lines: + +```python +from agents.creator.schemas import GenerateConfigArgs + +@creator.tool( + description="Generate a Genie Space configuration", + parameters=GenerateConfigArgs.model_json_schema(), +) +async def generate_config(**kwargs) -> dict: + args = GenerateConfigArgs(**kwargs) # Validate at runtime +``` + +Schema and validation stay in sync because they come from the same source. + +### 3. Supervisor Proxy → `agents/supervisor/proxy.py` + +If agents are deployed independently, the supervisor proxies frontend API calls to the correct agent. Ordered route table with prefix matching, glob support for path parameters, and SSE stream detection: + +```python +ROUTE_TABLE = [ + ("/api/spaces/*/fix", "FIXER_URL"), # specific before general + ("/api/genie/create", "CREATOR_URL"), + ("/api/spaces", "SCORER_URL"), + ("/api/analyze", "ANALYZER_URL"), + ("/api/create", "CREATOR_URL"), + # ... etc +] +``` + +SSE streams are detected by `content-type: text/event-stream` and forwarded as chunked bytes. OBO headers pass through automatically. + +### 4. SP Fallback → `agents/_shared/sp_fallback.py` + +Centralizes the SP-fallback pattern for Genie API calls where the user's OBO token may lack required OAuth scopes: + +```python +from agents._shared.sp_fallback import genie_api_call + +# One-liner with automatic SP fallback +space = genie_api_call("GET", f"/api/2.0/genie/spaces/{space_id}", + query={"include_serialized_space": "true"}) +``` + +### 5. Shared Lakebase Pool → `agents/_shared/lakebase_client.py` + +Shared asyncpg pool lifecycle with idempotent DDL. Each agent initializes its own pool from its own env vars: + +```python +from agents._shared.lakebase_client import init_pool, SCORER_DDL + +# At startup — creates tables if they don't exist +await init_pool(SCORER_DDL) +``` + +Each agent initializes its own pool from its own env vars. Domain-specific query functions stay in each agent's module. The shared client manages pool lifecycle, credential generation, and DDL only. + +--- + +## What You Get + +| Capability | Today | With agent layer | +|------------|-------|-----------------| +| Auto-generated endpoints | — | 30+ (5 agents × `/invocations`, `/health`, `agent.json`, MCP, etc.) | +| MCP servers | — | 5 (one per agent, free) | +| Agent discovery | — | A2A protocol, workspace-wide | +| Eval support | Manual testing | `mlflow.genai.evaluate()` via `app_predict_fn()` bridge | +| Independent deployment | — | `dbx-agent-app deploy --config agents.yaml --agent scorer` | +| Tool definitions | Hand-written JSON schemas | Auto-generated from function signatures + Pydantic models | + +--- + +## Verification Plan + +1. **Unit tests:** Each agent's tools can be tested independently via `agent(AgentRequest(...))` — the `@app_agent` decorator makes the handler directly callable. + +2. **Integration tests:** Deploy all agents locally (`uvicorn agents/scorer/app:app --port 8001`, etc.), configure supervisor with local URLs, run existing E2E tests. + +3. **A2A discovery:** After deploying to Databricks Apps, verify `/.well-known/agent.json` returns correct agent cards. Use `AgentDiscovery` to scan workspace. + +4. **Eval bridge:** Run `mlflow.genai.evaluate()` against each deployed agent using `app_predict_fn()`. + +5. **Frontend smoke test:** Verify React SPA still works end-to-end through the supervisor proxy. + +--- + +## Files in This PR + +All files are additive. No changes to existing `backend/`, `frontend/`, `packages/`, or `scripts/`. + +### Agent scaffolds +- `agents.yaml` — multi-agent deployment config +- `agents/scorer/app.py` + `app.yaml` — scorer agent (wraps scanner.py) +- `agents/analyzer/app.py` + `app.yaml` — analyzer agent (wraps analyzer.py) +- `agents/creator/app.py` + `app.yaml` — creator agent (wraps create_agent.py) +- `agents/creator/schemas.py` — Pydantic models for complex tool parameters +- `agents/optimizer/app.py` + `app.yaml` — optimizer agent (wraps optimizer.py) +- `agents/fixer/app.py` + `app.yaml` — fixer agent (wraps fix_agent.py) +- `agents/supervisor/proxy.py` — frontend-transparent proxy with SSE support + +### Shared modules +- `agents/_shared/auth_bridge.py` — OBO auth context manager bridging `@app_agent` ↔ existing services +- `agents/_shared/sp_fallback.py` — SP fallback decorator for Genie API scope errors +- `agents/_shared/lakebase_client.py` — Shared Lakebase pool with idempotent DDL + +### Documentation +- `docs/architecture-proposal.md` — this document +- `docs/genierx-spec.md` — GenieRX analyzer/recommender specification diff --git a/docs/genierx-spec.md b/docs/genierx-spec.md new file mode 100644 index 0000000..3c5c23f --- /dev/null +++ b/docs/genierx-spec.md @@ -0,0 +1,287 @@ +# GenieRX Specification + +## Purpose + +GenieRX is an analyzer and recommender for Genie spaces and their underlying semantic models. Its job is to: + +- Inspect how data and metrics are modeled for Genie (tables, views, metric views, knowledge store expressions, instructions). +- Classify fields into authoritative facts, canonical metrics, and heuristic signals. +- Recommend changes that align with Databricks best practices for Genie, Unity Catalog metric views, and the Genie knowledge store. + +GenieRX must never change data or semantics itself; it produces a structured review and recommendation set that humans can apply (or that other automation can implement safely). + +--- + +## 1. Core Concepts and Taxonomy + +GenieRX must reason about every field, metric, and score using the following taxonomy: + +### 1.1 Authoritative Facts + +**Definition:** +- Directly sourced from a system of record (billing, CRM, product telemetry, etc.). +- No business logic applied beyond basic cleaning (type casting, null handling). + +**Examples:** +- Transaction amounts, usage measures, timestamps from logs. +- Pipeline stages from CRM. +- Owner/segment assignments from master data. + +**GenieRX behavior:** +- Treat these as safe for Genie to query directly (tables or metric-view sources). +- Recommend surfacing them as columns, dimensions, or base measures without caveats, as long as upstream data quality is acceptable. + +### 1.2 Canonical Metrics + +**Definition:** +- Derived metrics with: + - A clear, stable SQL definition. + - Cross-team agreement (e.g., analytics, finance, ops). + - An owner who is accountable for changes. +- Examples: revenue, active users, funnel conversion, churn rate, cost per order. + +**GenieRX behavior:** +- Prefer to implement as metric view measures or knowledge-store measures/filters/dimensions, not as ad hoc SQL in Genie instructions. +- Encourage: + - Centralized definition in Unity Catalog metric views where possible. + - Short, precise names plus documentation (description + semantic metadata). +- Mark these as safe to present as "facts" in Genie answers (subject to the usual "data as of & filters" context). + +### 1.3 Heuristic Signals + +**Definition:** +- Derived fields that depend on subjective thresholds, incomplete joins, fragile text features, or evolving business rules. +- Examples: + - Coverage / gap flags based on keyword lists and spend thresholds. + - "Is_X" tags inferred via heuristic classification. + - Composite opportunity or risk scores with arbitrary buckets/weights. + - Buckets that encode assumptions about missing data or multi-tenant joins. + +**GenieRX behavior:** +- Always treat these as heuristic signals, not authoritative facts. +- Recommend: + - Implementing them as measures or filters with explicit caveats in the description and/or semantic metadata (for example, "heuristic", "approximate", "experimental"). + - Avoiding column names that imply certainty (prefer `potential_*`, `*_score`, `*_heuristic_flag`). +- When these are currently modeled as bare columns, GenieRX should: + - Flag them as high risk for misinterpretation in Genie answers. + - Suggest converting them into modeled measures/filters with clear labels and descriptions. + +--- + +## 2. Modeling Guidelines with Metric Views + +When the workspace uses Unity Catalog metric views as the semantic layer for Genie, GenieRX must evaluate and recommend according to the following patterns. + +### 2.1 Use Metric Views as the Primary Semantic Layer + +**Best practice:** +- For governed KPIs and complex aggregations, define them once as metric views and use those in: + - Genie spaces. + - Dashboards and alerts. + - SQL clients and downstream tools. + +**GenieRX should:** +- Prefer metric views over ad hoc SQL in Genie instructions when: + - Metrics are reused in many questions or dashboards. + - Correct rollup is non-trivial (ratios, distinct counts, windowed metrics, etc.). + +### 2.2 Organize Semantics into Dimensions, Measures, and Filters + +Metric views express semantics as: +- **Dimensions:** group-by attributes (e.g., account, segment, product, region, time grain). +- **Measures:** aggregated values (sum, avg, distinct count, ratios, scores). +- **Filters:** structured conditions used often for WHERE / HAVING. + +**GenieRX should:** +- Check that: + - Group-by attributes are modeled as dimensions, not repeated ad hoc in SQL. + - Key KPIs are measures, not free-floating columns. + - Common conditions ("active customers", "large orders", "priority accounts") are modeled as filters or boolean measures where appropriate. +- Recommend refactors such as: + - "Promote this repeated WHERE condition into a named filter `active_customers`." + - "Move this ratio calculation into a metric-view measure instead of recomputing it in instructions." + +### 2.3 Implement Heuristic Logic as Measures/Filters, Not Core Columns + +For heuristic signals: +- Prefer to keep raw inputs (spend, text features, joins) as authoritative columns, and encode heuristic logic as measures/filters in the metric view: + - **Measures:** scores or counts indicating likelihood, risk, or opportunity. + - **Filters:** boolean expressions such as `has_potential_gap`, `is_priority_account_heuristic`. + +**GenieRX should recommend:** +- Use descriptions and semantic metadata to mark: + - Purpose (e.g., "heuristic score to prioritize follow-up"). + - Known limitations (e.g., "sensitive to join failures; may over-count"). +- Avoid surfacing these measures as "the number of X" without caveats; instead, position them as signals. + +### 2.4 Enforce Metric-View Querying Best Practices + +Because metric views require explicit measure references: +- Queries must use the `MEASURE()` aggregate function for measures; `SELECT *` is not supported. + +**GenieRX should:** +- Check whether Genie SQL examples and instructions correctly reference measures using `MEASURE()` and: + - Flag places where raw measure columns are referenced without `MEASURE()`. + - Suggest corrected SQL patterns. + +--- + +## 3. Modeling Guidelines with the Genie Knowledge Store + +When the workspace uses Genie knowledge store features (space-level metadata, SQL expressions, entity/value mapping), GenieRX must evaluate and recommend according to these patterns. + +### 3.1 Use SQL Expressions for Structured Semantics + +The knowledge store lets authors define: +- **Measures:** KPIs and metrics with explicit SQL expressions. +- **Filters:** reusable boolean conditions. +- **Dimensions:** computed attributes for grouping or bucketing. + +**GenieRX should:** +- Encourage using SQL expressions for: + - Non-trivial metrics (ratios, distinct counts, window functions). + - Business-rule-based flags (e.g., "strategic customers", "at-risk contracts"). + - Time-derived dimensions (e.g., fiscal period, week buckets). +- Flag situations where: + - The same logic is duplicated across multiple Genie SQL examples/instructions. + - Important metrics only exist inside long-form instructions or user prompts. + +### 3.2 Align Table/Column Metadata with Business Terms + +**Best practice from Genie docs:** +- Keep spaces topic-specific and domain-focused. +- Use clear table and column descriptions and hide irrelevant or duplicate columns. + +**GenieRX should:** +- Evaluate: + - Whether key business terms are reflected in table/column descriptions and synonyms. + - Whether noisy or unused columns remain exposed to Genie. +- Recommend: + - Adding or refining descriptions to explain what measures/dimensions represent. + - Adding synonyms where business language differs from schema names. + - Hiding columns that are raw, deprecated, or confusing for business users. + +### 3.3 Distinguish Canonical vs Heuristic in Descriptions + +For each SQL expression in the knowledge store, GenieRX should: +- Classify as canonical metric or heuristic signal. +- Recommend description patterns, for example: + - **Canonical:** "Primary KPI for [domain]. Defined as ... and reviewed by [team]." + - **Heuristic:** "Heuristic score that approximates [concept]. Based on thresholds X/Y/Z and subject to misclassification. Use as prioritization signal, not as exact count." +- Suggest adding explicit notes for Genie: + - "When answering questions with this metric, briefly explain that it is a heuristic estimate." + +--- + +## 4. Genie Space Best Practices to Enforce + +GenieRX must anchor its recommendations in the official Genie best practices and internal field guidance. + +### 4.1 Scope and Data Model + +- Spaces should be topic-specific (single domain, business area, or workflow), not "kitchen sink" collections of tables. +- Use a small number of core tables or metric views with: + - Clear relationships (defined either in metric views or in knowledge store join metadata). + - Cleaned and de-duplicated columns. + +**GenieRX should:** +- Flag spaces that: + - Include many loosely related tables. + - Depend heavily on raw staging tables instead of curated or metric views. +- Recommend: + - Splitting domains into separate spaces. + - Using curated views / metric views to simplify the model. + +### 4.2 Instructions and Examples + +**Best practices include:** +- Keep instructions concise and focused on business rules and semantics, not low-level SQL formatting. +- Provide example SQL that demonstrates: + - Correct use of metric views and measures. + - Preferred filters and joins. +- Use benchmarks and validation questions to evaluate Genie performance over time. + +**GenieRX should:** +- Assess whether instructions: + - Explain how core metrics are defined and when to use them. + - Avoid unnecessary repetition and token-heavy prose. +- Recommend: + - Extracting embedded business rules from instructions into metric views and knowledge-store expressions. + - Adding or refining benchmark question sets for critical KPIs. + +--- + +## 5. GenieRX Review Workflow + +When GenieRX analyzes a space or semantic model, it should follow this high-level workflow: + +### Step 1: Inventory Sources and Semantics + +- List all data sources used by the space: + - Tables, views, metric views. + - Knowledge-store SQL expressions (measures, filters, dimensions). +- Identify all exposed fields and measures used in example SQL or benchmarks. + +### Step 2: Classify Fields Using the Taxonomy + +- For each column/measure, determine if it's an **authoritative fact**, **canonical metric**, or **heuristic signal** based on: + - Upstream SoT (billing, CRM, product, etc.). + - Presence in metric views or knowledge store. + - Use of thresholds, keyword lists, or ad hoc scoring logic. + +### Step 3: Check Alignment with Databricks Best Practices + +- **Data model:** Topic-focused, few core tables/metric views, clean joins. +- **Semantics:** Canonical metrics in metric views or knowledge-store measures/filters. +- **Instructions:** Clear, concise, oriented around business questions and metrics. +- **Evals:** Benchmarks or validation questions exist for key metrics. + +### Step 4: Generate Recommendations in Three Buckets + +**Safety & Clarity:** +- Where might Genie misrepresent heuristic signals as facts? +- Which metrics need stronger descriptions or caveats? + +**Semantic Modeling:** +- Which repeated logic should be moved into metric views or SQL expressions? +- Which filters or dimensions should be promoted into named entities? + +**Space Design:** +- Should tables/views be swapped for metric views? +- Are there irrelevant columns/tables that should be hidden? +- Are there missing joins, synonyms, or value dictionaries that would improve answer quality? + +### Step 5: Summarize in a User-Friendly Report + +For each analyzed space/model, output: + +1. **Overview** - 1-2 paragraph summary of main findings and risk level (low/medium/high). +2. **Semantic Model Assessment** - Table of key metrics/signals with: Name, type (authoritative/canonical/heuristic), grain, and notes. +3. **Recommended Changes** - Ranked list of concrete actions (e.g., "Create metric view for X", "Convert Y to heuristic measure with description", "Hide columns A/B/C"). +4. **Optional** - Suggestions for benchmarks or validation questions. + +--- + +## 6. Design Principles for GenieRX + +GenieRX should always adhere to these principles: + +- **Do not fabricate** underlying data or definitions; base assessments only on the actual space configuration, metric views, and knowledge store content. +- **Bias toward explicit semantics:** Prefer named measures/filters/dimensions over ad hoc SQL or fragile instructions. +- **Respect governance and ownership:** Highlight when changes would affect canonical metrics owned by other teams; recommend collaboration, not unilateral changes. +- **Aim for explainability:** Recommendations should be understandable to data and business owners. "Move this heuristic from a column to a measure with caveats" is better than opaque tuning. + +--- + +## Sources + +- Unity Catalog metric views | Databricks on AWS +- Build a knowledge store for more reliable Genie spaces | Databricks on AWS +- Genie Best Practices +- [Field Apps] GenieRX: a Genie analyzer / recommender +- Product Analytics (go/product-analytics) +- DAIS 2025 - UC Metrics - Discovery - Genie +- Genie Guidelines +- Genie Space - Field Engineering Guide +- Writing Effective Databricks Genie Instructions +- Genie + Metrics (FEIP-818)