From 675a6b49fc89fe095aa6a837fa0363ef77fc5a50 Mon Sep 17 00:00:00 2001 From: Stuart Gano Date: Tue, 10 Mar 2026 09:12:16 -0700 Subject: [PATCH 1/6] docs: add CLAUDE.md and genierx-spec.md Includes project development guide and the GenieRX analysis/recommendation framework specification. --- CLAUDE.md | 143 ++++----------------- docs/genierx-spec.md | 287 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 313 insertions(+), 117 deletions(-) create mode 100644 docs/genierx-spec.md diff --git a/CLAUDE.md b/CLAUDE.md index 8630b30..35cd56e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,132 +1,41 @@ # Genie Workbench -Databricks App for creating, scoring, and optimizing Genie Spaces. FastAPI backend + React/Vite frontend deployed together on Databricks Apps. +## Project Overview -## Commands +Genie Workbench is a Databricks App that acts as a quality control and optimization platform for Genie Space administrators. It helps builders understand why their Genie Space isn't performing well and fix it. -```bash -# Backend (from project root) -uv pip install -e . # Install Python deps -uvicorn backend.main:app --host 0.0.0.0 --port 8000 --reload # Dev server - -# Frontend (from frontend/) -cd frontend && npm install && npm run build # Build for production -cd frontend && npm run dev # Vite dev server (port 5173, proxies /api to :8000) -cd frontend && npm run lint # ESLint - -# Full build (what Databricks Apps runs) -npm install # Triggers postinstall -> cd frontend && npm install -npm run build # Triggers cd frontend && npm run build - -# Deploy -databricks sync --watch . /Workspace/Users//genie-workbench -databricks apps deploy --source-code-path /Workspace/Users//genie-workbench - -# Tests (require running backend at localhost:8000) -python tests/test_e2e_local.py # E2E create agent tests -python tests/test_full_schema.py # Schema validation -# Deployed E2E tests require: pip install playwright && playwright install chromium -python tests/test_e2e_deployed.py -``` - -## Architecture - -``` -backend/ - main.py # FastAPI app entry point, OBO middleware, static file serving - models.py # All Pydantic models (shared between routers/services) - routers/ - analysis.py # /api/space/*, /api/analyze/*, /api/optimize, /api/genie/*, /api/sql/* - spaces.py # /api/spaces/* (list, scan, history, star, fix) - admin.py # /api/admin/* (dashboard, leaderboard, alerts) - auth.py # /api/auth/me - create.py # /api/create/* (agent chat, UC discovery, wizard) - services/ - auth.py # OBO auth (ContextVar), SP fallback, WorkspaceClient mgmt - genie_client.py # Databricks Genie API (fetch space, list spaces, query for SQL) - scanner.py # Rule-based IQ scoring engine (0-100, 4 dimensions) - analyzer.py # LLM-based deep analysis against best-practices checklist - optimizer.py # LLM-based optimization from benchmark feedback - fix_agent.py # LLM agent that generates JSON patches and applies via Genie API - create_agent.py # Multi-turn LLM agent for creating new Genie Spaces - create_agent_session.py # Session persistence for create agent (Lakebase) - create_agent_tools.py # Tool definitions for create agent (UC discovery, SQL, etc.) 
- lakebase.py # PostgreSQL persistence (asyncpg pool, in-memory fallback) - llm_utils.py # OpenAI-compatible LLM client via Databricks serving endpoints - uc_client.py # Unity Catalog browsing (catalogs, schemas, tables) - prompts/ # Prompt templates for analysis - prompts_create/ # Prompt templates for create agent (multi-file, modular) - references/schema.md # Genie Space JSON schema reference -frontend/ - src/ - App.tsx # Root: SpaceList | SpaceDetail | AdminDashboard | CreateAgentChat - lib/api.ts # All API calls (fetch, SSE streaming helpers) - types/index.ts # TypeScript types mirroring backend Pydantic models - components/ # UI components (analysis, optimization, fix agent, etc.) - pages/ # SpaceList, SpaceDetail, AdminDashboard, HistoryTab, IQScoreTab - hooks/ # useAnalysis, useTheme - vite.config.ts # Vite config with /api proxy to localhost:8000 -``` - -## Key Patterns - -### Authentication (OBO) -On Databricks Apps, user identity flows via `x-forwarded-access-token` header. `OBOAuthMiddleware` in `main.py` stores the token in a `ContextVar`. All services call `get_workspace_client()` which returns the OBO client if set, otherwise the SP singleton. Some Genie API calls require SP auth (missing `genie` OAuth scope) — see `_is_scope_error()` fallback in `genie_client.py`. - -### SSE Streaming -Multiple endpoints use `StreamingResponse` with `text/event-stream`: -- `/api/analyze/stream` — analysis progress -- `/api/optimize` — optimization with heartbeat keepalives (15s) -- `/api/spaces/{id}/fix` — fix agent patches -- `/api/create/agent/chat` — multi-turn agent with typed events (session, step, thinking, tool_call, tool_result, message_delta, message, created, error, done) - -Frontend consumes these via manual `fetch` + `ReadableStream` in `lib/api.ts` (not EventSource). Buffer splitting on `\n\n`. +- **Backend:** Python (FastAPI), deployed as a Databricks App +- **Frontend:** React/TypeScript (Vite) +- **Storage:** Lakebase (with in-memory fallback for local dev) +- **Tracing:** Optional MLflow integration -### Lakebase Persistence -`services/lakebase.py` uses asyncpg with graceful fallback to in-memory dicts when `LAKEBASE_HOST` is not set. Credentials auto-generated via Databricks SDK (`/api/2.0/database/credentials`). Schema defined in `sql/setup_lakebase.sql`. +## GenieRX Specification -### LLM Calls -All LLM calls go through Databricks model serving endpoints using OpenAI-compatible API. Model configured via `LLM_MODEL` env var (default: `databricks-claude-sonnet-4-6`). MLflow tracing is optional — controlled by `MLFLOW_EXPERIMENT_ID`. +The GenieRX spec (`docs/genierx-spec.md`) defines the core analysis and recommendation framework used throughout this project. **Always consult it when working on analysis, scoring, or recommendation features.** -## Environment Variables +Key concepts from the spec: -Defined in `app.yaml`. 
Key ones: -- `SQL_WAREHOUSE_ID` — from app resource `sql-warehouse` -- `LLM_MODEL` — serving endpoint name -- `LAKEBASE_HOST`, `LAKEBASE_PORT`, `LAKEBASE_DATABASE`, `LAKEBASE_INSTANCE_NAME` — Lakebase config -- `MLFLOW_EXPERIMENT_ID` — enables MLflow tracing (validated at startup, cleared if invalid) -- `GENIE_TARGET_DIRECTORY` — where new spaces are created (default `/Shared/`) -- `DEV_USER_EMAIL` — local dev only +- **Authoritative Facts** — raw data from systems of record, safe to surface directly +- **Canonical Metrics** — governed KPIs with stable definitions and cross-team agreement +- **Heuristic Signals** — derived fields with subjective thresholds; must always carry caveats -Local dev uses `.env.local` (loaded first with override) then `.env`. +When implementing or modifying any analyzer, scorer, or recommender logic, ensure field classifications align with this taxonomy. Heuristic signals must never be presented as authoritative facts in Genie answers. -## Dev/Test Workflow +## Key Documentation -There is no local dev server — all testing is done by syncing code to Databricks and redeploying: +- `docs/genierx-spec.md` — GenieRX analyzer/recommender specification +- `docs/genie-space-schema.md` — Genie space schema reference +- `docs/checklist-by-schema.md` — Analysis checklist organized by schema section +- `CUJ.md` — Core user journeys and product analysis -1. Edit code locally -2. `databricks sync --watch . /Workspace/Users//genie-workbench` picks up changes automatically -3. Re-run `databricks apps deploy --source-code-path /Workspace/Users//genie-workbench` to trigger a new deployment -4. Test in the deployed Databricks App +## Development -Do NOT suggest running `uvicorn` or `npm run dev` locally. The app depends on Databricks-managed resources (OBO auth, Lakebase, serving endpoints) that aren't available outside a Databricks App environment. - -## Gotchas - -- **frontend/dist/ is gitignored but NOT databricksignored** — the built React app must be synced to workspace for deployment. Build before `databricks sync`. -- **`.databricksignore` excludes `*.md`** but explicitly includes `backend/references/schema.md` (needed at runtime by the analyzer). -- **OBO ContextVar and streaming** — for SSE endpoints, the ContextVar is NOT cleared after `call_next` because the response streams lazily. Streaming handlers stash the token on `request.state` and re-set it inside the generator. -- **Two separate "analysis" paths** — IQ Scan (`scanner.py`, rule-based, instant) and Deep Analysis (`analyzer.py`, LLM-based, streaming). They produce different outputs and don't cross-reference. -- **Two separate "fix" paths** — Fix Agent (from scan findings, auto-applies patches) and Optimize flow (from benchmark labeling, produces suggestions for a new space). They're independent. -- **Vite proxy** — dev frontend at :5173 proxies `/api` to :8000. In production, FastAPI serves static files from `frontend/dist/` directly. -- **Python 3.11+** required (`pyproject.toml`). Uses `uv` for dependency management (`uv.lock` present). -- **Root `package.json`** exists solely as a build hook for Databricks Apps — `postinstall` chains to `frontend/npm install`, `build` chains to `frontend/npm run build`. 
+```bash +# Backend (from repo root) +uv run start-server -## Code Style +# Frontend +cd frontend && npm run dev +``` -- Backend: Python, Pydantic models, FastAPI routers, no class-based views -- Frontend: React 19 + TypeScript + Tailwind CSS v4 + Vite 7, functional components only -- UI primitives in `frontend/src/components/ui/` (button, card, badge, etc.) using `class-variance-authority` -- Path alias `@` maps to `frontend/src/` (configured in `vite.config.ts` and `tsconfig.app.json`) -- All API routes prefixed with `/api` -- Pydantic models in `backend/models.py`, TypeScript mirrors in `frontend/src/types/index.ts` — keep in sync +Frontend runs at `localhost:5173`, proxies API calls to backend at `localhost:8000`. diff --git a/docs/genierx-spec.md b/docs/genierx-spec.md new file mode 100644 index 0000000..3c5c23f --- /dev/null +++ b/docs/genierx-spec.md @@ -0,0 +1,287 @@ +# GenieRX Specification + +## Purpose + +GenieRX is an analyzer and recommender for Genie spaces and their underlying semantic models. Its job is to: + +- Inspect how data and metrics are modeled for Genie (tables, views, metric views, knowledge store expressions, instructions). +- Classify fields into authoritative facts, canonical metrics, and heuristic signals. +- Recommend changes that align with Databricks best practices for Genie, Unity Catalog metric views, and the Genie knowledge store. + +GenieRX must never change data or semantics itself; it produces a structured review and recommendation set that humans can apply (or that other automation can implement safely). + +--- + +## 1. Core Concepts and Taxonomy + +GenieRX must reason about every field, metric, and score using the following taxonomy: + +### 1.1 Authoritative Facts + +**Definition:** +- Directly sourced from a system of record (billing, CRM, product telemetry, etc.). +- No business logic applied beyond basic cleaning (type casting, null handling). + +**Examples:** +- Transaction amounts, usage measures, timestamps from logs. +- Pipeline stages from CRM. +- Owner/segment assignments from master data. + +**GenieRX behavior:** +- Treat these as safe for Genie to query directly (tables or metric-view sources). +- Recommend surfacing them as columns, dimensions, or base measures without caveats, as long as upstream data quality is acceptable. + +### 1.2 Canonical Metrics + +**Definition:** +- Derived metrics with: + - A clear, stable SQL definition. + - Cross-team agreement (e.g., analytics, finance, ops). + - An owner who is accountable for changes. +- Examples: revenue, active users, funnel conversion, churn rate, cost per order. + +**GenieRX behavior:** +- Prefer to implement as metric view measures or knowledge-store measures/filters/dimensions, not as ad hoc SQL in Genie instructions. +- Encourage: + - Centralized definition in Unity Catalog metric views where possible. + - Short, precise names plus documentation (description + semantic metadata). +- Mark these as safe to present as "facts" in Genie answers (subject to the usual "data as of & filters" context). + +### 1.3 Heuristic Signals + +**Definition:** +- Derived fields that depend on subjective thresholds, incomplete joins, fragile text features, or evolving business rules. +- Examples: + - Coverage / gap flags based on keyword lists and spend thresholds. + - "Is_X" tags inferred via heuristic classification. + - Composite opportunity or risk scores with arbitrary buckets/weights. + - Buckets that encode assumptions about missing data or multi-tenant joins. 
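+
+To make the distinction concrete, here is a minimal sketch of how a
+classified field might be represented (illustrative only — the class and
+field names are not part of any GenieRX API):
+
+```python
+from dataclasses import dataclass, field
+from enum import Enum
+
+
+class FieldClass(Enum):
+    AUTHORITATIVE_FACT = "authoritative_fact"
+    CANONICAL_METRIC = "canonical_metric"
+    HEURISTIC_SIGNAL = "heuristic_signal"
+
+
+@dataclass
+class ClassifiedField:
+    name: str
+    classification: FieldClass
+    caveats: list[str] = field(default_factory=list)
+
+
+# A heuristic signal always carries explicit caveats; an authoritative fact
+# need not.
+gap_flag = ClassifiedField(
+    name="potential_coverage_gap",
+    classification=FieldClass.HEURISTIC_SIGNAL,
+    caveats=["keyword-list based", "sensitive to join failures; may over-count"],
+)
+txn_amount = ClassifiedField("transaction_amount", FieldClass.AUTHORITATIVE_FACT)
+```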
+ +**GenieRX behavior:** +- Always treat these as heuristic signals, not authoritative facts. +- Recommend: + - Implementing them as measures or filters with explicit caveats in the description and/or semantic metadata (for example, "heuristic", "approximate", "experimental"). + - Avoiding column names that imply certainty (prefer `potential_*`, `*_score`, `*_heuristic_flag`). +- When these are currently modeled as bare columns, GenieRX should: + - Flag them as high risk for misinterpretation in Genie answers. + - Suggest converting them into modeled measures/filters with clear labels and descriptions. + +--- + +## 2. Modeling Guidelines with Metric Views + +When the workspace uses Unity Catalog metric views as the semantic layer for Genie, GenieRX must evaluate and recommend according to the following patterns. + +### 2.1 Use Metric Views as the Primary Semantic Layer + +**Best practice:** +- For governed KPIs and complex aggregations, define them once as metric views and use those in: + - Genie spaces. + - Dashboards and alerts. + - SQL clients and downstream tools. + +**GenieRX should:** +- Prefer metric views over ad hoc SQL in Genie instructions when: + - Metrics are reused in many questions or dashboards. + - Correct rollup is non-trivial (ratios, distinct counts, windowed metrics, etc.). + +### 2.2 Organize Semantics into Dimensions, Measures, and Filters + +Metric views express semantics as: +- **Dimensions:** group-by attributes (e.g., account, segment, product, region, time grain). +- **Measures:** aggregated values (sum, avg, distinct count, ratios, scores). +- **Filters:** structured conditions used often for WHERE / HAVING. + +**GenieRX should:** +- Check that: + - Group-by attributes are modeled as dimensions, not repeated ad hoc in SQL. + - Key KPIs are measures, not free-floating columns. + - Common conditions ("active customers", "large orders", "priority accounts") are modeled as filters or boolean measures where appropriate. +- Recommend refactors such as: + - "Promote this repeated WHERE condition into a named filter `active_customers`." + - "Move this ratio calculation into a metric-view measure instead of recomputing it in instructions." + +### 2.3 Implement Heuristic Logic as Measures/Filters, Not Core Columns + +For heuristic signals: +- Prefer to keep raw inputs (spend, text features, joins) as authoritative columns, and encode heuristic logic as measures/filters in the metric view: + - **Measures:** scores or counts indicating likelihood, risk, or opportunity. + - **Filters:** boolean expressions such as `has_potential_gap`, `is_priority_account_heuristic`. + +**GenieRX should recommend:** +- Use descriptions and semantic metadata to mark: + - Purpose (e.g., "heuristic score to prioritize follow-up"). + - Known limitations (e.g., "sensitive to join failures; may over-count"). +- Avoid surfacing these measures as "the number of X" without caveats; instead, position them as signals. + +### 2.4 Enforce Metric-View Querying Best Practices + +Because metric views require explicit measure references: +- Queries must use the `MEASURE()` aggregate function for measures; `SELECT *` is not supported. + +**GenieRX should:** +- Check whether Genie SQL examples and instructions correctly reference measures using `MEASURE()` and: + - Flag places where raw measure columns are referenced without `MEASURE()`. + - Suggest corrected SQL patterns. + +--- + +## 3. 
Modeling Guidelines with the Genie Knowledge Store + +When the workspace uses Genie knowledge store features (space-level metadata, SQL expressions, entity/value mapping), GenieRX must evaluate and recommend according to these patterns. + +### 3.1 Use SQL Expressions for Structured Semantics + +The knowledge store lets authors define: +- **Measures:** KPIs and metrics with explicit SQL expressions. +- **Filters:** reusable boolean conditions. +- **Dimensions:** computed attributes for grouping or bucketing. + +**GenieRX should:** +- Encourage using SQL expressions for: + - Non-trivial metrics (ratios, distinct counts, window functions). + - Business-rule-based flags (e.g., "strategic customers", "at-risk contracts"). + - Time-derived dimensions (e.g., fiscal period, week buckets). +- Flag situations where: + - The same logic is duplicated across multiple Genie SQL examples/instructions. + - Important metrics only exist inside long-form instructions or user prompts. + +### 3.2 Align Table/Column Metadata with Business Terms + +**Best practice from Genie docs:** +- Keep spaces topic-specific and domain-focused. +- Use clear table and column descriptions and hide irrelevant or duplicate columns. + +**GenieRX should:** +- Evaluate: + - Whether key business terms are reflected in table/column descriptions and synonyms. + - Whether noisy or unused columns remain exposed to Genie. +- Recommend: + - Adding or refining descriptions to explain what measures/dimensions represent. + - Adding synonyms where business language differs from schema names. + - Hiding columns that are raw, deprecated, or confusing for business users. + +### 3.3 Distinguish Canonical vs Heuristic in Descriptions + +For each SQL expression in the knowledge store, GenieRX should: +- Classify as canonical metric or heuristic signal. +- Recommend description patterns, for example: + - **Canonical:** "Primary KPI for [domain]. Defined as ... and reviewed by [team]." + - **Heuristic:** "Heuristic score that approximates [concept]. Based on thresholds X/Y/Z and subject to misclassification. Use as prioritization signal, not as exact count." +- Suggest adding explicit notes for Genie: + - "When answering questions with this metric, briefly explain that it is a heuristic estimate." + +--- + +## 4. Genie Space Best Practices to Enforce + +GenieRX must anchor its recommendations in the official Genie best practices and internal field guidance. + +### 4.1 Scope and Data Model + +- Spaces should be topic-specific (single domain, business area, or workflow), not "kitchen sink" collections of tables. +- Use a small number of core tables or metric views with: + - Clear relationships (defined either in metric views or in knowledge store join metadata). + - Cleaned and de-duplicated columns. + +**GenieRX should:** +- Flag spaces that: + - Include many loosely related tables. + - Depend heavily on raw staging tables instead of curated or metric views. +- Recommend: + - Splitting domains into separate spaces. + - Using curated views / metric views to simplify the model. + +### 4.2 Instructions and Examples + +**Best practices include:** +- Keep instructions concise and focused on business rules and semantics, not low-level SQL formatting. +- Provide example SQL that demonstrates: + - Correct use of metric views and measures. + - Preferred filters and joins. +- Use benchmarks and validation questions to evaluate Genie performance over time. 
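+
+To illustrate, a benchmark entry for a metric-view-backed space might pair a
+question with reference SQL that aggregates via `MEASURE()` (the metric view
+and measure names below are hypothetical):
+
+```python
+# Hypothetical benchmark entry. `sales.finance.revenue_metrics` and its
+# `total_revenue` measure are illustrative names, not real objects.
+benchmark_entry = {
+    "question": "What was revenue by region last month?",
+    "reference_sql": """
+        SELECT region, MEASURE(total_revenue) AS revenue
+        FROM sales.finance.revenue_metrics
+        WHERE order_month = '2026-02'
+        GROUP BY region
+    """,
+}
+```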
+ +**GenieRX should:** +- Assess whether instructions: + - Explain how core metrics are defined and when to use them. + - Avoid unnecessary repetition and token-heavy prose. +- Recommend: + - Extracting embedded business rules from instructions into metric views and knowledge-store expressions. + - Adding or refining benchmark question sets for critical KPIs. + +--- + +## 5. GenieRX Review Workflow + +When GenieRX analyzes a space or semantic model, it should follow this high-level workflow: + +### Step 1: Inventory Sources and Semantics + +- List all data sources used by the space: + - Tables, views, metric views. + - Knowledge-store SQL expressions (measures, filters, dimensions). +- Identify all exposed fields and measures used in example SQL or benchmarks. + +### Step 2: Classify Fields Using the Taxonomy + +- For each column/measure, determine if it's an **authoritative fact**, **canonical metric**, or **heuristic signal** based on: + - Upstream SoT (billing, CRM, product, etc.). + - Presence in metric views or knowledge store. + - Use of thresholds, keyword lists, or ad hoc scoring logic. + +### Step 3: Check Alignment with Databricks Best Practices + +- **Data model:** Topic-focused, few core tables/metric views, clean joins. +- **Semantics:** Canonical metrics in metric views or knowledge-store measures/filters. +- **Instructions:** Clear, concise, oriented around business questions and metrics. +- **Evals:** Benchmarks or validation questions exist for key metrics. + +### Step 4: Generate Recommendations in Three Buckets + +**Safety & Clarity:** +- Where might Genie misrepresent heuristic signals as facts? +- Which metrics need stronger descriptions or caveats? + +**Semantic Modeling:** +- Which repeated logic should be moved into metric views or SQL expressions? +- Which filters or dimensions should be promoted into named entities? + +**Space Design:** +- Should tables/views be swapped for metric views? +- Are there irrelevant columns/tables that should be hidden? +- Are there missing joins, synonyms, or value dictionaries that would improve answer quality? + +### Step 5: Summarize in a User-Friendly Report + +For each analyzed space/model, output: + +1. **Overview** - 1-2 paragraph summary of main findings and risk level (low/medium/high). +2. **Semantic Model Assessment** - Table of key metrics/signals with: Name, type (authoritative/canonical/heuristic), grain, and notes. +3. **Recommended Changes** - Ranked list of concrete actions (e.g., "Create metric view for X", "Convert Y to heuristic measure with description", "Hide columns A/B/C"). +4. **Optional** - Suggestions for benchmarks or validation questions. + +--- + +## 6. Design Principles for GenieRX + +GenieRX should always adhere to these principles: + +- **Do not fabricate** underlying data or definitions; base assessments only on the actual space configuration, metric views, and knowledge store content. +- **Bias toward explicit semantics:** Prefer named measures/filters/dimensions over ad hoc SQL or fragile instructions. +- **Respect governance and ownership:** Highlight when changes would affect canonical metrics owned by other teams; recommend collaboration, not unilateral changes. +- **Aim for explainability:** Recommendations should be understandable to data and business owners. "Move this heuristic from a column to a measure with caveats" is better than opaque tuning. 
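+
+Pulling these together, a minimal sketch of the Step 5 report shape, assuming
+it is emitted as structured data (field names are illustrative, not a fixed
+schema):
+
+```python
+from dataclasses import dataclass, field
+
+
+@dataclass
+class Recommendation:
+    action: str     # e.g. "Create a metric view for churn_rate"
+    bucket: str     # "safety_clarity" | "semantic_modeling" | "space_design"
+    rationale: str
+
+
+@dataclass
+class GenieRxReport:
+    overview: str                          # 1-2 paragraph summary of findings
+    risk_level: str                        # "low" | "medium" | "high"
+    assessment: list[dict]                 # name, type, grain, notes per field
+    recommendations: list[Recommendation]  # ranked, most impactful first
+    suggested_benchmarks: list[str] = field(default_factory=list)
+```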
+ +--- + +## Sources + +- Unity Catalog metric views | Databricks on AWS +- Build a knowledge store for more reliable Genie spaces | Databricks on AWS +- Genie Best Practices +- [Field Apps] GenieRX: a Genie analyzer / recommender +- Product Analytics (go/product-analytics) +- DAIS 2025 - UC Metrics - Discovery - Genie +- Genie Guidelines +- Genie Space - Field Engineering Guide +- Writing Effective Databricks Genie Instructions +- Genie + Metrics (FEIP-818) From 250b4a397c7b11158ef0686aceb3a16e7148e802 Mon Sep 17 00:00:00 2001 From: Stuart Gano Date: Tue, 10 Mar 2026 09:57:15 -0700 Subject: [PATCH 2/6] docs: update proposal to reflect PRs #6-#8 changes - Updated line counts (10,178 total backend) - Documented SP fallback pattern (get_service_principal_client + _is_scope_error) added in PR #7 as new integration challenge (#4) - Documented genie_client.py scope-error retry pattern in pain points - Updated scorer scaffold with notes on field name changes (space_id/title) and space_url addition from PR #6 - Noted SP fallback in get_space_detail from PR #7 --- agents/scorer/app.py | 105 +++++++ docs/architecture-proposal.md | 539 ++++++++++++++++++++++++++++++++++ 2 files changed, 644 insertions(+) create mode 100644 agents/scorer/app.py create mode 100644 docs/architecture-proposal.md diff --git a/agents/scorer/app.py b/agents/scorer/app.py new file mode 100644 index 0000000..9819ea7 --- /dev/null +++ b/agents/scorer/app.py @@ -0,0 +1,105 @@ +"""genie-scorer — IQ scoring agent for Genie Spaces. + +Extracted from: + - backend/routers/spaces.py (scan, history, star, list endpoints) + - backend/services/scanner.py (rule-based scoring engine) + - backend/services/lakebase.py (score persistence) + +This agent has NO LLM dependency — it's pure rule-based scoring. +Lowest-risk extraction target; validates the @app_agent pattern. +""" + +from __future__ import annotations + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + + +@app_agent( + name="genie-scorer", + description=( + "IQ scoring for Genie Spaces. Scans space configurations against a " + "rule-based scoring rubric (foundation, data setup, SQL assets, " + "optimization), persists scores to Lakebase, and tracks score history." + ), +) +async def scorer(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to the appropriate scoring tool.""" + # TODO: Parse intent from request.messages and dispatch to tools + ... + + +# ── Tools ──────────────────────────────────────────────────────────────────── +# Each tool maps to a current REST endpoint in backend/routers/spaces.py. +# Domain logic lives in scanner.py (moved as-is from backend/services/). + + +@scorer.tool( + description=( + "Run an IQ scan on a Genie Space. Fetches the space configuration, " + "calculates a score (0-100) across four dimensions (foundation, data " + "setup, SQL assets, optimization), and persists the result to Lakebase." + ), +) +async def scan_space(space_id: str) -> dict: + """Source: backend/services/scanner.py::scan_space + backend/routers/spaces.py::trigger_scan""" + # Domain logic (scanner.calculate_score) moves here as-is. + # OBO auth: use request.user_context.access_token instead of ContextVar. + # Lakebase persistence: use local lakebase.py copy. + raise NotImplementedError("Phase 2: move scanner.py + lakebase.py here") + + +@scorer.tool( + description=( + "Get score history for a Genie Space over the last N days. " + "Returns a list of {score, maturity, scanned_at} entries." 
+ ), +) +async def get_history(space_id: str, days: int = 30) -> list[dict]: + """Source: backend/services/lakebase.py::get_score_history""" + raise NotImplementedError("Phase 2: move lakebase.get_score_history here") + + +@scorer.tool( + description="Toggle the star (bookmark) status of a Genie Space.", +) +async def toggle_star(space_id: str, starred: bool) -> dict: + """Source: backend/services/lakebase.py::star_space""" + raise NotImplementedError("Phase 2: move lakebase.star_space here") + + +@scorer.tool( + description=( + "List all Genie Spaces the user has access to, enriched with IQ " + "scores. Supports filtering by name, star status, and score range." + ), +) +async def list_spaces( + search: str | None = None, + starred_only: bool = False, + min_score: int | None = None, + max_score: int | None = None, +) -> list[dict]: + """Source: backend/routers/spaces.py::list_spaces + + Note (PR #6-#8): API response uses `space_id`/`title` fields (not `id`/`display_name`). + Returns `space_url` per item (host + /genie/rooms/{space_id}). + Uses SP fallback via get_service_principal_client() when OBO token lacks genie scope. + """ + raise NotImplementedError("Phase 2: move list_spaces logic here") + + +@scorer.tool( + description="Get detailed space metadata with latest scan result and star status.", +) +async def get_space_detail(space_id: str) -> dict: + """Source: backend/routers/spaces.py::get_space_detail + + Note (PR #7): Includes SP fallback (_is_scope_error check) for Genie API calls. + """ + raise NotImplementedError("Phase 2: move get_space_detail logic here") + + +# ── Standalone entry point ─────────────────────────────────────────────────── +# For local development: uvicorn agents.scorer.app:app --port 8001 + +app = scorer.app diff --git a/docs/architecture-proposal.md b/docs/architecture-proposal.md new file mode 100644 index 0000000..1069b32 --- /dev/null +++ b/docs/architecture-proposal.md @@ -0,0 +1,539 @@ +# Genie Workbench → Multi-Agent Architecture + +> **Status:** Proposal +> **Author:** Stuart Gano +> **Audience:** Sean Zhang (Workbench maintainer) +> **Date:** 2026-03-10 + +--- + +## Executive Summary + +The Genie Workbench is a monolithic Databricks App (~10,200 lines backend) that hand-rolls OBO auth, tool-calling loops, SSE streaming, and SDK wrappers. Two FE-built libraries solve these exact problems: + +- **AI Dev Kit** (`databricks-tools-core`) — pre-built Python functions for SQL execution, Unity Catalog browsing, and warehouse management +- **dbx-agent-app** — `@app_agent` decorator that auto-generates `/invocations` endpoints, agent cards, MCP servers, health checks, and handles OBO auth + +This proposal refactors the Workbench into a **multi-agent system** where each capability is a separate, discoverable `@app_agent` app. The result: ~30% less code, free MCP servers, A2A discovery, and `mlflow.genai.evaluate()` support — with zero changes to the React frontend. 
+ +--- + +## Current Architecture (Monolith) + +``` +┌─────────────────────────────────────────────────┐ +│ backend/main.py (FastAPI) │ +│ │ +│ ┌──────────────────────────────────────────┐ │ +│ │ OBOAuthMiddleware │ │ +│ │ (hand-rolled ContextVar + x-forwarded- │ │ +│ │ access-token extraction) │ │ +│ └──────────────────────────────────────────┘ │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌───────────────┐ │ +│ │ routers/ │ │ routers/ │ │ routers/ │ │ +│ │ spaces │ │ analysis │ │ create │ │ +│ │ (scan, │ │ (analyze,│ │ (UC discovery │ │ +│ │ history, │ │ stream, │ │ agent chat, │ │ +│ │ star, │ │ query, │ │ validate, │ │ +│ │ fix) │ │ optimize│ │ create) │ │ +│ └──────────┘ └──────────┘ └───────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────┐ │ +│ │ services/ │ │ +│ │ scanner.py analyzer.py optimizer.py │ │ +│ │ fix_agent.py create_agent.py │ │ +│ │ create_agent_tools.py (2,717 lines!) │ │ +│ │ create_agent_session.py │ │ +│ │ uc_client.py sql_executor.py │ │ +│ │ genie_client.py lakebase.py auth.py │ │ +│ └──────────────────────────────────────────┘ │ +│ │ +│ frontend/dist/ (React SPA, static files) │ +└─────────────────────────────────────────────────┘ +``` + +### Pain points + +| Issue | Impact | +|-------|--------| +| `create_agent_tools.py` is 2,717 lines of hand-coded tool definitions + JSON schemas + dispatch table | Every new tool requires ~80 lines of boilerplate | +| OBO auth in `services/auth.py` (136 lines) uses ContextVar + middleware — breaks in streaming generators | Streaming endpoints need manual `set_obo_user_token()` re-establishment. Recent fix added `get_service_principal_client()` fallback for missing OAuth scopes | +| `genie_client.py` (244 lines) duplicates SP-fallback pattern (`_is_scope_error`) in every API call | Each new Genie API function must remember to add scope-error retry logic | +| `sql_executor.py` (220 lines) reimplements what `databricks-tools-core.sql` provides | Maintenance burden, no warehouse auto-detection improvements | +| `uc_client.py` (60 lines) reimplements what `databricks-tools-core.unity_catalog` provides | Duplicated effort | +| No agent discovery — other workspace apps can't call Workbench capabilities | Siloed functionality | +| No eval support — testing requires manual curl/browser interaction | No regression testing pipeline | +| Monolithic deployment — any change redeploys everything | Slow iteration on individual capabilities | + +--- + +## Proposed Architecture (Multi-Agent) + +``` +┌─────────────────────────────────────────────────────────┐ +│ genie-workbench (supervisor) │ +│ React SPA + FastAPI shell │ +│ Routes frontend API calls → sub-agent /invocations │ +├─────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ genie- │ │ genie- │ │ genie- │ │ +│ │ scorer │ │ analyzer │ │ creator │ │ +│ │ │ │ │ │ │ │ +│ │ IQ scan │ │ LLM deep │ │ Space │ │ +│ │ scoring │ │ analysis │ │ creation │ │ +│ │ history │ │ synthesis│ │ wizard │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ +│ ┌──────────┐ ┌──────────┐ │ +│ │ genie- │ │ genie- │ │ +│ │ optimizer│ │ fixer │ │ +│ │ │ │ │ │ +│ │ Benchmark│ │ AI fix │ │ +│ │ labeling │ │ agent │ │ +│ │ suggest │ │ patches │ │ +│ └──────────┘ └──────────┘ │ +└─────────────────────────────────────────────────────────┘ +``` + +Each sub-agent is a standalone Databricks App with: +- **`@app_agent` decorator** — auto-generates `/invocations`, `/.well-known/agent.json`, `/health`, MCP server +- **OBO auth** — handled by `request.user_context` 
(replaces ContextVar middleware) +- **Tool definitions** — auto-generated from `@agent.tool()` decorated functions (replaces JSON schemas) +- **Eval support** — `app_predict_fn()` bridge to `mlflow.genai.evaluate()` + +--- + +## Agent Decomposition + +### Agent Boundaries + +| Agent | Source | Tools | Needs Lakebase? | Streaming? | LLM? | +|-------|--------|-------|-----------------|------------|------| +| **genie-scorer** | `agents/scorer/` | `scan_space`, `get_history`, `toggle_star`, `list_spaces` | Yes (scores, stars) | No | No | +| **genie-analyzer** | `agents/analyzer/` | `fetch_space`, `analyze_section`, `analyze_all`, `query_genie`, `execute_sql` | No | Yes (SSE) | Yes | +| **genie-creator** | `agents/creator/` | All 16 current tools (discover_*, describe_*, profile_*, generate_config, etc.) | Yes (sessions) | Yes (SSE) | Yes | +| **genie-optimizer** | `agents/optimizer/` | `generate_suggestions`, `merge_config`, `label_benchmark` | No | No (heartbeat SSE) | Yes | +| **genie-fixer** | `agents/fixer/` | `generate_fixes`, `apply_patch` | No | Yes (SSE) | Yes | +| **supervisor** | root `app.py` | Routes to sub-agents, serves React SPA, `/api/settings`, `/api/auth` | Yes (starred) | Proxy | No | + +### What moves where + +``` +backend/services/scanner.py → agents/scorer/scanner.py (as-is, domain logic) +backend/services/analyzer.py → agents/analyzer/analyzer.py (as-is, domain logic) +backend/services/optimizer.py → agents/optimizer/optimizer.py (as-is, domain logic) +backend/services/fix_agent.py → agents/fixer/fix_agent.py (as-is, domain logic) +backend/services/create_agent.py → agents/creator/agent.py (as-is, domain logic) +backend/services/create_agent_session.py → agents/creator/session.py (as-is) +backend/prompts_create/ → agents/creator/prompts/ (as-is) +backend/references/ → agents/creator/references/ (as-is) + +backend/services/uc_client.py → DELETED (replaced by databricks-tools-core) +backend/sql_executor.py → DELETED (replaced by databricks-tools-core) +backend/routers/spaces.py → DISSOLVED (endpoints become scorer/supervisor tools) +backend/routers/analysis.py → DISSOLVED (endpoints become analyzer/optimizer tools) +backend/routers/create.py → DISSOLVED (endpoints become creator tools) +``` + +### What stays custom (irreplaceable domain logic) + +These files contain business logic specific to GenieIQ/GenieRx and move to their respective agents unchanged: + +- `scanner.py` — Rule-based IQ scoring (maturity levels, dimension weights) +- `analyzer.py` — LLM checklist evaluation with session management +- `optimizer.py` — Optimization suggestion generation from labeling feedback +- `fix_agent.py` — Patch generation + application via Genie API +- `create_agent.py` — Tool-calling loop with message compaction, JSON repair, session recovery +- `create_agent_session.py` — Two-tier session persistence (memory + Lakebase) +- `prompts_create/` — Dynamic prompt assembly (9 modules: core, data_sources, requirements, plan, etc.) +- `references/schema.md` — Genie Space schema reference +- `genie_creator.py` — Genie API write operations +- `genie_client.py` — Genie API read operations (including SP-fallback for missing OAuth scopes, added in PR #7) +- `lakebase.py` — PostgreSQL persistence with in-memory fallback + +--- + +## What Gets Replaced + +### 1. 
Tool Definition Boilerplate → `@agent.tool()` Decorators + +**Before** (create_agent_tools.py, ~80 lines per tool): +```python +TOOL_DEFINITIONS = [ + { + "type": "function", + "function": { + "name": "discover_catalogs", + "description": "List all Unity Catalog catalogs the user has access to.", + "parameters": {"type": "object", "properties": {}, "required": []}, + }, + }, + # ... 15 more tool definitions with nested JSON schemas ... +] + +def handle_tool_call(name: str, arguments: dict, session_config=None) -> dict: + handlers = { + "discover_catalogs": _discover_catalogs, + "discover_schemas": _discover_schemas, + # ... 14 more entries ... + } + handler = handlers.get(name) + # ... dispatch logic ... +``` + +**After** (auto-generated from function signatures): +```python +@creator.tool(description="List all Unity Catalog catalogs the user has access to.") +async def discover_catalogs() -> dict: + from databricks_tools_core.unity_catalog import list_catalogs + return {"catalogs": list_catalogs()} + +@creator.tool(description="List schemas within a catalog.") +async def discover_schemas(catalog: str) -> dict: + from databricks_tools_core.unity_catalog import list_schemas + return {"schemas": list_schemas(catalog)} +``` + +**Impact:** ~580 lines of JSON schemas + 40-line dispatch table → auto-generated. + +### 2. OBO Auth Middleware + SP Fallback → `request.user_context` + +**Before** (main.py + auth.py + genie_client.py): +```python +# main.py — ContextVar middleware +class OBOAuthMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request, call_next): + token = request.headers.get("x-forwarded-access-token", "") + if token: + set_obo_user_token(token) # ContextVar + request.state.user_token = token + response = await call_next(request) + if not is_streaming: + clear_obo_user_token() + return response + +# auth.py — SP fallback for scope errors (added in PR #7) +def get_service_principal_client() -> WorkspaceClient: + """Bypass OBO for ops requiring scopes the user token lacks.""" + return _get_default_client() + +# genie_client.py — every API function repeats this pattern +try: + return _get_space_with_client(client, genie_space_id) +except Exception as e: + if _is_scope_error(e): + sp_client = get_service_principal_client() + return _get_space_with_client(sp_client, genie_space_id) + +# In streaming generators: +if user_token: + set_obo_user_token(user_token) # Must re-establish in generator! +``` + +**After** (`@app_agent` handles it): +```python +@app_agent(name="genie-scorer", ...) +async def scorer(request: AgentRequest) -> AgentResponse: + # request.user_context.access_token is automatically available + # No ContextVar management, no SP fallback boilerplate + ... +``` + +**Impact:** ~30 lines of middleware + SP fallback pattern duplicated across every API call → zero. + +### 3. UC Client + SQL Executor → `databricks-tools-core` + +| Current | Lines | Replacement | +|---------|-------|-------------| +| `backend/services/uc_client.py` | 60 | `from databricks_tools_core.unity_catalog import list_catalogs, list_schemas, list_tables` | +| `backend/sql_executor.py` | 220 | `from databricks_tools_core.sql import execute_sql, get_best_warehouse` | +| Warehouse auto-detection | 30 | `get_best_warehouse()` | + +**Impact:** 310 lines deleted, replaced by maintained library functions. 
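+
+A sketch of the replacement in practice (hedged — the import paths follow the
+table above, but the exact signatures should be verified against
+`databricks-tools-core`):
+
+```python
+# Assumed entry points from the table above; verify signatures before use.
+from databricks_tools_core.sql import execute_sql, get_best_warehouse
+from databricks_tools_core.unity_catalog import list_catalogs, list_tables
+
+warehouse_id = get_best_warehouse()   # replaces hand-rolled auto-detection
+rows = execute_sql("SELECT 1", warehouse_id=warehouse_id)
+catalogs = list_catalogs()            # replaces uc_client.py wrappers
+```
+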
+ +--- + +## Deployment Topology + +### agents.yaml + +```yaml +project: + name: genie-workbench + workspace_path: /Workspace/Shared/apps + +agents: + - name: scorer + source: ./agents/scorer + - name: analyzer + source: ./agents/analyzer + - name: creator + source: ./agents/creator + - name: optimizer + source: ./agents/optimizer + - name: fixer + source: ./agents/fixer + - name: supervisor + source: . + depends_on: [scorer, analyzer, creator, optimizer, fixer] + url_env_map: + scorer: SCORER_URL + analyzer: ANALYZER_URL + creator: CREATOR_URL + optimizer: OPTIMIZER_URL + fixer: FIXER_URL +``` + +Each agent deploys as its own Databricks App with: +- Its own `app.yaml` defining env vars and resource bindings +- Its own service principal (for Lakebase, LLM endpoint access) +- Auto-generated `/.well-known/agent.json` for A2A discovery +- Auto-generated MCP server for tool integration + +### Per-Agent app.yaml Example (scorer) + +```yaml +command: ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] +env: + - name: LAKEBASE_HOST + value: "" + - name: LAKEBASE_INSTANCE_NAME + value: "" +``` + +--- + +## Wire Protocol + +### Frontend → Supervisor → Sub-Agents + +The React SPA continues to hit the same API paths (`/api/spaces/*`, `/api/analyze/*`, `/api/create/*`). The supervisor proxies requests to sub-agents: + +``` +Browser → /api/spaces/scan → supervisor → genie-scorer /invocations +Browser → /api/analyze/stream → supervisor → genie-analyzer /invocations +Browser → /api/create/agent/chat → supervisor → genie-creator /invocations +Browser → /api/optimize → supervisor → genie-optimizer /invocations +Browser → /api/spaces/{id}/fix → supervisor → genie-fixer /invocations +``` + +The supervisor uses the Responses Agent protocol (or simple HTTP proxying) to forward requests. For streaming endpoints, the supervisor proxies SSE responses transparently. + +### Agent-to-Agent (A2A) Discovery + +After deployment, each agent exposes `/.well-known/agent.json`: + +```json +{ + "name": "genie-scorer", + "description": "IQ scoring for Genie Spaces", + "url": "https://genie-workbench-scorer.cloud.databricks.com", + "tools": [ + {"name": "scan_space", "description": "Run IQ scan on a Genie Space"}, + {"name": "get_history", "description": "Get score history"}, + {"name": "toggle_star", "description": "Toggle star on a space"} + ] +} +``` + +Other workspace apps can discover and call these agents using `AgentDiscovery`. + +--- + +## Migration Path (Phased, Backwards-Compatible) + +### Phase 1: Scaffolding + Architecture Doc ← **This PR** + +- Architecture proposal for review +- `agents.yaml` deployment config +- Skeleton `app.py` + `app.yaml` for each agent +- No behavior changes to existing monolith + +### Phase 2: Extract genie-scorer (lowest risk) + +**Why first:** No LLM calls, no streaming, no sessions — pure rule-based scoring. Validates the `@app_agent` pattern with minimal risk. + +Files moved: +- `backend/services/scanner.py` → `agents/scorer/scanner.py` (as-is) +- Relevant Lakebase functions → `agents/scorer/lakebase.py` + +What gets deleted from monolith: +- Scan/history/star endpoints from `backend/routers/spaces.py` (~80 lines) + +### Phase 3: Extract genie-fixer (streaming + LLM, medium complexity) + +**Why second:** Streaming SSE + LLM calls, but simpler than creator (no sessions, no 16 tools). 
+ +Files moved: +- `backend/services/fix_agent.py` → `agents/fixer/fix_agent.py` +- Fix prompt → `agents/fixer/prompts.py` + +Validates: Streaming via async generator → SSE (auto-handled by `@app_agent`) + +### Phase 4: Extract genie-analyzer (streaming + LLM, high complexity) + +Files moved: +- `backend/services/analyzer.py` → `agents/analyzer/analyzer.py` +- Analysis prompts → `agents/analyzer/prompts/` + +Tools: `fetch_space`, `analyze_section`, `analyze_all`, `query_genie`, `execute_sql` + +### Phase 5: Extract genie-optimizer + +Files moved: +- `backend/services/optimizer.py` → `agents/optimizer/optimizer.py` +- Benchmark labeling logic → `agents/optimizer/labeling.py` + +Tools: `generate_suggestions`, `merge_config`, `label_benchmark` + +### Phase 6: Extract genie-creator (most complex, last) + +**Why last:** 16 tools, session persistence, complex tool-calling loop with message compaction. Hardest extraction. + +Key change: 16 hand-coded tool definitions become `@creator.tool()` decorators: +```python +@creator.tool(description="List Unity Catalog catalogs") +async def discover_catalogs() -> dict: + from databricks_tools_core.unity_catalog import list_catalogs + return {"catalogs": list_catalogs()} +``` + +What stays custom: Dynamic prompt assembly, session persistence, message compaction, config generation/validation. These are domain logic. + +What gets replaced: +- Tool definition boilerplate (~580 lines of JSON schemas → auto-generated from function signatures) +- `handle_tool_call()` dispatcher (~40 lines → auto-routing) +- OBO middleware → `request.user_context` + +### Phase 7: Supervisor + Frontend + +The supervisor becomes a thin shell that: +1. Serves the React SPA (static files) +2. Routes API calls to sub-agents +3. Handles settings and auth endpoints + +Frontend changes: **Minimal.** API client (`frontend/src/lib/api.ts`) keeps hitting the same paths. The supervisor proxies to sub-agents transparently. + +### Phase 8: AI Dev Kit Integration + +Replace hand-rolled utilities with `databricks-tools-core` across all agents: + +| Current | Lines | Replacement | +|---------|-------|-------------| +| `backend/services/uc_client.py` | 60 | `databricks_tools_core.unity_catalog` | +| `backend/sql_executor.py` | 220 | `databricks_tools_core.sql` | +| Warehouse auto-detection in sql_executor | 30 | `get_best_warehouse()` | + +--- + +## Eval Story + +Each agent becomes independently evaluatable via the `dbx-agent-app` bridge: + +```python +from dbx_agent_app.bridge import app_predict_fn +import mlflow + +predict = app_predict_fn("https://genie-workbench-scorer.cloud.databricks.com") +results = mlflow.genai.evaluate( + data=eval_dataset, + predict_fn=predict, + scorers=[correctness_scorer, latency_scorer], +) +``` + +This replaces the current "manual curl and check" testing with automated, repeatable evaluation pipelines for each agent independently. + +--- + +## Integration Challenges + +### 1. OBO Tokens in Streaming Generators + +The creator agent's tool-calling loop needs the user's OBO token across multiple LLM rounds within a single SSE stream. `@app_agent` provides `request.user_context`, but we need to pass the token into the agent session and re-establish it per-round. + +**Solution:** Pass `user_context.access_token` into the agent session object. Each tool call creates a fresh `WorkspaceClient(token=session.access_token)`. + +### 2. 
Complex Tool Schemas + +`generate_config` has 10+ nested parameters (tables with column configs, SQL snippets with expressions/measures/filters, etc.). The `@agent.tool()` decorator auto-generates schemas from type hints, but deeply nested structures need Pydantic models: + +```python +class TableConfig(BaseModel): + identifier: str + description: str = "" + column_configs: list[ColumnConfig] = [] + +@creator.tool(description="Generate a complete Genie Space configuration") +async def generate_config(tables: list[TableConfig], ...) -> dict: + ... +``` + +### 3. Frontend Transparency + +The React SPA currently hits `/api/spaces/*`, `/api/analysis/*`, `/api/create/*`. Two options: + +1. **Supervisor proxy** (recommended): Supervisor exposes the same paths and routes to sub-agents. Zero frontend changes. +2. **Direct sub-agent calls**: Frontend API client updated to call sub-agent URLs. Requires frontend changes but eliminates proxy latency. + +### 4. SP Fallback for OAuth Scope Gaps + +PRs #7/#8 added a `get_service_principal_client()` + `_is_scope_error()` pattern: when the user's OBO token lacks the `genie` OAuth scope, the code retries with the app's service principal. This pattern is currently duplicated in `genie_client.py` (`get_genie_space`, `list_genie_spaces`) and `routers/spaces.py` (`get_space_detail`). In the multi-agent model, `@app_agent` may handle this differently — we need to verify whether the framework supports automatic SP fallback or if we keep this pattern in the domain logic. + +### 5. Shared Lakebase + +Multiple agents need Lakebase access (scorer for scores/stars, creator for sessions). Each agent gets its own Lakebase credentials via `app.yaml` resource bindings. The shared `lakebase.py` module moves to a small shared library or gets duplicated per-agent (it's only 269 lines). + +--- + +## Estimated Impact + +| Metric | Before | After | +|--------|--------|-------| +| Backend Python lines | ~10,178 | ~7,100 (30% reduction from eliminating boilerplate) | +| Files deleted | 0 | 5 (routers + utility wrappers replaced by libraries) | +| Tool definition boilerplate | ~580 lines JSON schemas | 0 (auto-generated from type hints) | +| Dispatch table code | ~40 lines | 0 (auto-routing by `@app_agent`) | +| OBO auth code | ~30 lines middleware | 0 (handled by framework) | +| Auto-generated endpoints | 0 | 30+ (5 agents × 6 endpoints each: /invocations, /health, agent.json, MCP, etc.) | +| MCP servers | 0 | 5 (one per agent, free) | +| Agent discovery | None | A2A protocol, workspace-wide | +| Eval support | Manual testing | `mlflow.genai.evaluate()` via bridge | +| Deployment | Single `databricks apps deploy` | `dbx-agent-app deploy --config agents.yaml` (per-agent or all) | + +--- + +## Verification Plan + +1. **Unit tests:** Each agent's tools can be tested independently via `agent(AgentRequest(...))` — the `@app_agent` decorator makes the handler directly callable. + +2. **Integration tests:** Deploy all agents locally (`uvicorn agents/scorer/app:app --port 8001`, etc.), configure supervisor with local URLs, run existing E2E tests. + +3. **A2A discovery:** After deploying to Databricks Apps, verify `/.well-known/agent.json` returns correct agent cards. Use `AgentDiscovery` to scan workspace. + +4. **Eval bridge:** Run `mlflow.genai.evaluate()` against each deployed agent using `app_predict_fn()`. + +5. **Frontend smoke test:** Verify React SPA still works end-to-end through the supervisor proxy. 
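+
+For step 1, a direct-call test might look like the following (the
+`AgentRequest` constructor shape is assumed from the scorer scaffold; adjust
+to the real model):
+
+```python
+# Sketch of verification step 1 — the decorated handler is directly callable.
+# Requires pytest-asyncio for the async test marker.
+import pytest
+from dbx_agent_app import AgentRequest
+
+from agents.scorer.app import scorer
+
+
+@pytest.mark.asyncio
+async def test_scorer_accepts_scan_request():
+    request = AgentRequest(
+        messages=[{"role": "user", "content": "scan space 123"}]
+    )
+    response = await scorer(request)
+    assert response is not None
+```
+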
+ +--- + +## Files in This PR + +### New files +- `docs/architecture-proposal.md` — this document +- `agents.yaml` — multi-agent deployment config +- `agents/scorer/app.py` — scorer agent scaffold +- `agents/scorer/app.yaml` — scorer Databricks Apps config +- `agents/analyzer/app.py` — analyzer agent scaffold +- `agents/analyzer/app.yaml` — analyzer Databricks Apps config +- `agents/creator/app.py` — creator agent scaffold +- `agents/creator/app.yaml` — creator Databricks Apps config +- `agents/optimizer/app.py` — optimizer agent scaffold +- `agents/optimizer/app.yaml` — optimizer Databricks Apps config +- `agents/fixer/app.py` — fixer agent scaffold +- `agents/fixer/app.yaml` — fixer Databricks Apps config + +### No modified files +This is a proposal PR — the existing monolith is untouched. All new files are additive. From 2e69061585fab890ea280682c3ea38affb614de4 Mon Sep 17 00:00:00 2001 From: Stuart Gano Date: Tue, 10 Mar 2026 11:30:06 -0700 Subject: [PATCH 3/6] feat: implement integration challenge solutions for multi-agent architecture Add shared utilities and agent-specific modules that solve the 5 integration challenges identified in the architecture proposal: 1. OBO Auth Bridge (agents/_shared/auth_bridge.py): Context manager that bridges @app_agent UserContext into both monolith ContextVar auth and databricks-tools-core ContextVars in a single `with` block. 2. Complex Tool Schemas (agents/creator/schemas.py): Pydantic models that auto-generate JSON Schema for @app_agent tool registration, replacing ~580 lines of hand-written schema in create_agent_tools.py. 3. Frontend Transparent Proxy (agents/supervisor/proxy.py): Ordered route table mapping all 28 frontend API paths to sub-agents with SSE stream detection and glob support for path parameters. 4. SP Fallback Decorator (agents/_shared/sp_fallback.py): Extracts the _is_scope_error + retry-with-SP pattern from genie_client.py into a reusable decorator and convenience function. 5. Shared Lakebase Client (agents/_shared/lakebase_client.py): Connection pool lifecycle with idempotent DDL per agent and in-memory fallback. Also updates scorer/app.py and creator/app.py scaffolds to demonstrate the integration patterns, and updates the proposal doc with concrete solutions replacing the placeholder descriptions. --- agents/_shared/__init__.py | 7 + agents/_shared/auth_bridge.py | 125 ++++++++++++++++++ agents/_shared/lakebase_client.py | 182 +++++++++++++++++++++++++ agents/_shared/sp_fallback.py | 93 +++++++++++++ agents/creator/app.py | 213 ++++++++++++++++++++++++++++++ agents/creator/schemas.py | 166 +++++++++++++++++++++++ agents/scorer/app.py | 41 +++++- agents/supervisor/__init__.py | 0 agents/supervisor/proxy.py | 179 +++++++++++++++++++++++++ docs/architecture-proposal.md | 118 +++++++++++++---- 10 files changed, 1094 insertions(+), 30 deletions(-) create mode 100644 agents/_shared/__init__.py create mode 100644 agents/_shared/auth_bridge.py create mode 100644 agents/_shared/lakebase_client.py create mode 100644 agents/_shared/sp_fallback.py create mode 100644 agents/creator/app.py create mode 100644 agents/creator/schemas.py create mode 100644 agents/supervisor/__init__.py create mode 100644 agents/supervisor/proxy.py diff --git a/agents/_shared/__init__.py b/agents/_shared/__init__.py new file mode 100644 index 0000000..04942cf --- /dev/null +++ b/agents/_shared/__init__.py @@ -0,0 +1,7 @@ +"""Shared utilities for Genie Workbench agents. 
+ +Provides cross-cutting concerns that multiple agents need: +- auth_bridge: Bridge @app_agent UserContext into monolith + AI Dev Kit auth +- sp_fallback: Service principal fallback for Genie API scope errors +- lakebase_client: Shared PostgreSQL connection pool management +""" diff --git a/agents/_shared/auth_bridge.py b/agents/_shared/auth_bridge.py new file mode 100644 index 0000000..e6a8071 --- /dev/null +++ b/agents/_shared/auth_bridge.py @@ -0,0 +1,125 @@ +"""Bridge @app_agent UserContext into both monolith and AI Dev Kit auth systems. + +During migration, agent tools receive `request.user_context` from @app_agent, +but domain logic (scanner, genie_client, etc.) calls `get_workspace_client()` +from the monolith's auth module. And `databricks-tools-core` functions use +their own separate ContextVars via `set_databricks_auth()`. + +This module provides `obo_context()` — a single context manager that sets up +all three auth systems so existing domain logic works unchanged inside agents. + +Source patterns: + - backend/services/auth.py:25 (_obo_client ContextVar) + - backend/services/auth.py:33-58 (set_obo_user_token) + - databricks_tools_core/auth.py (set_databricks_auth / clear_databricks_auth) +""" + +from __future__ import annotations + +import os +from contextlib import contextmanager +from contextvars import ContextVar +from typing import Optional + +from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config + + +# Monolith-compatible ContextVar (mirrors backend/services/auth.py:25) +_obo_client: ContextVar[Optional[WorkspaceClient]] = ContextVar( + "_obo_client", default=None +) + +# Singleton SP client (lazy-initialized) +_sp_client: Optional[WorkspaceClient] = None + + +@contextmanager +def obo_context(access_token: str, host: Optional[str] = None): + """Set up OBO auth for monolith code and databricks-tools-core. + + Creates a per-request WorkspaceClient from the user's OBO token and + stores it in both the monolith ContextVar and the AI Dev Kit ContextVars. + + Usage in any agent tool:: + + @scorer.tool(description="Run IQ scan on a Genie Space") + async def scan_space(space_id: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + # All of these now work: + # - get_workspace_client() returns OBO client + # - databricks-tools-core functions use OBO token + result = scanner.calculate_score(space_id) + + For streaming generators, capture the token before yielding and + re-enter obo_context() per-yield. This matches the pattern in + backend/routers/create.py:125-198. + + Args: + access_token: The user's OBO access token. + host: Databricks workspace host. Defaults to DATABRICKS_HOST env var. + + Yields: + WorkspaceClient configured with the user's OBO token. + """ + resolved_host = host or os.environ.get("DATABRICKS_HOST", "") + + # 1. Create OBO WorkspaceClient (monolith pattern from auth.py:49-58) + # Must set auth_type="pat" and clear client_id/client_secret to prevent + # the SDK from using oauth-m2m from env vars on Databricks Apps. + cfg = Config( + host=resolved_host, + token=access_token, + auth_type="pat", + client_id=None, + client_secret=None, + ) + client = WorkspaceClient(config=cfg) + token = _obo_client.set(client) + + # 2. 
Set databricks-tools-core ContextVars (if available) + has_tools_core = False + try: + from databricks_tools_core.auth import ( + set_databricks_auth, + clear_databricks_auth, + ) + + set_databricks_auth(resolved_host, access_token) + has_tools_core = True + except ImportError: + pass + + try: + yield client + finally: + _obo_client.reset(token) + if has_tools_core: + clear_databricks_auth() + + +def get_workspace_client() -> WorkspaceClient: + """Drop-in replacement for backend.services.auth.get_workspace_client(). + + Returns the OBO client if inside an obo_context(), otherwise the default + singleton (SP on Databricks Apps, CLI/PAT locally). + + Domain logic can import this instead of the monolith version during + migration — the behavior is identical. + """ + obo = _obo_client.get() + if obo is not None: + return obo + return get_service_principal_client() + + +def get_service_principal_client() -> WorkspaceClient: + """Get the service principal client (bypasses OBO). + + Used for app-level operations and as fallback when the user's OBO token + lacks required scopes (e.g., Genie API before consent flow). + """ + global _sp_client + if _sp_client is None: + _sp_client = WorkspaceClient() + return _sp_client diff --git a/agents/_shared/lakebase_client.py b/agents/_shared/lakebase_client.py new file mode 100644 index 0000000..e850585 --- /dev/null +++ b/agents/_shared/lakebase_client.py @@ -0,0 +1,182 @@ +"""Shared Lakebase (PostgreSQL) connection pool management. + +Each agent initializes its own pool from its own app.yaml env vars +(LAKEBASE_HOST, LAKEBASE_INSTANCE_NAME, etc.). Schema migrations are +idempotent (IF NOT EXISTS) so agents can boot in any order. + +Domain-specific query functions (save_scan_result, get_score_history, etc.) +stay in each agent's own module — this shared client only manages the pool +lifecycle, credential generation, and DDL. 
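+
+Typical wiring (illustrative — assumes each agent calls these from a FastAPI
+lifespan handler):
+
+    await init_pool(SCORER_DDL)   # at startup, with that agent's DDL
+    ...
+    await close_pool()            # at shutdown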
+ +Source: backend/services/lakebase.py (269 lines) +""" + +from __future__ import annotations + +import logging +import os +from typing import Optional + +logger = logging.getLogger(__name__) + +_pool = None +_lakebase_available = False + +# In-memory fallback (same pattern as backend/services/lakebase.py:12-17) +_memory_store: dict = { + "scans": {}, + "history": {}, + "stars": set(), + "seen": set(), + "sessions": {}, +} + + +# ── DDL statements per agent (all use IF NOT EXISTS) ────────────────────────── + +SCORER_DDL = [ + """CREATE TABLE IF NOT EXISTS scan_results ( + space_id TEXT NOT NULL, + score INTEGER NOT NULL, + maturity TEXT, + breakdown JSONB, + findings JSONB, + next_steps JSONB, + scanned_at TIMESTAMPTZ NOT NULL, + UNIQUE (space_id, scanned_at) + )""", + "CREATE TABLE IF NOT EXISTS starred_spaces (space_id TEXT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS seen_spaces (space_id TEXT PRIMARY KEY)", +] + +CREATOR_DDL = [ + """CREATE TABLE IF NOT EXISTS agent_sessions ( + session_id TEXT PRIMARY KEY, + data JSONB NOT NULL, + updated_at TIMESTAMPTZ DEFAULT NOW() + )""", +] + + +# ── Credential generation (mirrors backend/services/lakebase.py:23-59) ──────── + +def _generate_lakebase_credential() -> tuple[str, str] | None: + """Generate Lakebase OAuth credentials using the Databricks SDK.""" + instance_name = os.environ.get("LAKEBASE_INSTANCE_NAME") + if not instance_name: + return None + + try: + from agents._shared.auth_bridge import get_service_principal_client + + client = get_service_principal_client() + resp = client.api_client.do( + method="POST", + path="/api/2.0/database/credentials", + body={ + "request_id": "lakebase-pool", + "instance_names": [instance_name], + }, + ) + token = resp.get("token") + if not token: + logger.warning("Lakebase credential response missing token") + return None + + user = os.environ.get("LAKEBASE_USER") + if not user: + try: + me = client.current_user.me() + user = me.user_name + except Exception: + user = "databricks" + + logger.info("Generated Lakebase credential via SDK (user=%s)", user) + return user, token + except Exception as e: + logger.warning("Lakebase credential generation failed: %s", e) + return None + + +# ── Pool lifecycle ──────────────────────────────────────────────────────────── + +async def init_pool(ddl_statements: Optional[list[str]] = None): + """Initialize asyncpg pool and run idempotent DDL. + + Call this at agent startup (e.g., in a FastAPI lifespan handler). + + Args: + ddl_statements: SQL DDL to execute after connecting. + Use SCORER_DDL, CREATOR_DDL, or combine them. 
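+
+    Example (illustrative) of wiring this into a FastAPI lifespan handler::
+
+        from contextlib import asynccontextmanager
+
+        from fastapi import FastAPI
+
+        @asynccontextmanager
+        async def lifespan(app: FastAPI):
+            await init_pool(SCORER_DDL)
+            yield
+            await close_pool()
+
+        app = FastAPI(lifespan=lifespan)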
+ """ + global _pool, _lakebase_available + + host = os.environ.get("LAKEBASE_HOST") + if not host: + logger.info("LAKEBASE_HOST not set — using in-memory fallback") + return + + password = os.environ.get("LAKEBASE_PASSWORD") + user = os.environ.get("LAKEBASE_USER", "postgres") + + if not password: + cred = _generate_lakebase_credential() + if cred: + user, password = cred + else: + logger.warning( + "No LAKEBASE_PASSWORD and credential generation failed " + "— using in-memory fallback" + ) + return + + try: + import asyncpg + + _pool = await asyncpg.create_pool( + host=host, + port=int(os.environ.get("LAKEBASE_PORT", "5432")), + database=os.environ.get("LAKEBASE_DATABASE", "databricks_postgres"), + user=user, + password=password, + min_size=2, + max_size=10, + command_timeout=30, + ssl="require", + ) + _lakebase_available = True + logger.info("Lakebase connection pool initialized") + + # Run idempotent DDL + if ddl_statements and _pool: + async with _pool.acquire() as conn: + for ddl in ddl_statements: + await conn.execute(ddl) + logger.info("Executed %d DDL statements", len(ddl_statements)) + + except Exception as e: + logger.warning("Lakebase unavailable: %s. Using in-memory fallback.", e) + _lakebase_available = False + + +async def close_pool(): + """Close the connection pool. Call at agent shutdown.""" + global _pool + if _pool: + await _pool.close() + _pool = None + + +async def get_pool(): + """Get the connection pool (or None if using in-memory fallback).""" + return _pool + + +def is_available() -> bool: + """Check if Lakebase is connected.""" + return _lakebase_available + + +def get_memory_store() -> dict: + """Get the in-memory fallback store (for when Lakebase is unavailable).""" + return _memory_store diff --git a/agents/_shared/sp_fallback.py b/agents/_shared/sp_fallback.py new file mode 100644 index 0000000..b0bcfad --- /dev/null +++ b/agents/_shared/sp_fallback.py @@ -0,0 +1,93 @@ +"""Service Principal fallback for Genie API scope errors. + +When OBO tokens lack the 'genie' scope (before the user consent flow is +triggered), the Genie API returns scope errors. This module extracts the +retry-with-SP pattern from the monolith into a reusable decorator and +convenience function. + +Source pattern: backend/services/genie_client.py:22-68 +""" + +from __future__ import annotations + +import functools +import logging +from typing import Callable, TypeVar + +from agents._shared.auth_bridge import ( + get_workspace_client, + get_service_principal_client, +) + +logger = logging.getLogger(__name__) + +T = TypeVar("T") + + +def _is_scope_error(e: Exception) -> bool: + """Check if exception is a missing OAuth scope error. + + Matches the same check in backend/services/genie_client.py:22-25. + """ + msg = str(e).lower() + return "scope" in msg or "insufficient_scope" in msg + + +def with_sp_fallback(func: Callable[..., T]) -> Callable[..., T]: + """Decorator: retry with SP client if OBO token lacks Genie scope. + + The decorated function must accept a ``client`` keyword argument + (a WorkspaceClient). On scope error, the function is retried with + the service principal client. 
+ + Usage:: + + @with_sp_fallback + def get_genie_space(space_id: str, *, client=None): + client = client or get_workspace_client() + return client.api_client.do( + "GET", f"/api/2.0/genie/spaces/{space_id}" + ) + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + if _is_scope_error(e): + logger.info( + "%s: OBO scope error, retrying with service principal", + func.__name__, + ) + kwargs["client"] = get_service_principal_client() + return func(*args, **kwargs) + raise + + return wrapper + + +def genie_api_call(method: str, path: str, **kwargs): + """Make a Genie API call with automatic SP fallback. + + Convenience function for simple one-off API calls that don't need + the full decorator pattern. + + Args: + method: HTTP method (GET, POST, etc.) + path: API path (e.g., "/api/2.0/genie/spaces/{id}") + **kwargs: Forwarded to ``client.api_client.do()``. + + Returns: + API response dict. + """ + client = get_workspace_client() + try: + return client.api_client.do(method=method, path=path, **kwargs) + except Exception as e: + if _is_scope_error(e): + logger.info("Genie API %s: scope error, retrying with SP", path) + sp = get_service_principal_client() + if sp is not client: + return sp.api_client.do(method=method, path=path, **kwargs) + raise diff --git a/agents/creator/app.py b/agents/creator/app.py new file mode 100644 index 0000000..966e493 --- /dev/null +++ b/agents/creator/app.py @@ -0,0 +1,213 @@ +"""genie-creator — Conversational wizard for building new Genie Spaces. + +Extracted from: + - backend/routers/create.py (UC discovery, validation, agent chat, sessions) + - backend/services/create_agent.py (CreateGenieAgent tool-calling loop) + - backend/services/create_agent_tools.py (16 tool definitions + implementations) + - backend/services/create_agent_session.py (two-tier session persistence) + - backend/services/uc_client.py (UC browsing — replaced by AI Dev Kit) + - backend/prompts_create/ (dynamic prompt assembly, 9 modules) + - backend/references/ (schema.md reference) + - backend/genie_creator.py (Genie API write operations) + +This is the MOST COMPLEX agent (Phase 6 extraction). The tool-calling loop, +message compaction, session persistence, and dynamic prompting are all +irreplaceable domain logic that moves as-is. + +What gets replaced: + - 580 lines of JSON tool schemas → auto-generated from @creator.tool() signatures + - 40-line handle_tool_call() dispatcher → auto-routing + - uc_client.py (60 lines) → databricks_tools_core.unity_catalog + - sql_executor.py (220 lines) → databricks_tools_core.sql + +Streaming: Yes (SSE for agent chat) +LLM: Yes (tool-calling loop with Claude) +""" + +from __future__ import annotations + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents.creator.schemas import GenerateConfigArgs + + +@app_agent( + name="genie-creator", + description=( + "Conversational wizard for building new Genie Spaces. Guides users " + "through requirements gathering, data source discovery, table " + "inspection, plan presentation, config generation, and space creation." + ), +) +async def creator(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to the creator workflow. + + The core tool-calling loop (CreateGenieAgent.chat) moves here as-is. + It handles: step detection, LLM streaming, tool dispatch, message + compaction, JSON repair, and session management. 
+ + Source: backend/services/create_agent.py::CreateGenieAgent.chat + """ + # TODO: Phase 6 — move CreateGenieAgent.chat here + ... + + +# ── UC Discovery Tools ────────────────────────────────────────────────────── +# Phase 8: Replace implementations with databricks_tools_core + + +@creator.tool(description="List all Unity Catalog catalogs the user has access to.") +async def discover_catalogs() -> dict: + """Source: backend/services/uc_client.py::list_catalogs + + Phase 8: from databricks_tools_core.unity_catalog import list_catalogs + """ + raise NotImplementedError("Phase 6/8") + + +@creator.tool(description="List schemas within a catalog.") +async def discover_schemas(catalog: str) -> dict: + """Source: backend/services/uc_client.py::list_schemas""" + raise NotImplementedError("Phase 6/8") + + +@creator.tool(description="List tables within a catalog.schema.") +async def discover_tables(catalog: str, schema: str) -> dict: + """Source: backend/services/uc_client.py::list_tables""" + raise NotImplementedError("Phase 6/8") + + +# ── Table Inspection Tools ─────────────────────────────────────────────────── + + +@creator.tool( + description="Get detailed table metadata: columns, types, descriptions, row count, sample rows.", +) +async def describe_table(table: str) -> dict: + """Source: backend/services/create_agent_tools.py::_describe_table (lines ~860-960)""" + raise NotImplementedError("Phase 6") + + +@creator.tool( + description=( + "Profile selected columns: distinct values, null percentage, " + "min/max, data type distribution." + ), +) +async def profile_columns(table: str, columns: list[str] | None = None) -> dict: + """Source: backend/services/create_agent_tools.py::_profile_columns""" + raise NotImplementedError("Phase 6") + + +@creator.tool( + description="Assess data quality: null rates, duplicate rates, freshness, anomalies.", +) +async def assess_data_quality(tables: list[str]) -> dict: + """Source: backend/services/create_agent_tools.py::_assess_data_quality""" + raise NotImplementedError("Phase 6") + + +@creator.tool( + description="Profile table usage patterns: query frequency, common joins, active users.", +) +async def profile_table_usage(tables: list[str]) -> dict: + """Source: backend/services/create_agent_tools.py::_profile_table_usage""" + raise NotImplementedError("Phase 6") + + +@creator.tool(description="Execute a test SQL query and return results (read-only, max 5 rows).") +async def test_sql(sql: str) -> dict: + """Source: backend/services/create_agent_tools.py::_test_sql + + Phase 8: Replace with databricks_tools_core.sql.execute_sql + """ + raise NotImplementedError("Phase 6/8") + + +@creator.tool(description="List available SQL warehouses for the user.") +async def discover_warehouses() -> dict: + """Source: backend/services/create_agent_tools.py::_discover_warehouses""" + raise NotImplementedError("Phase 6") + + +# ── Config Generation Tools ────────────────────────────────────────────────── + + +@creator.tool(description="Get the Genie Space configuration JSON schema reference.") +async def get_config_schema() -> dict: + """Source: backend/services/create_agent_tools.py::_get_config_schema""" + raise NotImplementedError("Phase 6") + + +@creator.tool( + description=( + "Generate a complete Genie Space configuration from discovered " + "tables, inspection data, and user requirements." 
+ ), + parameters=GenerateConfigArgs.model_json_schema(), +) +async def generate_config(**kwargs) -> dict: + """Source: backend/services/create_agent_tools.py::_generate_config (~lines 245-650) + + This is the largest tool implementation. The LLM provides content; + this tool handles all structural formatting (JSON schema compliance, + column config normalization, instruction budget enforcement). + + Integration pattern (Challenge 2): + Pydantic model auto-generates the JSON Schema for @app_agent + registration, replacing ~580 lines of hand-written schema. + Runtime validation catches malformed LLM output early. + """ + args = GenerateConfigArgs(**kwargs) + # TODO Phase 6: move _generate_config implementation here + # args.tables, args.sample_questions, etc. are all validated + raise NotImplementedError("Phase 6") + + +@creator.tool( + description="Present the space creation plan to the user for review before generating config.", + parameters=GenerateConfigArgs.model_json_schema(), +) +async def present_plan(**kwargs) -> dict: + """Source: backend/services/create_agent_tools.py::_present_plan""" + args = GenerateConfigArgs(**kwargs) + # TODO Phase 6: move _present_plan implementation here + raise NotImplementedError("Phase 6") + + +@creator.tool(description="Validate a generated configuration against the Genie Space schema.") +async def validate_config(config: dict) -> dict: + """Source: backend/services/create_agent_tools.py::_validate_config""" + raise NotImplementedError("Phase 6") + + +@creator.tool(description="Apply incremental updates to an existing generated configuration.") +async def update_config(config: dict, updates: dict) -> dict: + """Source: backend/services/create_agent_tools.py::_update_config""" + raise NotImplementedError("Phase 6") + + +@creator.tool( + description="Create a new Genie Space in the workspace with the generated configuration.", +) +async def create_space( + display_name: str, + config: dict, + parent_path: str | None = None, + warehouse_id: str | None = None, +) -> dict: + """Source: backend/services/create_agent_tools.py::_create_space + backend/genie_creator.py""" + raise NotImplementedError("Phase 6") + + +@creator.tool( + description="Update an existing Genie Space with a modified configuration.", +) +async def update_space(space_id: str, config: dict) -> dict: + """Source: backend/services/create_agent_tools.py::_update_space""" + raise NotImplementedError("Phase 6") + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = creator.app diff --git a/agents/creator/schemas.py b/agents/creator/schemas.py new file mode 100644 index 0000000..0e45db1 --- /dev/null +++ b/agents/creator/schemas.py @@ -0,0 +1,166 @@ +"""Pydantic models for creator agent tool schemas. + +These replace ~580 lines of hand-written JSON Schema in +backend/services/create_agent_tools.py. The models serve double duty: + +1. Generate JSON Schema for @app_agent tool registration via + ``GenerateConfigArgs.model_json_schema()`` +2. Validate + parse incoming tool arguments at runtime via + ``GenerateConfigArgs(**kwargs)`` + +Usage in agents/creator/app.py:: + + from agents.creator.schemas import GenerateConfigArgs + + @creator.tool( + description="Generate a complete Genie Space configuration", + parameters=GenerateConfigArgs.model_json_schema(), + ) + async def generate_config(**kwargs) -> dict: + args = GenerateConfigArgs(**kwargs) # Validates at runtime + ... 
+""" + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel, Field + + +# ── Shared nested types ────────────────────────────────────────────────────── + + +class ColumnConfig(BaseModel): + """Column-level configuration within a table.""" + + column_name: str + description: Optional[str] = None + synonyms: Optional[list[str]] = None + exclude: Optional[bool] = None + enable_matching: Optional[bool] = None + + +class SqlParameter(BaseModel): + """Parameter definition for parameterized example SQL.""" + + name: str + type_hint: str = Field( + ..., pattern="^(STRING|NUMBER|DATE|BOOLEAN)$" + ) + description: Optional[str] = None + default_value: str + + +class ExampleSql(BaseModel): + """Example SQL with natural-language question mapping.""" + + question: str + sql: str + usage_guidance: Optional[str] = None + parameters: Optional[list[SqlParameter]] = None + + +class Measure(BaseModel): + """Aggregate measure definition (SUM, AVG, COUNT, etc.).""" + + alias: str + sql: str + display_name: Optional[str] = None + synonyms: Optional[list[str]] = None + instruction: Optional[str] = None + comment: Optional[str] = None + + +class Filter(BaseModel): + """Pre-defined filter (WHERE clause snippet).""" + + display_name: str + sql: str + synonyms: Optional[list[str]] = None + instruction: Optional[str] = None + comment: Optional[str] = None + + +class Expression(BaseModel): + """Computed expression (derived column).""" + + alias: str + sql: str + display_name: Optional[str] = None + synonyms: Optional[list[str]] = None + instruction: Optional[str] = None + comment: Optional[str] = None + + +class JoinSpec(BaseModel): + """Join specification between two tables.""" + + left_table: str + left_alias: str + right_table: str + right_alias: str + left_column: str + right_column: str + relationship: str = Field( + ..., + pattern="^(MANY_TO_ONE|ONE_TO_MANY|ONE_TO_ONE|MANY_TO_MANY)$", + ) + instruction: Optional[str] = None + comment: Optional[str] = None + + +class Benchmark(BaseModel): + """Question/SQL pair for evaluation benchmarks.""" + + question: str + expected_sql: str + + +class MetricViewColumnConfig(BaseModel): + """Column configuration within a metric view.""" + + column_name: str + description: Optional[str] = None + enable_format_assistance: Optional[bool] = None + + +class MetricView(BaseModel): + """Metric view definition (curated data view).""" + + identifier: str + description: Optional[str] = None + column_configs: Optional[list[MetricViewColumnConfig]] = None + + +class TableConfig(BaseModel): + """Table-level configuration with optional column configs.""" + + identifier: str + description: Optional[str] = None + column_configs: Optional[list[ColumnConfig]] = None + + +# ── Top-level tool argument models ─────────────────────────────────────────── + + +class GenerateConfigArgs(BaseModel): + """Arguments for ``generate_config`` and ``present_plan`` tools. + + These tools share the same schema — present_plan previews what + generate_config will produce. 
+ """ + + tables: list[TableConfig] + sample_questions: Optional[list[str]] = None + text_instructions: Optional[list[str]] = None + example_sqls: Optional[list[ExampleSql]] = Field( + None, min_length=3 + ) + measures: Optional[list[Measure]] = None + filters: Optional[list[Filter]] = None + expressions: Optional[list[Expression]] = None + join_specs: Optional[list[JoinSpec]] = None + benchmarks: Optional[list[Benchmark]] = None + generate_benchmarks: Optional[bool] = None + metric_views: Optional[list[MetricView]] = None diff --git a/agents/scorer/app.py b/agents/scorer/app.py index 9819ea7..2789160 100644 --- a/agents/scorer/app.py +++ b/agents/scorer/app.py @@ -7,12 +7,21 @@ This agent has NO LLM dependency — it's pure rule-based scoring. Lowest-risk extraction target; validates the @app_agent pattern. + +Integration patterns used: + - Challenge 1 (OBO auth): obo_context() bridges @app_agent → monolith auth + - Challenge 4 (SP fallback): genie_api_call() retries with SP on scope errors + - Challenge 5 (Lakebase): init_pool(SCORER_DDL) for idempotent schema setup """ from __future__ import annotations from dbx_agent_app import AgentRequest, AgentResponse, app_agent +from agents._shared.auth_bridge import obo_context +from agents._shared.sp_fallback import genie_api_call +from agents._shared.lakebase_client import init_pool, SCORER_DDL + @app_agent( name="genie-scorer", @@ -28,6 +37,14 @@ async def scorer(request: AgentRequest) -> AgentResponse: ... +# ── Lifecycle ──────────────────────────────────────────────────────────────── + + +async def on_startup(): + """Initialize Lakebase pool with scorer-specific DDL.""" + await init_pool(SCORER_DDL) + + # ── Tools ──────────────────────────────────────────────────────────────────── # Each tool maps to a current REST endpoint in backend/routers/spaces.py. # Domain logic lives in scanner.py (moved as-is from backend/services/). @@ -40,12 +57,24 @@ async def scorer(request: AgentRequest) -> AgentResponse: "setup, SQL assets, optimization), and persists the result to Lakebase." ), ) -async def scan_space(space_id: str) -> dict: - """Source: backend/services/scanner.py::scan_space + backend/routers/spaces.py::trigger_scan""" - # Domain logic (scanner.calculate_score) moves here as-is. - # OBO auth: use request.user_context.access_token instead of ContextVar. - # Lakebase persistence: use local lakebase.py copy. - raise NotImplementedError("Phase 2: move scanner.py + lakebase.py here") +async def scan_space(space_id: str, request: AgentRequest) -> dict: + """Source: backend/services/scanner.py::scan_space + backend/routers/spaces.py::trigger_scan + + Integration pattern: + obo_context() sets up both monolith ContextVar and tools-core auth. + genie_api_call() auto-retries with SP on scope errors. + Domain logic (scanner.calculate_score) works unchanged. 
+ """ + with obo_context(request.user_context.access_token): + # Fetch space config (with automatic SP fallback for scope errors) + space_data = genie_api_call( + "GET", + f"/api/2.0/genie/spaces/{space_id}", + query={"include_serialized_space": "true"}, + ) + # TODO Phase 2: scanner.calculate_score(space_data) + # TODO Phase 2: save_scan_result(space_id, score) + raise NotImplementedError("Phase 2: move scanner.py + lakebase.py here") @scorer.tool( diff --git a/agents/supervisor/__init__.py b/agents/supervisor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agents/supervisor/proxy.py b/agents/supervisor/proxy.py new file mode 100644 index 0000000..62107de --- /dev/null +++ b/agents/supervisor/proxy.py @@ -0,0 +1,179 @@ +"""Transparent proxy from supervisor to sub-agents. + +Maps current /api/* paths to sub-agent URLs so the React SPA needs +zero changes. Handles JSON responses and SSE streaming. + +The route table is ordered — more specific paths match before general +prefixes. Each entry maps a path prefix to an environment variable +containing the sub-agent's base URL (set via agents.yaml url_env_map). + +Path → agent mapping derived from frontend/src/lib/api.ts (28 API calls). +""" + +from __future__ import annotations + +import os +import re +from typing import Optional + +import httpx +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, StreamingResponse + + +# Ordered route table: (pattern, env_var_or_None) +# More specific patterns MUST come before general prefixes. +# None means the supervisor handles the route directly (no proxy). +ROUTE_TABLE: list[tuple[str, Optional[str]]] = [ + # Specific sub-paths that override their parent prefix + ("/api/spaces/*/fix", "FIXER_URL"), # fix agent (SSE) + ("/api/genie/create", "CREATOR_URL"), # create Genie Space via API + + # General prefixes + ("/api/spaces", "SCORER_URL"), # list, scan, history, star + ("/api/space", "ANALYZER_URL"), # fetch, parse + ("/api/analyze", "ANALYZER_URL"), # section, all, stream (SSE) + ("/api/genie", "ANALYZER_URL"), # query + ("/api/sql", "ANALYZER_URL"), # execute + ("/api/optimize", "OPTIMIZER_URL"), # stream optimization (SSE) + ("/api/config", "OPTIMIZER_URL"), # merge + ("/api/create", "CREATOR_URL"), # agent chat (SSE), discover, validate, create + ("/api/checklist", "ANALYZER_URL"), # static content + ("/api/sections", "ANALYZER_URL"), # section list + + # Supervisor-owned (no proxy) + ("/api/settings", None), + ("/api/auth", None), + ("/api/admin", None), +] + +# Pre-compile glob patterns (only "/api/spaces/*/fix" currently) +_COMPILED_ROUTES: list[tuple[re.Pattern, Optional[str]]] = [] + +for pattern, env_var in ROUTE_TABLE: + if "*" in pattern: + # Convert glob "*" to regex "[^/]+" + regex = "^" + re.escape(pattern).replace(r"\*", "[^/]+") + _COMPILED_ROUTES.append((re.compile(regex), env_var)) + else: + # Simple prefix match + _COMPILED_ROUTES.append((re.compile("^" + re.escape(pattern)), env_var)) + + +def _resolve_upstream(path: str) -> Optional[str]: + """Find the upstream agent URL for a given API path. + + Returns: + Base URL string if the path should be proxied. + None if the supervisor handles it directly. + + Raises: + KeyError: If no route matches the path. 
+ """ + for compiled_pattern, env_var in _COMPILED_ROUTES: + if compiled_pattern.match(path): + if env_var is None: + return None + url = os.environ.get(env_var) + if not url: + return None + return url.rstrip("/") + + raise KeyError(f"No route for {path}") + + +# Hop-by-hop headers that should not be forwarded +_HOP_HEADERS = frozenset({"host", "content-length", "transfer-encoding"}) + + +def mount_proxy(app: FastAPI): + """Mount the catch-all proxy route on a FastAPI app. + + This should be mounted AFTER any supervisor-owned routes + (settings, auth, admin) so they take priority. + """ + + @app.api_route( + "/api/{path:path}", + methods=["GET", "POST", "PUT", "DELETE"], + ) + async def proxy(request: Request, path: str): + full_path = f"/api/{path}" + + try: + upstream_base = _resolve_upstream(full_path) + except KeyError: + return JSONResponse( + status_code=404, + content={"detail": f"No upstream agent for {full_path}"}, + ) + + if upstream_base is None: + # Supervisor-owned route that wasn't caught by an explicit handler. + return JSONResponse( + status_code=404, + content={"detail": f"Not found: {full_path}"}, + ) + + # Forward all headers except hop-by-hop + headers = { + k: v + for k, v in request.headers.items() + if k.lower() not in _HOP_HEADERS + } + + upstream_url = f"{upstream_base}{full_path}" + if request.url.query: + upstream_url += f"?{request.url.query}" + + body = await request.body() + + # First, make a non-streaming request to check the content type + async with httpx.AsyncClient(timeout=300.0) as client: + upstream_resp = await client.request( + method=request.method, + url=upstream_url, + headers=headers, + content=body, + follow_redirects=True, + ) + + content_type = upstream_resp.headers.get("content-type", "") + + # SSE: re-issue as a streaming request and forward chunks + if "text/event-stream" in content_type: + + async def stream(): + async with httpx.AsyncClient(timeout=300.0) as sc: + async with sc.stream( + method=request.method, + url=upstream_url, + headers=headers, + content=body, + ) as sr: + async for chunk in sr.aiter_bytes(): + yield chunk + + return StreamingResponse( + stream(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "X-Accel-Buffering": "no", + }, + ) + + # JSON: pass through with status code + if content_type.startswith("application/json"): + return JSONResponse( + status_code=upstream_resp.status_code, + content=upstream_resp.json(), + headers={"X-Upstream-Agent": upstream_base}, + ) + + # Other content types: pass through as-is + return JSONResponse( + status_code=upstream_resp.status_code, + content={"raw": upstream_resp.text}, + headers={"X-Upstream-Agent": upstream_base}, + ) diff --git a/docs/architecture-proposal.md b/docs/architecture-proposal.md index 1069b32..4a902b0 100644 --- a/docs/architecture-proposal.md +++ b/docs/architecture-proposal.md @@ -448,43 +448,99 @@ This replaces the current "manual curl and check" testing with automated, repeat --- -## Integration Challenges +## Integration Challenges — Concrete Solutions -### 1. 
OBO Tokens in Streaming Generators +The three auth systems that need bridging: +- **Monolith auth** (`backend/services/auth.py:25`): `_obo_client` ContextVar → `WorkspaceClient` +- **`@app_agent`** (`dbx_agent_app/core/types.py:32`): `request.user_context` → `UserContext` with `.access_token` +- **`databricks-tools-core`** (`databricks_tools_core/auth.py:35-36`): `_host_ctx`/`_token_ctx` ContextVars via `set_databricks_auth()` -The creator agent's tool-calling loop needs the user's OBO token across multiple LLM rounds within a single SSE stream. `@app_agent` provides `request.user_context`, but we need to pass the token into the agent session and re-establish it per-round. +### 1. OBO Auth Bridge → `agents/_shared/auth_bridge.py` -**Solution:** Pass `user_context.access_token` into the agent session object. Each tool call creates a fresh `WorkspaceClient(token=session.access_token)`. +**Problem:** Each agent receives `request.user_context` from `@app_agent`, but domain logic calls `get_workspace_client()` from the monolith's auth module. During migration, both patterns need to work. And `databricks-tools-core` functions use their own separate ContextVars. -### 2. Complex Tool Schemas +**Solution:** `obo_context()` context manager that sets up all three auth systems in one `with` block: -`generate_config` has 10+ nested parameters (tables with column configs, SQL snippets with expressions/measures/filters, etc.). The `@agent.tool()` decorator auto-generates schemas from type hints, but deeply nested structures need Pydantic models: +```python +from agents._shared.auth_bridge import obo_context + +@scorer.tool(description="Run IQ scan on a Genie Space") +async def scan_space(space_id: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + # All of these now work: + # - monolith's get_workspace_client() returns OBO client + # - databricks-tools-core functions use OBO token + result = scanner.calculate_score(space_id) +``` + +For streaming generators, capture the token before the generator starts and re-enter `obo_context()` per-yield (same pattern as `backend/routers/create.py:125-198`). + +### 2. Complex Tool Schemas → `agents/creator/schemas.py` + +**Problem:** `generate_config` has 11 parameters with 4-5 nesting levels. `@app_agent`'s schema generator only handles primitives. Hand-maintaining 200+ line JSON schemas is fragile. + +**Solution:** Pydantic models that auto-generate JSON Schema via `.model_json_schema()`, passed to `@creator.tool(parameters=...)`: ```python -class TableConfig(BaseModel): - identifier: str - description: str = "" - column_configs: list[ColumnConfig] = [] +from agents.creator.schemas import GenerateConfigArgs -@creator.tool(description="Generate a complete Genie Space configuration") -async def generate_config(tables: list[TableConfig], ...) -> dict: - ... +@creator.tool( + description="Generate a Genie Space configuration", + parameters=GenerateConfigArgs.model_json_schema(), +) +async def generate_config(**kwargs) -> dict: + args = GenerateConfigArgs(**kwargs) # Validate at runtime ``` -### 3. Frontend Transparency +Cuts ~580 lines of JSON schema to ~80 lines of Pydantic models, and the schema is always in sync with runtime validation. + +### 3. Frontend Transparency → `agents/supervisor/proxy.py` + +**Problem:** The React SPA makes 28 API calls to `/api/*` that route to 5 different sub-agents after decomposition. The frontend should not change. 
-The React SPA currently hits `/api/spaces/*`, `/api/analysis/*`, `/api/create/*`. Two options: +**Solution:** Ordered route table with prefix matching, glob support for path parameters, and SSE stream detection: -1. **Supervisor proxy** (recommended): Supervisor exposes the same paths and routes to sub-agents. Zero frontend changes. -2. **Direct sub-agent calls**: Frontend API client updated to call sub-agent URLs. Requires frontend changes but eliminates proxy latency. +```python +ROUTE_TABLE = [ + ("/api/spaces/*/fix", "FIXER_URL"), # specific before general + ("/api/genie/create", "CREATOR_URL"), + ("/api/spaces", "SCORER_URL"), + ("/api/analyze", "ANALYZER_URL"), + ("/api/create", "CREATOR_URL"), + # ... etc +] +``` -### 4. SP Fallback for OAuth Scope Gaps +SSE streams are detected by `content-type: text/event-stream` and forwarded as chunked bytes. OBO headers pass through automatically. -PRs #7/#8 added a `get_service_principal_client()` + `_is_scope_error()` pattern: when the user's OBO token lacks the `genie` OAuth scope, the code retries with the app's service principal. This pattern is currently duplicated in `genie_client.py` (`get_genie_space`, `list_genie_spaces`) and `routers/spaces.py` (`get_space_detail`). In the multi-agent model, `@app_agent` may handle this differently — we need to verify whether the framework supports automatic SP fallback or if we keep this pattern in the domain logic. +### 4. SP Fallback Decorator → `agents/_shared/sp_fallback.py` -### 5. Shared Lakebase +**Problem:** The `_is_scope_error()` + retry-with-SP pattern is duplicated across `genie_client.py` and `spaces.py`. Each agent that calls Genie APIs needs this pattern. + +**Solution:** `@with_sp_fallback` decorator and `genie_api_call()` convenience function: + +```python +from agents._shared.sp_fallback import genie_api_call + +# One-liner with automatic SP fallback +space = genie_api_call("GET", f"/api/2.0/genie/spaces/{space_id}", + query={"include_serialized_space": "true"}) +``` + +### 5. Shared Lakebase Pool → `agents/_shared/lakebase_client.py` + +**Problem:** Multiple agents need Lakebase (scorer for scores/stars, creator for sessions). Each runs as a separate Databricks App with its own credentials. + +**Solution:** Shared pool lifecycle + idempotent DDL per agent: + +```python +from agents._shared.lakebase_client import init_pool, SCORER_DDL + +# At startup — creates tables if they don't exist +await init_pool(SCORER_DDL) +``` -Multiple agents need Lakebase access (scorer for scores/stars, creator for sessions). Each agent gets its own Lakebase credentials via `app.yaml` resource bindings. The shared `lakebase.py` module moves to a small shared library or gets duplicated per-agent (it's only 269 lines). +Each agent initializes its own pool from its own env vars. Domain-specific query functions stay in each agent's module. The shared client manages pool lifecycle, credential generation, and DDL only. 
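+
+A domain-side query function then combines the pool with the in-memory
+fallback. A minimal sketch (the function name and inserted columns are
+illustrative, not part of the shared client):
+
+```python
+from datetime import datetime, timezone
+
+from agents._shared.lakebase_client import get_pool, get_memory_store
+
+
+async def save_scan_result(space_id: str, score: int) -> None:
+    pool = await get_pool()
+    if pool is None:
+        # Lakebase unavailable: keep the result in the in-memory store
+        get_memory_store()["scans"][space_id] = {"score": score}
+        return
+    async with pool.acquire() as conn:
+        await conn.execute(
+            "INSERT INTO scan_results (space_id, score, scanned_at) "
+            "VALUES ($1, $2, $3) ON CONFLICT DO NOTHING",
+            space_id, score, datetime.now(timezone.utc),
+        )
+```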
--- @@ -521,7 +577,7 @@ Multiple agents need Lakebase access (scorer for scores/stars, creator for sessi ## Files in This PR -### New files +### New files — scaffolds + deployment - `docs/architecture-proposal.md` — this document - `agents.yaml` — multi-agent deployment config - `agents/scorer/app.py` — scorer agent scaffold @@ -535,5 +591,19 @@ Multiple agents need Lakebase access (scorer for scores/stars, creator for sessi - `agents/fixer/app.py` — fixer agent scaffold - `agents/fixer/app.yaml` — fixer Databricks Apps config -### No modified files -This is a proposal PR — the existing monolith is untouched. All new files are additive. +### New files — integration challenge solutions +- `agents/_shared/__init__.py` — shared utilities package +- `agents/_shared/auth_bridge.py` — Challenge 1: OBO auth context manager bridging all 3 auth systems +- `agents/_shared/sp_fallback.py` — Challenge 4: SP fallback decorator for Genie API scope errors +- `agents/_shared/lakebase_client.py` — Challenge 5: Shared Lakebase pool with idempotent DDL +- `agents/creator/schemas.py` — Challenge 2: Pydantic models replacing ~580 lines of JSON schemas +- `agents/supervisor/__init__.py` — supervisor package +- `agents/supervisor/proxy.py` — Challenge 3: Frontend-transparent proxy with SSE support + +### Modified files +- `agents/scorer/app.py` — wired up auth_bridge, sp_fallback, and lakebase_client imports +- `agents/creator/app.py` — uses Pydantic schema override for generate_config/present_plan +- `docs/architecture-proposal.md` — replaced placeholder challenge descriptions with concrete solutions + +### No changes to existing monolith +The existing `backend/` code is untouched. All new files are additive. From 9eb8ea2d030ff06c1a4960e45f0f55b012b9a1f0 Mon Sep 17 00:00:00 2001 From: Stuart Gano Date: Wed, 11 Mar 2026 09:37:43 -0700 Subject: [PATCH 4/6] =?UTF-8?q?docs:=20clarify=20Challenge=202=20before/af?= =?UTF-8?q?ter=20(580=20=E2=86=92=2080=20lines)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/architecture-proposal.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/architecture-proposal.md b/docs/architecture-proposal.md index 4a902b0..be47223 100644 --- a/docs/architecture-proposal.md +++ b/docs/architecture-proposal.md @@ -477,9 +477,9 @@ For streaming generators, capture the token before the generator starts and re-e ### 2. Complex Tool Schemas → `agents/creator/schemas.py` -**Problem:** `generate_config` has 11 parameters with 4-5 nesting levels. `@app_agent`'s schema generator only handles primitives. Hand-maintaining 200+ line JSON schemas is fragile. +**Problem:** `generate_config` has 11 parameters with 4-5 nesting levels (tables → column configs, example SQLs → parameters, etc.). `@app_agent`'s schema generator only handles primitives. The monolith defines these schemas as **~580 lines of hand-written JSON** in `create_agent_tools.py` — brittle, hard to maintain, and easy to get out of sync with the runtime code. 
-**Solution:** Pydantic models that auto-generate JSON Schema via `.model_json_schema()`, passed to `@creator.tool(parameters=...)`: +**Solution:** **~80 lines of Pydantic models** that auto-generate the equivalent JSON Schema via `.model_json_schema()` and double as runtime validation: ```python from agents.creator.schemas import GenerateConfigArgs @@ -492,7 +492,7 @@ async def generate_config(**kwargs) -> dict: args = GenerateConfigArgs(**kwargs) # Validate at runtime ``` -Cuts ~580 lines of JSON schema to ~80 lines of Pydantic models, and the schema is always in sync with runtime validation. +580 lines of hand-maintained JSON → 80 lines of Pydantic models. Schema and validation are always in sync because they come from the same source. ### 3. Frontend Transparency → `agents/supervisor/proxy.py` From 3b1bef5c37da3f9175607524280acb287744e3ed Mon Sep 17 00:00:00 2001 From: Stuart Gano Date: Mon, 23 Mar 2026 16:36:31 -0700 Subject: [PATCH 5/6] docs: reframe agent architecture as additive deployment layer Rewrite architecture-proposal.md and CLAUDE.md to position the agent layer as an enhancement that wraps existing domain logic, not a migration that replaces it. Existing monolith stays unchanged. Co-authored-by: Isaac --- CLAUDE.md | 168 +++++++++++-- docs/architecture-proposal.md | 458 +++++++++++----------------------- 2 files changed, 282 insertions(+), 344 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 35cd56e..7d201b4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,41 +1,159 @@ # Genie Workbench -## Project Overview +Databricks App for creating, scoring, and optimizing Genie Spaces. FastAPI backend + React/Vite frontend deployed together on Databricks Apps. -Genie Workbench is a Databricks App that acts as a quality control and optimization platform for Genie Space administrators. It helps builders understand why their Genie Space isn't performing well and fix it. +## Commands -- **Backend:** Python (FastAPI), deployed as a Databricks App -- **Frontend:** React/TypeScript (Vite) -- **Storage:** Lakebase (with in-memory fallback for local dev) -- **Tracing:** Optional MLflow integration +```bash +# Backend (from project root) +uv pip install -e . # Install Python deps +uvicorn backend.main:app --host 0.0.0.0 --port 8000 --reload # Dev server -## GenieRX Specification +# Frontend (from frontend/) +cd frontend && npm install && npm run build # Build for production +cd frontend && npm run dev # Vite dev server (port 5173, proxies /api to :8000) +cd frontend && npm run lint # ESLint -The GenieRX spec (`docs/genierx-spec.md`) defines the core analysis and recommendation framework used throughout this project. **Always consult it when working on analysis, scoring, or recommendation features.** +# Full build (what Databricks Apps runs) +npm install # Triggers postinstall -> cd frontend && npm install +npm run build # Triggers cd frontend && npm run build -Key concepts from the spec: +# Deploy +databricks sync --watch . 
/Workspace/Users//genie-workbench +databricks apps deploy --source-code-path /Workspace/Users//genie-workbench -- **Authoritative Facts** — raw data from systems of record, safe to surface directly -- **Canonical Metrics** — governed KPIs with stable definitions and cross-team agreement -- **Heuristic Signals** — derived fields with subjective thresholds; must always carry caveats +# Tests (require running backend at localhost:8000) +python tests/test_e2e_local.py # E2E create agent tests +python tests/test_full_schema.py # Schema validation +# Deployed E2E tests require: pip install playwright && playwright install chromium +python tests/test_e2e_deployed.py +``` -When implementing or modifying any analyzer, scorer, or recommender logic, ensure field classifications align with this taxonomy. Heuristic signals must never be presented as authoritative facts in Genie answers. +## Architecture -## Key Documentation +``` +backend/ + main.py # FastAPI app entry point, OBO middleware, static file serving + models.py # All Pydantic models (shared between routers/services) + routers/ + analysis.py # /api/space/*, /api/analyze/*, /api/optimize, /api/genie/*, /api/sql/* + spaces.py # /api/spaces/* (list, scan, history, star, fix) + admin.py # /api/admin/* (dashboard, leaderboard, alerts) + auth.py # /api/auth/me + create.py # /api/create/* (agent chat, UC discovery, wizard) + services/ + auth.py # OBO auth (ContextVar), SP fallback, WorkspaceClient mgmt + genie_client.py # Databricks Genie API (fetch space, list spaces, query for SQL) + scanner.py # Rule-based IQ scoring engine (0-100, 4 dimensions) + analyzer.py # LLM-based deep analysis against best-practices checklist + optimizer.py # LLM-based optimization from benchmark feedback + fix_agent.py # LLM agent that generates JSON patches and applies via Genie API + create_agent.py # Multi-turn LLM agent for creating new Genie Spaces + create_agent_session.py # Session persistence for create agent (Lakebase) + create_agent_tools.py # Tool definitions for create agent (UC discovery, SQL, etc.) + lakebase.py # PostgreSQL persistence (asyncpg pool, in-memory fallback) + llm_utils.py # OpenAI-compatible LLM client via Databricks serving endpoints + uc_client.py # Unity Catalog browsing (catalogs, schemas, tables) + prompts/ # Prompt templates for analysis + prompts_create/ # Prompt templates for create agent (multi-file, modular) + references/schema.md # Genie Space JSON schema reference +frontend/ + src/ + App.tsx # Root: SpaceList | SpaceDetail | AdminDashboard | CreateAgentChat + lib/api.ts # All API calls (fetch, SSE streaming helpers) + types/index.ts # TypeScript types mirroring backend Pydantic models + components/ # UI components (analysis, optimization, fix agent, etc.) + pages/ # SpaceList, SpaceDetail, AdminDashboard, HistoryTab, IQScoreTab + hooks/ # useAnalysis, useTheme + vite.config.ts # Vite config with /api proxy to localhost:8000 +``` -- `docs/genierx-spec.md` — GenieRX analyzer/recommender specification -- `docs/genie-space-schema.md` — Genie space schema reference -- `docs/checklist-by-schema.md` — Analysis checklist organized by schema section -- `CUJ.md` — Core user journeys and product analysis +## Key Patterns -## Development +### Authentication (OBO) +On Databricks Apps, user identity flows via `x-forwarded-access-token` header. `OBOAuthMiddleware` in `main.py` stores the token in a `ContextVar`. All services call `get_workspace_client()` which returns the OBO client if set, otherwise the SP singleton. 
Some Genie API calls require SP auth (missing `genie` OAuth scope) — see `_is_scope_error()` fallback in `genie_client.py`. -```bash -# Backend (from repo root) -uv run start-server +### SSE Streaming +Multiple endpoints use `StreamingResponse` with `text/event-stream`: +- `/api/analyze/stream` — analysis progress +- `/api/optimize` — optimization with heartbeat keepalives (15s) +- `/api/spaces/{id}/fix` — fix agent patches +- `/api/create/agent/chat` — multi-turn agent with typed events (session, step, thinking, tool_call, tool_result, message_delta, message, created, error, done) + +Frontend consumes these via manual `fetch` + `ReadableStream` in `lib/api.ts` (not EventSource). Buffer splitting on `\n\n`. + +### Lakebase Persistence +`services/lakebase.py` uses asyncpg with graceful fallback to in-memory dicts when `LAKEBASE_HOST` is not set. Credentials auto-generated via Databricks SDK (`/api/2.0/database/credentials`). Schema defined in `sql/setup_lakebase.sql`. + +### LLM Calls +All LLM calls go through Databricks model serving endpoints using OpenAI-compatible API. Model configured via `LLM_MODEL` env var (default: `databricks-claude-sonnet-4-6`). MLflow tracing is optional — controlled by `MLFLOW_EXPERIMENT_ID`. + +## Environment Variables + +Defined in `app.yaml`. Key ones: +- `SQL_WAREHOUSE_ID` — from app resource `sql-warehouse` +- `LLM_MODEL` — serving endpoint name +- `LAKEBASE_HOST`, `LAKEBASE_PORT`, `LAKEBASE_DATABASE`, `LAKEBASE_INSTANCE_NAME` — Lakebase config +- `MLFLOW_EXPERIMENT_ID` — enables MLflow tracing (validated at startup, cleared if invalid) +- `GENIE_TARGET_DIRECTORY` — where new spaces are created (default `/Shared/`) +- `DEV_USER_EMAIL` — local dev only + +Local dev uses `.env.local` (loaded first with override) then `.env`. + +## Dev/Test Workflow + +There is no local dev server — all testing is done by syncing code to Databricks and redeploying: + +1. Edit code locally +2. `databricks sync --watch . /Workspace/Users//genie-workbench` picks up changes automatically +3. Re-run `databricks apps deploy --source-code-path /Workspace/Users//genie-workbench` to trigger a new deployment +4. Test in the deployed Databricks App + +Do NOT suggest running `uvicorn` or `npm run dev` locally. The app depends on Databricks-managed resources (OBO auth, Lakebase, serving endpoints) that aren't available outside a Databricks App environment. + +## Gotchas + +- **frontend/dist/ is gitignored but NOT databricksignored** — the built React app must be synced to workspace for deployment. Build before `databricks sync`. +- **`.databricksignore` excludes `*.md`** but explicitly includes `backend/references/schema.md` (needed at runtime by the analyzer). +- **OBO ContextVar and streaming** — for SSE endpoints, the ContextVar is NOT cleared after `call_next` because the response streams lazily. Streaming handlers stash the token on `request.state` and re-set it inside the generator. +- **Two separate "analysis" paths** — IQ Scan (`scanner.py`, rule-based, instant) and Deep Analysis (`analyzer.py`, LLM-based, streaming). They produce different outputs and don't cross-reference. +- **Two separate "fix" paths** — Fix Agent (from scan findings, auto-applies patches) and Optimize flow (from benchmark labeling, produces suggestions for a new space). They're independent. +- **Vite proxy** — dev frontend at :5173 proxies `/api` to :8000. In production, FastAPI serves static files from `frontend/dist/` directly. +- **Python 3.11+** required (`pyproject.toml`). 
Uses `uv` for dependency management (`uv.lock` present). +- **Root `package.json`** exists solely as a build hook for Databricks Apps — `postinstall` chains to `frontend/npm install`, `build` chains to `frontend/npm run build`. + +## Agent Deployment Layer + +The `agents/` directory provides an optional agent deployment layer using `@app_agent` from `dbx-agent-app`. Each agent wraps existing domain logic from `backend/services/` and exposes it as a standalone Databricks agent with A2A discovery, MCP server, and eval support. + +See `docs/architecture-proposal.md` for the full design and implementation roadmap. -# Frontend -cd frontend && npm run dev ``` +agents/ + _shared/ # Auth bridge, Lakebase pool, SP fallback + scorer/app.py # Wraps scanner.py + analyzer/app.py # Wraps analyzer.py + creator/app.py # Wraps create_agent.py + optimizer/app.py # Wraps optimizer.py + fixer/app.py # Wraps fix_agent.py + supervisor/proxy.py # Frontend proxy for agent deployment mode +agents.yaml # Multi-agent deployment config +``` + +## GenieRX Specification + +`docs/genierx-spec.md` defines the analysis and recommendation taxonomy. Key concepts: +- **Authoritative Facts** — raw data from systems of record, safe to surface directly +- **Canonical Metrics** — governed KPIs with stable definitions +- **Heuristic Signals** — derived fields with subjective thresholds; must carry caveats + +Consult the spec when working on analysis, scoring, or recommendation features. + +## Code Style -Frontend runs at `localhost:5173`, proxies API calls to backend at `localhost:8000`. +- Backend: Python, Pydantic models, FastAPI routers, no class-based views +- Frontend: React 19 + TypeScript + Tailwind CSS v4 + Vite 7, functional components only +- UI primitives in `frontend/src/components/ui/` (button, card, badge, etc.) using `class-variance-authority` +- Path alias `@` maps to `frontend/src/` (configured in `vite.config.ts` and `tsconfig.app.json`) +- All API routes prefixed with `/api` +- Pydantic models in `backend/models.py`, TypeScript mirrors in `frontend/src/types/index.ts` — keep in sync diff --git a/docs/architecture-proposal.md b/docs/architecture-proposal.md index be47223..8e547b7 100644 --- a/docs/architecture-proposal.md +++ b/docs/architecture-proposal.md @@ -1,145 +1,92 @@ -# Genie Workbench → Multi-Agent Architecture +# Agent Deployment Layer for Genie Workbench > **Status:** Proposal > **Author:** Stuart Gano -> **Audience:** Sean Zhang (Workbench maintainer) > **Date:** 2026-03-10 --- -## Executive Summary +## Summary -The Genie Workbench is a monolithic Databricks App (~10,200 lines backend) that hand-rolls OBO auth, tool-calling loops, SSE streaming, and SDK wrappers. Two FE-built libraries solve these exact problems: +The Genie Workbench now has scoring, analysis, optimization, creation, and auto-optimization all working as a Databricks App. This proposal adds an **agent deployment layer** so each capability can also be deployed as a standalone Databricks agent — enabling A2A discovery, MCP tool integration, and independent `mlflow.genai.evaluate()` testing. -- **AI Dev Kit** (`databricks-tools-core`) — pre-built Python functions for SQL execution, Unity Catalog browsing, and warehouse management -- **dbx-agent-app** — `@app_agent` decorator that auto-generates `/invocations` endpoints, agent cards, MCP servers, health checks, and handles OBO auth +The existing backend is unchanged. 
The agent layer wraps existing domain logic using: -This proposal refactors the Workbench into a **multi-agent system** where each capability is a separate, discoverable `@app_agent` app. The result: ~30% less code, free MCP servers, A2A discovery, and `mlflow.genai.evaluate()` support — with zero changes to the React frontend. +- **`dbx-agent-app`** (`@app_agent` decorator) — auto-generates `/invocations` endpoints, agent cards, MCP servers, and health checks +- **AI Dev Kit** (`databricks-tools-core`) — optional drop-in replacements for UC browsing and SQL execution ---- - -## Current Architecture (Monolith) - -``` -┌─────────────────────────────────────────────────┐ -│ backend/main.py (FastAPI) │ -│ │ -│ ┌──────────────────────────────────────────┐ │ -│ │ OBOAuthMiddleware │ │ -│ │ (hand-rolled ContextVar + x-forwarded- │ │ -│ │ access-token extraction) │ │ -│ └──────────────────────────────────────────┘ │ -│ │ -│ ┌──────────┐ ┌──────────┐ ┌───────────────┐ │ -│ │ routers/ │ │ routers/ │ │ routers/ │ │ -│ │ spaces │ │ analysis │ │ create │ │ -│ │ (scan, │ │ (analyze,│ │ (UC discovery │ │ -│ │ history, │ │ stream, │ │ agent chat, │ │ -│ │ star, │ │ query, │ │ validate, │ │ -│ │ fix) │ │ optimize│ │ create) │ │ -│ └──────────┘ └──────────┘ └───────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────┐ │ -│ │ services/ │ │ -│ │ scanner.py analyzer.py optimizer.py │ │ -│ │ fix_agent.py create_agent.py │ │ -│ │ create_agent_tools.py (2,717 lines!) │ │ -│ │ create_agent_session.py │ │ -│ │ uc_client.py sql_executor.py │ │ -│ │ genie_client.py lakebase.py auth.py │ │ -│ └──────────────────────────────────────────┘ │ -│ │ -│ frontend/dist/ (React SPA, static files) │ -└─────────────────────────────────────────────────┘ -``` - -### Pain points +**What this enables:** +- Other workspace apps can discover and call Workbench capabilities via A2A protocol +- Each agent gets a free MCP server for tool integration +- Automated eval pipelines via `mlflow.genai.evaluate()` against individual agents +- Independent deployment of individual capabilities when needed -| Issue | Impact | -|-------|--------| -| `create_agent_tools.py` is 2,717 lines of hand-coded tool definitions + JSON schemas + dispatch table | Every new tool requires ~80 lines of boilerplate | -| OBO auth in `services/auth.py` (136 lines) uses ContextVar + middleware — breaks in streaming generators | Streaming endpoints need manual `set_obo_user_token()` re-establishment. 
Recent fix added `get_service_principal_client()` fallback for missing OAuth scopes | -| `genie_client.py` (244 lines) duplicates SP-fallback pattern (`_is_scope_error`) in every API call | Each new Genie API function must remember to add scope-error retry logic | -| `sql_executor.py` (220 lines) reimplements what `databricks-tools-core.sql` provides | Maintenance burden, no warehouse auto-detection improvements | -| `uc_client.py` (60 lines) reimplements what `databricks-tools-core.unity_catalog` provides | Duplicated effort | -| No agent discovery — other workspace apps can't call Workbench capabilities | Siloed functionality | -| No eval support — testing requires manual curl/browser interaction | No regression testing pipeline | -| Monolithic deployment — any change redeploys everything | Slow iteration on individual capabilities | +**What this does NOT change:** +- The existing monolith deployment continues to work as-is +- The React frontend is unmodified +- All existing domain logic (scanner, analyzer, optimizer, fix agent, create agent, GSO) stays in place --- -## Proposed Architecture (Multi-Agent) +## Architecture + +The agent layer sits alongside the existing monolith. Both deployment modes work: ``` -┌─────────────────────────────────────────────────────────┐ -│ genie-workbench (supervisor) │ -│ React SPA + FastAPI shell │ -│ Routes frontend API calls → sub-agent /invocations │ -├─────────────────────────────────────────────────────────┤ -│ │ -│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ -│ │ genie- │ │ genie- │ │ genie- │ │ -│ │ scorer │ │ analyzer │ │ creator │ │ -│ │ │ │ │ │ │ │ -│ │ IQ scan │ │ LLM deep │ │ Space │ │ -│ │ scoring │ │ analysis │ │ creation │ │ -│ │ history │ │ synthesis│ │ wizard │ │ -│ └──────────┘ └──────────┘ └──────────┘ │ -│ │ -│ ┌──────────┐ ┌──────────┐ │ -│ │ genie- │ │ genie- │ │ -│ │ optimizer│ │ fixer │ │ -│ │ │ │ │ │ -│ │ Benchmark│ │ AI fix │ │ -│ │ labeling │ │ agent │ │ -│ │ suggest │ │ patches │ │ -│ └──────────┘ └──────────┘ │ -└─────────────────────────────────────────────────────────┘ +┌─────────────────────────────────────────────────────────────────┐ +│ EXISTING: Monolith (unchanged) │ +│ backend/main.py → routers → services → frontend/dist │ +│ Deployed via: databricks apps deploy │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ NEW: Agent Layer (additive) │ +│ Deployed via: dbx-agent-app deploy --config agents.yaml │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ genie- │ │ genie- │ │ genie- │ │ +│ │ scorer │ │ analyzer │ │ creator │ │ +│ │ wraps: │ │ wraps: │ │ wraps: │ │ +│ │ scanner │ │ analyzer │ │ create_ │ │ +│ │ .py │ │ .py │ │ agent.py │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ genie- │ │ genie- │ │supervisor│ │ +│ │ optimizer│ │ fixer │ │ React SPA│ │ +│ │ wraps: │ │ wraps: │ │ + proxy │ │ +│ │ optimizer│ │ fix_agent│ │ │ │ +│ │ .py │ │ .py │ │ │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +└─────────────────────────────────────────────────────────────────┘ ``` -Each sub-agent is a standalone Databricks App with: +Each agent wraps existing domain logic and adds: - **`@app_agent` decorator** — auto-generates `/invocations`, `/.well-known/agent.json`, `/health`, MCP server -- **OBO auth** — handled by `request.user_context` (replaces ContextVar middleware) -- **Tool definitions** — auto-generated from `@agent.tool()` decorated functions (replaces JSON schemas) +- **OBO 
auth** — `request.user_context` bridges into existing auth via `obo_context()` +- **Tool definitions** — auto-generated from `@agent.tool()` decorated functions - **Eval support** — `app_predict_fn()` bridge to `mlflow.genai.evaluate()` --- ## Agent Decomposition -### Agent Boundaries - -| Agent | Source | Tools | Needs Lakebase? | Streaming? | LLM? | -|-------|--------|-------|-----------------|------------|------| -| **genie-scorer** | `agents/scorer/` | `scan_space`, `get_history`, `toggle_star`, `list_spaces` | Yes (scores, stars) | No | No | -| **genie-analyzer** | `agents/analyzer/` | `fetch_space`, `analyze_section`, `analyze_all`, `query_genie`, `execute_sql` | No | Yes (SSE) | Yes | -| **genie-creator** | `agents/creator/` | All 16 current tools (discover_*, describe_*, profile_*, generate_config, etc.) | Yes (sessions) | Yes (SSE) | Yes | -| **genie-optimizer** | `agents/optimizer/` | `generate_suggestions`, `merge_config`, `label_benchmark` | No | No (heartbeat SSE) | Yes | -| **genie-fixer** | `agents/fixer/` | `generate_fixes`, `apply_patch` | No | Yes (SSE) | Yes | -| **supervisor** | root `app.py` | Routes to sub-agents, serves React SPA, `/api/settings`, `/api/auth` | Yes (starred) | Proxy | No | +### What each agent wraps -### What moves where +| Agent | Wraps | Tools exposed | Lakebase? | Streaming? | LLM? | +|-------|-------|---------------|-----------|------------|------| +| **genie-scorer** | `services/scanner.py` | `scan_space`, `get_history`, `toggle_star`, `list_spaces` | Yes (scores, stars) | No | No | +| **genie-analyzer** | `services/analyzer.py`, `services/genie_client.py` | `fetch_space`, `analyze_section`, `analyze_all`, `query_genie`, `execute_sql` | No | Yes (SSE) | Yes | +| **genie-creator** | `services/create_agent.py`, `services/create_agent_tools.py` | All 16 current tools (discover_*, describe_*, profile_*, generate_config, etc.) | Yes (sessions) | Yes (SSE) | Yes | +| **genie-optimizer** | `services/optimizer.py` | `generate_suggestions`, `merge_config`, `label_benchmark` | No | No (heartbeat SSE) | Yes | +| **genie-fixer** | `services/fix_agent.py` | `generate_fixes`, `apply_patch` | No | Yes (SSE) | Yes | +| **supervisor** | Existing React SPA | Routes frontend API calls to sub-agents, serves static files | Yes (starred) | Proxy | No | -``` -backend/services/scanner.py → agents/scorer/scanner.py (as-is, domain logic) -backend/services/analyzer.py → agents/analyzer/analyzer.py (as-is, domain logic) -backend/services/optimizer.py → agents/optimizer/optimizer.py (as-is, domain logic) -backend/services/fix_agent.py → agents/fixer/fix_agent.py (as-is, domain logic) -backend/services/create_agent.py → agents/creator/agent.py (as-is, domain logic) -backend/services/create_agent_session.py → agents/creator/session.py (as-is) -backend/prompts_create/ → agents/creator/prompts/ (as-is) -backend/references/ → agents/creator/references/ (as-is) - -backend/services/uc_client.py → DELETED (replaced by databricks-tools-core) -backend/sql_executor.py → DELETED (replaced by databricks-tools-core) -backend/routers/spaces.py → DISSOLVED (endpoints become scorer/supervisor tools) -backend/routers/analysis.py → DISSOLVED (endpoints become analyzer/optimizer tools) -backend/routers/create.py → DISSOLVED (endpoints become creator tools) -``` +Each agent imports from `backend/services/` — the domain logic stays where it is. The agent layer is a thin wrapper that exposes existing functions as agent tools with standard protocol support. 
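+
+To make "standard protocol support" concrete, this is roughly what calling a deployed wrapper looks like from the outside. The `/invocations` route comes from `@app_agent`; the URL, auth header, and payload shape below are illustrative assumptions, not something this proposal pins down:
+
+```python
+import requests
+
+# Hypothetical smoke test against a deployed genie-scorer instance.
+SCORER_URL = "https://genie-scorer.example.databricksapps.com"  # assumed URL
+
+resp = requests.post(
+    f"{SCORER_URL}/invocations",
+    headers={"Authorization": "Bearer <user-obo-token>"},  # assumed header shape
+    json={"messages": [{"role": "user", "content": "Scan space abc123"}]},
+)
+resp.raise_for_status()
+print(resp.json())
+```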
-### What stays custom (irreplaceable domain logic) +### Domain logic (unchanged) -These files contain business logic specific to GenieIQ/GenieRx and move to their respective agents unchanged: +These files contain the business logic that agents wrap. They are not modified: - `scanner.py` — Rule-based IQ scoring (maturity levels, dimension weights) - `analyzer.py` — LLM checklist evaluation with session management @@ -149,41 +96,18 @@ These files contain business logic specific to GenieIQ/GenieRx and move to their - `create_agent_session.py` — Two-tier session persistence (memory + Lakebase) - `prompts_create/` — Dynamic prompt assembly (9 modules: core, data_sources, requirements, plan, etc.) - `references/schema.md` — Genie Space schema reference -- `genie_creator.py` — Genie API write operations -- `genie_client.py` — Genie API read operations (including SP-fallback for missing OAuth scopes, added in PR #7) +- `genie_client.py` — Genie API read operations (including SP-fallback for missing OAuth scopes) - `lakebase.py` — PostgreSQL persistence with in-memory fallback +- `auto_optimize.py` + GSO package — Auto-optimization pipeline --- -## What Gets Replaced - -### 1. Tool Definition Boilerplate → `@agent.tool()` Decorators +## What the Agent Layer Provides -**Before** (create_agent_tools.py, ~80 lines per tool): -```python -TOOL_DEFINITIONS = [ - { - "type": "function", - "function": { - "name": "discover_catalogs", - "description": "List all Unity Catalog catalogs the user has access to.", - "parameters": {"type": "object", "properties": {}, "required": []}, - }, - }, - # ... 15 more tool definitions with nested JSON schemas ... -] +### 1. Auto-generated tool definitions from `@agent.tool()` decorators -def handle_tool_call(name: str, arguments: dict, session_config=None) -> dict: - handlers = { - "discover_catalogs": _discover_catalogs, - "discover_schemas": _discover_schemas, - # ... 14 more entries ... - } - handler = handlers.get(name) - # ... dispatch logic ... -``` +Agent tools are defined as decorated functions — schemas, dispatch, and validation are auto-generated: -**After** (auto-generated from function signatures): ```python @creator.tool(description="List all Unity Catalog catalogs the user has access to.") async def discover_catalogs() -> dict: @@ -196,62 +120,31 @@ async def discover_schemas(catalog: str) -> dict: return {"schemas": list_schemas(catalog)} ``` -**Impact:** ~580 lines of JSON schemas + 40-line dispatch table → auto-generated. +For tools with complex nested parameters (like `generate_config`), Pydantic models provide the schema and runtime validation in one place — see `agents/creator/schemas.py`. -### 2. OBO Auth Middleware + SP Fallback → `request.user_context` +### 2. 
OBO auth bridging via `obo_context()` -**Before** (main.py + auth.py + genie_client.py): -```python -# main.py — ContextVar middleware -class OBOAuthMiddleware(BaseHTTPMiddleware): - async def dispatch(self, request, call_next): - token = request.headers.get("x-forwarded-access-token", "") - if token: - set_obo_user_token(token) # ContextVar - request.state.user_token = token - response = await call_next(request) - if not is_streaming: - clear_obo_user_token() - return response - -# auth.py — SP fallback for scope errors (added in PR #7) -def get_service_principal_client() -> WorkspaceClient: - """Bypass OBO for ops requiring scopes the user token lacks.""" - return _get_default_client() - -# genie_client.py — every API function repeats this pattern -try: - return _get_space_with_client(client, genie_space_id) -except Exception as e: - if _is_scope_error(e): - sp_client = get_service_principal_client() - return _get_space_with_client(sp_client, genie_space_id) - -# In streaming generators: -if user_token: - set_obo_user_token(user_token) # Must re-establish in generator! -``` +Agents receive the user's token via `request.user_context`. The `obo_context()` context manager bridges this into the existing `get_workspace_client()` pattern so domain logic works unchanged: -**After** (`@app_agent` handles it): ```python -@app_agent(name="genie-scorer", ...) -async def scorer(request: AgentRequest) -> AgentResponse: - # request.user_context.access_token is automatically available - # No ContextVar management, no SP fallback boilerplate - ... +@scorer.tool(description="Run IQ scan on a Genie Space") +async def scan_space(space_id: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + # Existing scanner.py works as-is — get_workspace_client() returns OBO client + result = scanner.calculate_score(space_id) ``` -**Impact:** ~30 lines of middleware + SP fallback pattern duplicated across every API call → zero. +### 3. Optional AI Dev Kit integration -### 3. UC Client + SQL Executor → `databricks-tools-core` +Where applicable, agents can use `databricks-tools-core` as drop-in replacements: -| Current | Lines | Replacement | -|---------|-------|-------------| -| `backend/services/uc_client.py` | 60 | `from databricks_tools_core.unity_catalog import list_catalogs, list_schemas, list_tables` | -| `backend/sql_executor.py` | 220 | `from databricks_tools_core.sql import execute_sql, get_best_warehouse` | -| Warehouse auto-detection | 30 | `get_best_warehouse()` | +| Existing service | AI Dev Kit equivalent | +|---------|-------------| +| `backend/services/uc_client.py` | `databricks_tools_core.unity_catalog` | +| SQL execution in various services | `databricks_tools_core.sql` | +| Warehouse auto-detection | `get_best_warehouse()` | -**Impact:** 310 lines deleted, replaced by maintained library functions. +This is optional and incremental — agents can import existing services or AI Dev Kit functions interchangeably. --- @@ -342,89 +235,43 @@ Other workspace apps can discover and call these agents using `AgentDiscovery`. --- -## Migration Path (Phased, Backwards-Compatible) - -### Phase 1: Scaffolding + Architecture Doc ← **This PR** - -- Architecture proposal for review -- `agents.yaml` deployment config -- Skeleton `app.py` + `app.yaml` for each agent -- No behavior changes to existing monolith - -### Phase 2: Extract genie-scorer (lowest risk) +## Implementation Roadmap -**Why first:** No LLM calls, no streaming, no sessions — pure rule-based scoring. 
Validates the `@app_agent` pattern with minimal risk. +Each phase adds a working agent. The monolith continues to serve production throughout. -Files moved: -- `backend/services/scanner.py` → `agents/scorer/scanner.py` (as-is) -- Relevant Lakebase functions → `agents/scorer/lakebase.py` +### Phase 1: Scaffolds + Architecture ← **This PR** -What gets deleted from monolith: -- Scan/history/star endpoints from `backend/routers/spaces.py` (~80 lines) +- Agent scaffolds with tool signatures and source traceability +- `agents.yaml` deployment config +- Shared modules: `auth_bridge.py`, `lakebase_client.py`, `sp_fallback.py` +- This document -### Phase 3: Extract genie-fixer (streaming + LLM, medium complexity) +### Phase 2: Wire up genie-scorer (lowest risk) -**Why second:** Streaming SSE + LLM calls, but simpler than creator (no sessions, no 16 tools). +**Why first:** No LLM calls, no streaming, no sessions — pure rule-based scoring. Validates the `@app_agent` + `obo_context()` pattern end-to-end. -Files moved: -- `backend/services/fix_agent.py` → `agents/fixer/fix_agent.py` -- Fix prompt → `agents/fixer/prompts.py` +- Agent tool implementations call `backend/services/scanner.py` directly +- Deploy alongside monolith, verify via `/invocations` and MCP -Validates: Streaming via async generator → SSE (auto-handled by `@app_agent`) +### Phase 3: Wire up genie-fixer (streaming + LLM) -### Phase 4: Extract genie-analyzer (streaming + LLM, high complexity) +Validates SSE streaming through `@app_agent` + LLM tool calling. -Files moved: -- `backend/services/analyzer.py` → `agents/analyzer/analyzer.py` -- Analysis prompts → `agents/analyzer/prompts/` +### Phase 4: Wire up genie-analyzer (streaming + LLM, multi-tool) Tools: `fetch_space`, `analyze_section`, `analyze_all`, `query_genie`, `execute_sql` -### Phase 5: Extract genie-optimizer - -Files moved: -- `backend/services/optimizer.py` → `agents/optimizer/optimizer.py` -- Benchmark labeling logic → `agents/optimizer/labeling.py` +### Phase 5: Wire up genie-optimizer Tools: `generate_suggestions`, `merge_config`, `label_benchmark` -### Phase 6: Extract genie-creator (most complex, last) - -**Why last:** 16 tools, session persistence, complex tool-calling loop with message compaction. Hardest extraction. - -Key change: 16 hand-coded tool definitions become `@creator.tool()` decorators: -```python -@creator.tool(description="List Unity Catalog catalogs") -async def discover_catalogs() -> dict: - from databricks_tools_core.unity_catalog import list_catalogs - return {"catalogs": list_catalogs()} -``` - -What stays custom: Dynamic prompt assembly, session persistence, message compaction, config generation/validation. These are domain logic. - -What gets replaced: -- Tool definition boilerplate (~580 lines of JSON schemas → auto-generated from function signatures) -- `handle_tool_call()` dispatcher (~40 lines → auto-routing) -- OBO middleware → `request.user_context` - -### Phase 7: Supervisor + Frontend +### Phase 6: Wire up genie-creator (most complex) -The supervisor becomes a thin shell that: -1. Serves the React SPA (static files) -2. Routes API calls to sub-agents -3. Handles settings and auth endpoints +16 tools, session persistence, complex tool-calling loop. Pydantic schemas (in `agents/creator/schemas.py`) replace hand-written JSON tool definitions. -Frontend changes: **Minimal.** API client (`frontend/src/lib/api.ts`) keeps hitting the same paths. The supervisor proxies to sub-agents transparently. 
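+
+As a sketch of what that replacement can look like (the field names here are hypothetical — the real nested model lives in `agents/creator/schemas.py`):
+
+```python
+from pydantic import BaseModel, Field
+
+class ColumnConfig(BaseModel):            # one of the nested levels
+    name: str
+    description: str | None = None
+
+class TableConfig(BaseModel):
+    table: str                            # catalog.schema.table
+    columns: list[ColumnConfig] = Field(default_factory=list)
+
+class GenerateConfigArgs(BaseModel):
+    title: str
+    tables: list[TableConfig]
+    sample_questions: list[str] = Field(default_factory=list)
+```
+
+`GenerateConfigArgs.model_json_schema()` yields the nested JSON Schema that `@creator.tool(parameters=...)` registers, and constructing the model validates LLM-provided arguments at runtime.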
+### Phase 7: Supervisor + frontend proxy -### Phase 8: AI Dev Kit Integration - -Replace hand-rolled utilities with `databricks-tools-core` across all agents: - -| Current | Lines | Replacement | -|---------|-------|-------------| -| `backend/services/uc_client.py` | 60 | `databricks_tools_core.unity_catalog` | -| `backend/sql_executor.py` | 220 | `databricks_tools_core.sql` | -| Warehouse auto-detection in sql_executor | 30 | `get_best_warehouse()` | +Optional: if agent deployment becomes the primary mode, add a supervisor that serves the React SPA and proxies API calls to sub-agents. The frontend stays unchanged — same API paths, same behavior. --- @@ -448,18 +295,13 @@ This replaces the current "manual curl and check" testing with automated, repeat --- -## Integration Challenges — Concrete Solutions - -The three auth systems that need bridging: -- **Monolith auth** (`backend/services/auth.py:25`): `_obo_client` ContextVar → `WorkspaceClient` -- **`@app_agent`** (`dbx_agent_app/core/types.py:32`): `request.user_context` → `UserContext` with `.access_token` -- **`databricks-tools-core`** (`databricks_tools_core/auth.py:35-36`): `_host_ctx`/`_token_ctx` ContextVars via `set_databricks_auth()` +## Shared Modules -### 1. OBO Auth Bridge → `agents/_shared/auth_bridge.py` +The agent layer includes shared utilities in `agents/_shared/` that handle the integration between `@app_agent` and existing backend services. -**Problem:** Each agent receives `request.user_context` from `@app_agent`, but domain logic calls `get_workspace_client()` from the monolith's auth module. During migration, both patterns need to work. And `databricks-tools-core` functions use their own separate ContextVars. +### 1. Auth Bridge → `agents/_shared/auth_bridge.py` -**Solution:** `obo_context()` context manager that sets up all three auth systems in one `with` block: +`obo_context()` is a context manager that bridges `@app_agent`'s `request.user_context` into the existing `get_workspace_client()` pattern, plus `databricks-tools-core` ContextVars. This lets agent tools call existing domain logic without modification: ```python from agents._shared.auth_bridge import obo_context @@ -477,9 +319,7 @@ For streaming generators, capture the token before the generator starts and re-e ### 2. Complex Tool Schemas → `agents/creator/schemas.py` -**Problem:** `generate_config` has 11 parameters with 4-5 nesting levels (tables → column configs, example SQLs → parameters, etc.). `@app_agent`'s schema generator only handles primitives. The monolith defines these schemas as **~580 lines of hand-written JSON** in `create_agent_tools.py` — brittle, hard to maintain, and easy to get out of sync with the runtime code. - -**Solution:** **~80 lines of Pydantic models** that auto-generate the equivalent JSON Schema via `.model_json_schema()` and double as runtime validation: +For tools with deeply nested parameters (like `generate_config` with 11 params across 4-5 nesting levels), Pydantic models provide the JSON Schema and runtime validation in ~80 lines: ```python from agents.creator.schemas import GenerateConfigArgs @@ -492,13 +332,11 @@ async def generate_config(**kwargs) -> dict: args = GenerateConfigArgs(**kwargs) # Validate at runtime ``` -580 lines of hand-maintained JSON → 80 lines of Pydantic models. Schema and validation are always in sync because they come from the same source. - -### 3. Frontend Transparency → `agents/supervisor/proxy.py` +Schema and validation stay in sync because they come from the same source. 
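+
+One practical consequence: when the LLM emits malformed arguments, `pydantic.ValidationError` surfaces at the tool boundary instead of deep inside config generation. A sketch of how a tool might hand that back to the model (the error-payload shape is an assumption):
+
+```python
+from pydantic import ValidationError
+
+from agents.creator.schemas import GenerateConfigArgs
+
+async def generate_config(**kwargs) -> dict:
+    try:
+        args = GenerateConfigArgs(**kwargs)  # same validation as above
+    except ValidationError as e:
+        # Hand the failure back to the tool-calling loop so the LLM can
+        # repair its arguments rather than crashing the whole turn.
+        return {"error": "invalid_arguments", "details": e.errors()}
+    ...
+```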
-**Problem:** The React SPA makes 28 API calls to `/api/*` that route to 5 different sub-agents after decomposition. The frontend should not change. +### 3. Supervisor Proxy → `agents/supervisor/proxy.py` -**Solution:** Ordered route table with prefix matching, glob support for path parameters, and SSE stream detection: +If agents are deployed independently, the supervisor proxies frontend API calls to the correct agent. Ordered route table with prefix matching, glob support for path parameters, and SSE stream detection: ```python ROUTE_TABLE = [ @@ -513,11 +351,9 @@ ROUTE_TABLE = [ SSE streams are detected by `content-type: text/event-stream` and forwarded as chunked bytes. OBO headers pass through automatically. -### 4. SP Fallback Decorator → `agents/_shared/sp_fallback.py` - -**Problem:** The `_is_scope_error()` + retry-with-SP pattern is duplicated across `genie_client.py` and `spaces.py`. Each agent that calls Genie APIs needs this pattern. +### 4. SP Fallback → `agents/_shared/sp_fallback.py` -**Solution:** `@with_sp_fallback` decorator and `genie_api_call()` convenience function: +Centralizes the SP-fallback pattern for Genie API calls where the user's OBO token may lack required OAuth scopes: ```python from agents._shared.sp_fallback import genie_api_call @@ -529,9 +365,7 @@ space = genie_api_call("GET", f"/api/2.0/genie/spaces/{space_id}", ### 5. Shared Lakebase Pool → `agents/_shared/lakebase_client.py` -**Problem:** Multiple agents need Lakebase (scorer for scores/stars, creator for sessions). Each runs as a separate Databricks App with its own credentials. - -**Solution:** Shared pool lifecycle + idempotent DDL per agent: +Shared asyncpg pool lifecycle with idempotent DDL. Each agent initializes its own pool from its own env vars: ```python from agents._shared.lakebase_client import init_pool, SCORER_DDL @@ -544,20 +378,16 @@ Each agent initializes its own pool from its own env vars. Domain-specific query --- -## Estimated Impact - -| Metric | Before | After | -|--------|--------|-------| -| Backend Python lines | ~10,178 | ~7,100 (30% reduction from eliminating boilerplate) | -| Files deleted | 0 | 5 (routers + utility wrappers replaced by libraries) | -| Tool definition boilerplate | ~580 lines JSON schemas | 0 (auto-generated from type hints) | -| Dispatch table code | ~40 lines | 0 (auto-routing by `@app_agent`) | -| OBO auth code | ~30 lines middleware | 0 (handled by framework) | -| Auto-generated endpoints | 0 | 30+ (5 agents × 6 endpoints each: /invocations, /health, agent.json, MCP, etc.) | -| MCP servers | 0 | 5 (one per agent, free) | -| Agent discovery | None | A2A protocol, workspace-wide | -| Eval support | Manual testing | `mlflow.genai.evaluate()` via bridge | -| Deployment | Single `databricks apps deploy` | `dbx-agent-app deploy --config agents.yaml` (per-agent or all) | +## What You Get + +| Capability | Today | With agent layer | +|------------|-------|-----------------| +| Auto-generated endpoints | — | 30+ (5 agents × `/invocations`, `/health`, `agent.json`, MCP, etc.) | +| MCP servers | — | 5 (one per agent, free) | +| Agent discovery | — | A2A protocol, workspace-wide | +| Eval support | Manual testing | `mlflow.genai.evaluate()` via `app_predict_fn()` bridge | +| Independent deployment | — | `dbx-agent-app deploy --config agents.yaml --agent scorer` | +| Tool definitions | Hand-written JSON schemas | Auto-generated from function signatures + Pydantic models | --- @@ -577,33 +407,23 @@ Each agent initializes its own pool from its own env vars. 
Domain-specific query ## Files in This PR -### New files — scaffolds + deployment -- `docs/architecture-proposal.md` — this document +All files are additive. No changes to existing `backend/`, `frontend/`, `packages/`, or `scripts/`. + +### Agent scaffolds - `agents.yaml` — multi-agent deployment config -- `agents/scorer/app.py` — scorer agent scaffold -- `agents/scorer/app.yaml` — scorer Databricks Apps config -- `agents/analyzer/app.py` — analyzer agent scaffold -- `agents/analyzer/app.yaml` — analyzer Databricks Apps config -- `agents/creator/app.py` — creator agent scaffold -- `agents/creator/app.yaml` — creator Databricks Apps config -- `agents/optimizer/app.py` — optimizer agent scaffold -- `agents/optimizer/app.yaml` — optimizer Databricks Apps config -- `agents/fixer/app.py` — fixer agent scaffold -- `agents/fixer/app.yaml` — fixer Databricks Apps config - -### New files — integration challenge solutions -- `agents/_shared/__init__.py` — shared utilities package -- `agents/_shared/auth_bridge.py` — Challenge 1: OBO auth context manager bridging all 3 auth systems -- `agents/_shared/sp_fallback.py` — Challenge 4: SP fallback decorator for Genie API scope errors -- `agents/_shared/lakebase_client.py` — Challenge 5: Shared Lakebase pool with idempotent DDL -- `agents/creator/schemas.py` — Challenge 2: Pydantic models replacing ~580 lines of JSON schemas -- `agents/supervisor/__init__.py` — supervisor package -- `agents/supervisor/proxy.py` — Challenge 3: Frontend-transparent proxy with SSE support - -### Modified files -- `agents/scorer/app.py` — wired up auth_bridge, sp_fallback, and lakebase_client imports -- `agents/creator/app.py` — uses Pydantic schema override for generate_config/present_plan -- `docs/architecture-proposal.md` — replaced placeholder challenge descriptions with concrete solutions - -### No changes to existing monolith -The existing `backend/` code is untouched. All new files are additive. 
+- `agents/scorer/app.py` + `app.yaml` — scorer agent (wraps scanner.py) +- `agents/analyzer/app.py` + `app.yaml` — analyzer agent (wraps analyzer.py) +- `agents/creator/app.py` + `app.yaml` — creator agent (wraps create_agent.py) +- `agents/creator/schemas.py` — Pydantic models for complex tool parameters +- `agents/optimizer/app.py` + `app.yaml` — optimizer agent (wraps optimizer.py) +- `agents/fixer/app.py` + `app.yaml` — fixer agent (wraps fix_agent.py) +- `agents/supervisor/proxy.py` — frontend-transparent proxy with SSE support + +### Shared modules +- `agents/_shared/auth_bridge.py` — OBO auth context manager bridging `@app_agent` ↔ existing services +- `agents/_shared/sp_fallback.py` — SP fallback decorator for Genie API scope errors +- `agents/_shared/lakebase_client.py` — Shared Lakebase pool with idempotent DDL + +### Documentation +- `docs/architecture-proposal.md` — this document +- `docs/genierx-spec.md` — GenieRX analyzer/recommender specification From db87140d70ca42910794465494d75533310dfc96 Mon Sep 17 00:00:00 2001 From: Stuart Gano Date: Mon, 23 Mar 2026 17:29:58 -0700 Subject: [PATCH 6/6] feat: wire up all five agents to call existing backend services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace NotImplementedError stubs with actual implementations: - scorer: calls scanner.py, lakebase.py for scoring/history/stars - analyzer: calls genie_client.py, sql_executor.py, result_comparator.py - optimizer: calls optimizer.py for suggestions, merge, and space creation - fixer: calls fix_agent.py for patch generation and application - creator: delegates all 16 tools to create_agent_tools.handle_tool_call() All agents use obo_context() to bridge auth. No changes to existing backend services — agents are thin wrappers. Co-authored-by: Isaac --- agents/analyzer/app.py | 133 ++++++++++++++++++++++++++++++ agents/analyzer/app.yaml | 8 ++ agents/creator/app.py | 167 ++++++++++++++++---------------------- agents/fixer/app.py | 85 +++++++++++++++++++ agents/fixer/app.yaml | 6 ++ agents/optimizer/app.py | 116 ++++++++++++++++++++++++++ agents/optimizer/app.yaml | 10 +++ agents/scorer/app.py | 126 ++++++++++++++++------------ 8 files changed, 505 insertions(+), 146 deletions(-) create mode 100644 agents/analyzer/app.py create mode 100644 agents/analyzer/app.yaml create mode 100644 agents/fixer/app.py create mode 100644 agents/fixer/app.yaml create mode 100644 agents/optimizer/app.py create mode 100644 agents/optimizer/app.yaml diff --git a/agents/analyzer/app.py b/agents/analyzer/app.py new file mode 100644 index 0000000..c3bdef4 --- /dev/null +++ b/agents/analyzer/app.py @@ -0,0 +1,133 @@ +"""genie-analyzer — analysis, querying, and SQL execution agent. + +Wraps: + - backend/routers/analysis.py (fetch, parse, query, SQL, benchmark compare) + - backend/services/genie_client.py (space fetching, Genie queries) + - backend/sql_executor.py (SQL warehouse execution) + +Streaming: No (all request/response) +LLM: Yes (benchmark comparison uses LLM) +""" + +from __future__ import annotations + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents._shared.auth_bridge import obo_context + + +@app_agent( + name="genie-analyzer", + description=( + "Fetches and parses Genie Space configurations, queries Genie for SQL, " + "executes SQL on warehouses, and compares benchmark results." + ), +) +async def analyzer(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to analysis tools.""" + ... 
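+
+# NOTE: Each tool below is a thin wrapper — it enters obo_context() so the
+# user's OBO token flows into the existing get_workspace_client() path, then
+# calls the backend service unchanged (see agents/_shared/auth_bridge.py).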
+ + +# ── Tools ──────────────────────────────────────────────────────────────────── + + +@analyzer.tool( + description=( + "Fetch and parse a Genie Space by ID. Returns the space " + "configuration data." + ), +) +async def fetch_space(genie_space_id: str, request: AgentRequest) -> dict: + """Wraps backend/services/genie_client.py::get_serialized_space""" + with obo_context(request.user_context.access_token): + from backend.services.genie_client import get_serialized_space + space_data = get_serialized_space(genie_space_id) + return {"genie_space_id": genie_space_id, "space_data": space_data} + + +@analyzer.tool( + description="Parse pasted Genie Space JSON from the API response.", +) +async def parse_space_json(json_content: str) -> dict: + """Wraps backend/routers/analysis.py::parse_space_json logic""" + import json + from datetime import datetime + + raw_response = json.loads(json_content) + if "serialized_space" not in raw_response: + raise ValueError("Missing 'serialized_space' field in JSON") + + serialized = raw_response["serialized_space"] + space_data = json.loads(serialized) if isinstance(serialized, str) else serialized + genie_space_id = f"pasted-{datetime.now().strftime('%Y%m%d-%H%M%S')}" + + return {"genie_space_id": genie_space_id, "space_data": space_data} + + +@analyzer.tool( + description=( + "Query a Genie Space with a natural language question. " + "Returns the generated SQL if successful." + ), +) +async def query_genie(genie_space_id: str, question: str, request: AgentRequest) -> dict: + """Wraps backend/services/genie_client.py::query_genie_for_sql""" + with obo_context(request.user_context.access_token): + from backend.services.genie_client import query_genie_for_sql + return query_genie_for_sql( + genie_space_id=genie_space_id, + question=question, + ) + + +@analyzer.tool( + description=( + "Execute a read-only SQL query on a Databricks SQL Warehouse. " + "Returns tabular results limited to 1000 rows." + ), +) +async def execute_sql(sql: str, warehouse_id: str | None = None, request: AgentRequest = None) -> dict: + """Wraps backend/sql_executor.py::execute_sql""" + with obo_context(request.user_context.access_token): + from backend.sql_executor import execute_sql as _execute + return _execute(sql=sql, warehouse_id=warehouse_id) + + +@analyzer.tool( + description=( + "Compare Genie SQL results against expected SQL results using " + "LLM-based semantic comparison. Returns match type, confidence, " + "and an auto-label suggestion." 
+ ), +) +async def compare_results( + genie_result: dict, + expected_result: dict, + genie_sql: str | None = None, + expected_sql: str | None = None, + question: str | None = None, + request: AgentRequest = None, +) -> dict: + """Wraps backend/services/result_comparator.py::compare_results""" + import asyncio + with obo_context(request.user_context.access_token): + from backend.services.auth import run_in_context + from backend.services.result_comparator import compare_results as _compare + + result = await asyncio.get_running_loop().run_in_executor( + None, + run_in_context( + _compare, + genie_result=genie_result, + expected_result=expected_result, + genie_sql=genie_sql, + expected_sql=expected_sql, + question=question, + ), + ) + return result.model_dump() if hasattr(result, "model_dump") else result + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = analyzer.app diff --git a/agents/analyzer/app.yaml b/agents/analyzer/app.yaml new file mode 100644 index 0000000..3f82b08 --- /dev/null +++ b/agents/analyzer/app.yaml @@ -0,0 +1,8 @@ +command: ["uvicorn", "agents.analyzer.app:app", "--host", "0.0.0.0", "--port", "8000"] +env: + - name: DATABRICKS_HOST + value: "" + - name: SQL_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: LLM_MODEL + value: "databricks-claude-sonnet-4-6" diff --git a/agents/creator/app.py b/agents/creator/app.py index 966e493..2388a66 100644 --- a/agents/creator/app.py +++ b/agents/creator/app.py @@ -1,24 +1,15 @@ """genie-creator — Conversational wizard for building new Genie Spaces. -Extracted from: - - backend/routers/create.py (UC discovery, validation, agent chat, sessions) +Wraps: - backend/services/create_agent.py (CreateGenieAgent tool-calling loop) - - backend/services/create_agent_tools.py (16 tool definitions + implementations) + - backend/services/create_agent_tools.py (16 tool implementations + dispatcher) - backend/services/create_agent_session.py (two-tier session persistence) - - backend/services/uc_client.py (UC browsing — replaced by AI Dev Kit) - - backend/prompts_create/ (dynamic prompt assembly, 9 modules) - - backend/references/ (schema.md reference) + - backend/services/uc_client.py (UC browsing) + - backend/prompts_create/ (dynamic prompt assembly) - backend/genie_creator.py (Genie API write operations) -This is the MOST COMPLEX agent (Phase 6 extraction). The tool-calling loop, -message compaction, session persistence, and dynamic prompting are all -irreplaceable domain logic that moves as-is. - -What gets replaced: - - 580 lines of JSON tool schemas → auto-generated from @creator.tool() signatures - - 40-line handle_tool_call() dispatcher → auto-routing - - uc_client.py (60 lines) → databricks_tools_core.unity_catalog - - sql_executor.py (220 lines) → databricks_tools_core.sql +This is the most complex agent — 16 tools, session persistence, LLM +tool-calling loop with message compaction. Streaming: Yes (SSE for agent chat) LLM: Yes (tool-calling loop with Claude) @@ -28,6 +19,7 @@ from dbx_agent_app import AgentRequest, AgentResponse, app_agent +from agents._shared.auth_bridge import obo_context from agents.creator.schemas import GenerateConfigArgs @@ -40,103 +32,93 @@ ), ) async def creator(request: AgentRequest) -> AgentResponse: - """Route incoming agent requests to the creator workflow. + """Route incoming agent requests to the creator workflow.""" + ... - The core tool-calling loop (CreateGenieAgent.chat) moves here as-is. 
- It handles: step detection, LLM streaming, tool dispatch, message - compaction, JSON repair, and session management. - Source: backend/services/create_agent.py::CreateGenieAgent.chat - """ - # TODO: Phase 6 — move CreateGenieAgent.chat here - ... +# ── Helper ─────────────────────────────────────────────────────────────────── + +def _call_tool(name: str, arguments: dict, session_config: dict | None = None) -> dict: + """Dispatch to backend/services/create_agent_tools.py::handle_tool_call.""" + from backend.services.create_agent_tools import handle_tool_call + return handle_tool_call(name, arguments, session_config=session_config) # ── UC Discovery Tools ────────────────────────────────────────────────────── -# Phase 8: Replace implementations with databricks_tools_core @creator.tool(description="List all Unity Catalog catalogs the user has access to.") -async def discover_catalogs() -> dict: - """Source: backend/services/uc_client.py::list_catalogs - - Phase 8: from databricks_tools_core.unity_catalog import list_catalogs - """ - raise NotImplementedError("Phase 6/8") +async def discover_catalogs(request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("discover_catalogs", {}) @creator.tool(description="List schemas within a catalog.") -async def discover_schemas(catalog: str) -> dict: - """Source: backend/services/uc_client.py::list_schemas""" - raise NotImplementedError("Phase 6/8") +async def discover_schemas(catalog: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("discover_schemas", {"catalog": catalog}) @creator.tool(description="List tables within a catalog.schema.") -async def discover_tables(catalog: str, schema: str) -> dict: - """Source: backend/services/uc_client.py::list_tables""" - raise NotImplementedError("Phase 6/8") +async def discover_tables(catalog: str, schema: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("discover_tables", {"catalog": catalog, "schema": schema}) -# ── Table Inspection Tools ─────────────────────────────────────────────────── +# ── Table Inspection Tools ────────────────────────────────────────────────── @creator.tool( description="Get detailed table metadata: columns, types, descriptions, row count, sample rows.", ) -async def describe_table(table: str) -> dict: - """Source: backend/services/create_agent_tools.py::_describe_table (lines ~860-960)""" - raise NotImplementedError("Phase 6") +async def describe_table(table: str, request: AgentRequest) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("describe_table", {"table": table}) @creator.tool( - description=( - "Profile selected columns: distinct values, null percentage, " - "min/max, data type distribution." 
- ), + description="Profile selected columns: distinct values, null percentage, min/max.", ) -async def profile_columns(table: str, columns: list[str] | None = None) -> dict: - """Source: backend/services/create_agent_tools.py::_profile_columns""" - raise NotImplementedError("Phase 6") +async def profile_columns(table: str, columns: list[str] | None = None, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("profile_columns", {"table": table, "columns": columns}) @creator.tool( description="Assess data quality: null rates, duplicate rates, freshness, anomalies.", ) -async def assess_data_quality(tables: list[str]) -> dict: - """Source: backend/services/create_agent_tools.py::_assess_data_quality""" - raise NotImplementedError("Phase 6") +async def assess_data_quality(tables: list[str], request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("assess_data_quality", {"tables": tables}) @creator.tool( description="Profile table usage patterns: query frequency, common joins, active users.", ) -async def profile_table_usage(tables: list[str]) -> dict: - """Source: backend/services/create_agent_tools.py::_profile_table_usage""" - raise NotImplementedError("Phase 6") +async def profile_table_usage(tables: list[str], request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("profile_table_usage", {"tables": tables}) @creator.tool(description="Execute a test SQL query and return results (read-only, max 5 rows).") -async def test_sql(sql: str) -> dict: - """Source: backend/services/create_agent_tools.py::_test_sql - - Phase 8: Replace with databricks_tools_core.sql.execute_sql - """ - raise NotImplementedError("Phase 6/8") +async def test_sql(sql: str, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("test_sql", {"sql": sql}) @creator.tool(description="List available SQL warehouses for the user.") -async def discover_warehouses() -> dict: - """Source: backend/services/create_agent_tools.py::_discover_warehouses""" - raise NotImplementedError("Phase 6") +async def discover_warehouses(request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("discover_warehouses", {}) -# ── Config Generation Tools ────────────────────────────────────────────────── +# ── Config Generation Tools ───────────────────────────────────────────────── @creator.tool(description="Get the Genie Space configuration JSON schema reference.") -async def get_config_schema() -> dict: - """Source: backend/services/create_agent_tools.py::_get_config_schema""" - raise NotImplementedError("Phase 6") +async def get_config_schema(request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("get_config_schema", {}) @creator.tool( @@ -146,45 +128,32 @@ async def get_config_schema() -> dict: ), parameters=GenerateConfigArgs.model_json_schema(), ) -async def generate_config(**kwargs) -> dict: - """Source: backend/services/create_agent_tools.py::_generate_config (~lines 245-650) - - This is the largest tool implementation. The LLM provides content; - this tool handles all structural formatting (JSON schema compliance, - column config normalization, instruction budget enforcement). 
- - Integration pattern (Challenge 2): - Pydantic model auto-generates the JSON Schema for @app_agent - registration, replacing ~580 lines of hand-written schema. - Runtime validation catches malformed LLM output early. - """ +async def generate_config(request: AgentRequest = None, **kwargs) -> dict: args = GenerateConfigArgs(**kwargs) - # TODO Phase 6: move _generate_config implementation here - # args.tables, args.sample_questions, etc. are all validated - raise NotImplementedError("Phase 6") + with obo_context(request.user_context.access_token): + return _call_tool("generate_config", args.model_dump()) @creator.tool( description="Present the space creation plan to the user for review before generating config.", parameters=GenerateConfigArgs.model_json_schema(), ) -async def present_plan(**kwargs) -> dict: - """Source: backend/services/create_agent_tools.py::_present_plan""" +async def present_plan(request: AgentRequest = None, **kwargs) -> dict: args = GenerateConfigArgs(**kwargs) - # TODO Phase 6: move _present_plan implementation here - raise NotImplementedError("Phase 6") + with obo_context(request.user_context.access_token): + return _call_tool("present_plan", args.model_dump()) @creator.tool(description="Validate a generated configuration against the Genie Space schema.") -async def validate_config(config: dict) -> dict: - """Source: backend/services/create_agent_tools.py::_validate_config""" - raise NotImplementedError("Phase 6") +async def validate_config(config: dict, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("validate_config", {"config": config}) @creator.tool(description="Apply incremental updates to an existing generated configuration.") -async def update_config(config: dict, updates: dict) -> dict: - """Source: backend/services/create_agent_tools.py::_update_config""" - raise NotImplementedError("Phase 6") +async def update_config(config: dict, updates: dict, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("update_config", {"config": config, "updates": updates}) @creator.tool( @@ -195,17 +164,23 @@ async def create_space( config: dict, parent_path: str | None = None, warehouse_id: str | None = None, + request: AgentRequest = None, ) -> dict: - """Source: backend/services/create_agent_tools.py::_create_space + backend/genie_creator.py""" - raise NotImplementedError("Phase 6") + with obo_context(request.user_context.access_token): + return _call_tool("create_space", { + "display_name": display_name, + "config": config, + "parent_path": parent_path, + "warehouse_id": warehouse_id, + }) @creator.tool( description="Update an existing Genie Space with a modified configuration.", ) -async def update_space(space_id: str, config: dict) -> dict: - """Source: backend/services/create_agent_tools.py::_update_space""" - raise NotImplementedError("Phase 6") +async def update_space(space_id: str, config: dict, request: AgentRequest = None) -> dict: + with obo_context(request.user_context.access_token): + return _call_tool("update_space", {"space_id": space_id, "config": config}) # ── Standalone entry point ─────────────────────────────────────────────────── diff --git a/agents/fixer/app.py b/agents/fixer/app.py new file mode 100644 index 0000000..edd0c15 --- /dev/null +++ b/agents/fixer/app.py @@ -0,0 +1,85 @@ +"""genie-fixer — AI fix agent for Genie Space configurations. 
+ +Wraps: + - backend/services/fix_agent.py (FixAgent — LLM patch generation + application) + +Streaming: Yes (SSE — thinking, patch, applying, complete/error events) +LLM: Yes (fix plan generation) +""" + +from __future__ import annotations + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents._shared.auth_bridge import obo_context + + +@app_agent( + name="genie-fixer", + description=( + "AI fix agent that generates and applies targeted patches to Genie " + "Space configurations based on IQ scan findings." + ), +) +async def fixer(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to fix tools.""" + ... + + +# ── Tools ──────────────────────────────────────────────────────────────────── + + +@fixer.tool( + description=( + "Generate and apply fixes to a Genie Space based on IQ scan findings. " + "Returns a stream of progress events: thinking, patch details, " + "application status, and final result with before/after diff." + ), +) +async def generate_fixes( + space_id: str, + findings: list[str], + space_config: dict, + request: AgentRequest = None, +) -> list[dict]: + """Wraps backend/services/fix_agent.py::FixAgent.run + + Collects the streaming events into a list for agent protocol compatibility. + For SSE streaming via the supervisor proxy, use the monolith endpoint. + """ + with obo_context(request.user_context.access_token): + from backend.services.fix_agent import get_fix_agent + + agent = get_fix_agent() + events = [] + async for event in agent.run( + space_id=space_id, + findings=findings, + space_config=space_config, + ): + events.append(event) + + return events + + +@fixer.tool( + description=( + "Apply a specific config patch to a Genie Space via the Databricks API. " + "Takes a full updated config and writes it to the space." + ), +) +async def apply_patch( + space_id: str, + updated_config: dict, + request: AgentRequest = None, +) -> dict: + """Wraps backend/services/fix_agent.py::_apply_config_to_databricks""" + with obo_context(request.user_context.access_token): + from backend.services.fix_agent import _apply_config_to_databricks + await _apply_config_to_databricks(space_id, updated_config) + return {"space_id": space_id, "status": "applied"} + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = fixer.app diff --git a/agents/fixer/app.yaml b/agents/fixer/app.yaml new file mode 100644 index 0000000..0f99d55 --- /dev/null +++ b/agents/fixer/app.yaml @@ -0,0 +1,6 @@ +command: ["uvicorn", "agents.fixer.app:app", "--host", "0.0.0.0", "--port", "8000"] +env: + - name: DATABRICKS_HOST + value: "" + - name: LLM_MODEL + value: "databricks-claude-sonnet-4-6" diff --git a/agents/optimizer/app.py b/agents/optimizer/app.py new file mode 100644 index 0000000..a4b882f --- /dev/null +++ b/agents/optimizer/app.py @@ -0,0 +1,116 @@ +"""genie-optimizer — optimization suggestions from benchmark feedback. + +Wraps: + - backend/services/optimizer.py (GenieSpaceOptimizer) + - backend/routers/analysis.py (optimize, merge, create endpoints) + +Streaming: Yes (heartbeat SSE for long LLM calls) +LLM: Yes (suggestion generation) +""" + +from __future__ import annotations + +import asyncio + +from dbx_agent_app import AgentRequest, AgentResponse, app_agent + +from agents._shared.auth_bridge import obo_context + + +@app_agent( + name="genie-optimizer", + description=( + "Generates optimization suggestions for Genie Spaces based on " + "benchmark labeling feedback. 
Merges suggestions into config and " + "can create new optimized spaces." + ), +) +async def optimizer(request: AgentRequest) -> AgentResponse: + """Route incoming agent requests to optimization tools.""" + ... + + +# ── Tools ──────────────────────────────────────────────────────────────────── + + +@optimizer.tool( + description=( + "Generate optimization suggestions based on benchmark labeling " + "feedback. Uses LLM to analyze failure patterns and recommend " + "config changes. May take 30-90 seconds." + ), +) +async def generate_suggestions( + space_data: dict, + labeling_feedback: list[dict], + request: AgentRequest = None, +) -> dict: + """Wraps backend/services/optimizer.py::generate_optimizations""" + with obo_context(request.user_context.access_token): + from backend.services.auth import run_in_context + from backend.services.optimizer import get_optimizer + from backend.models import LabelingFeedbackItem + + feedback_items = [LabelingFeedbackItem(**f) for f in labeling_feedback] + + def _run(): + return get_optimizer().generate_optimizations( + space_data=space_data, + labeling_feedback=feedback_items, + ) + + result = await asyncio.get_running_loop().run_in_executor( + None, run_in_context(_run), + ) + return result.model_dump() + + +@optimizer.tool( + description=( + "Merge optimization suggestions into a space config. Fast operation " + "that applies field-level changes without LLM calls." + ), +) +async def merge_config( + space_data: dict, + suggestions: list[dict], + request: AgentRequest = None, +) -> dict: + """Wraps backend/services/optimizer.py::merge_config""" + with obo_context(request.user_context.access_token): + from backend.services.optimizer import get_optimizer + from backend.models import OptimizationSuggestion + + suggestion_items = [OptimizationSuggestion(**s) for s in suggestions] + result = get_optimizer().merge_config( + space_data=space_data, + suggestions=suggestion_items, + ) + return result.model_dump() + + +@optimizer.tool( + description=( + "Create a new Genie Space with an optimized configuration. " + "Requires GENIE_TARGET_DIRECTORY to be configured." + ), +) +async def create_space( + display_name: str, + merged_config: dict, + parent_path: str | None = None, + request: AgentRequest = None, +) -> dict: + """Wraps backend/genie_creator.py::create_genie_space""" + with obo_context(request.user_context.access_token): + from backend.genie_creator import create_genie_space as _create + return _create( + display_name=display_name, + merged_config=merged_config, + parent_path=parent_path, + ) + + +# ── Standalone entry point ─────────────────────────────────────────────────── + +app = optimizer.app diff --git a/agents/optimizer/app.yaml b/agents/optimizer/app.yaml new file mode 100644 index 0000000..e12f39e --- /dev/null +++ b/agents/optimizer/app.yaml @@ -0,0 +1,10 @@ +command: ["uvicorn", "agents.optimizer.app:app", "--host", "0.0.0.0", "--port", "8000"] +env: + - name: DATABRICKS_HOST + value: "" + - name: SQL_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: LLM_MODEL + value: "databricks-claude-sonnet-4-6" + - name: GENIE_TARGET_DIRECTORY + value: "/Shared/" diff --git a/agents/scorer/app.py b/agents/scorer/app.py index 2789160..a9058f1 100644 --- a/agents/scorer/app.py +++ b/agents/scorer/app.py @@ -1,17 +1,11 @@ """genie-scorer — IQ scoring agent for Genie Spaces. 
-Extracted from: - - backend/routers/spaces.py (scan, history, star, list endpoints) - - backend/services/scanner.py (rule-based scoring engine) - - backend/services/lakebase.py (score persistence) - -This agent has NO LLM dependency — it's pure rule-based scoring. -Lowest-risk extraction target; validates the @app_agent pattern. - -Integration patterns used: - - Challenge 1 (OBO auth): obo_context() bridges @app_agent → monolith auth - - Challenge 4 (SP fallback): genie_api_call() retries with SP on scope errors - - Challenge 5 (Lakebase): init_pool(SCORER_DDL) for idempotent schema setup +Wraps: + - backend/services/scanner.py (rule-based scoring engine) + - backend/services/lakebase.py (score persistence, stars) + - backend/routers/spaces.py (list, detail endpoints) + +This agent has NO LLM dependency — pure rule-based scoring. """ from __future__ import annotations @@ -19,7 +13,6 @@ from dbx_agent_app import AgentRequest, AgentResponse, app_agent from agents._shared.auth_bridge import obo_context -from agents._shared.sp_fallback import genie_api_call from agents._shared.lakebase_client import init_pool, SCORER_DDL @@ -33,7 +26,6 @@ ) async def scorer(request: AgentRequest) -> AgentResponse: """Route incoming agent requests to the appropriate scoring tool.""" - # TODO: Parse intent from request.messages and dispatch to tools ... @@ -46,35 +38,20 @@ async def on_startup(): # ── Tools ──────────────────────────────────────────────────────────────────── -# Each tool maps to a current REST endpoint in backend/routers/spaces.py. -# Domain logic lives in scanner.py (moved as-is from backend/services/). @scorer.tool( description=( "Run an IQ scan on a Genie Space. Fetches the space configuration, " - "calculates a score (0-100) across four dimensions (foundation, data " - "setup, SQL assets, optimization), and persists the result to Lakebase." + "calculates a score (0-15) across four dimensions, and persists the " + "result to Lakebase." ), ) async def scan_space(space_id: str, request: AgentRequest) -> dict: - """Source: backend/services/scanner.py::scan_space + backend/routers/spaces.py::trigger_scan - - Integration pattern: - obo_context() sets up both monolith ContextVar and tools-core auth. - genie_api_call() auto-retries with SP on scope errors. - Domain logic (scanner.calculate_score) works unchanged. 
- """ + """Wraps backend/services/scanner.py::scan_space""" with obo_context(request.user_context.access_token): - # Fetch space config (with automatic SP fallback for scope errors) - space_data = genie_api_call( - "GET", - f"/api/2.0/genie/spaces/{space_id}", - query={"include_serialized_space": "true"}, - ) - # TODO Phase 2: scanner.calculate_score(space_data) - # TODO Phase 2: save_scan_result(space_id, score) - raise NotImplementedError("Phase 2: move scanner.py + lakebase.py here") + from backend.services.scanner import scan_space as _scan + return await _scan(space_id) @scorer.tool( @@ -84,16 +61,20 @@ async def scan_space(space_id: str, request: AgentRequest) -> dict: ), ) async def get_history(space_id: str, days: int = 30) -> list[dict]: - """Source: backend/services/lakebase.py::get_score_history""" - raise NotImplementedError("Phase 2: move lakebase.get_score_history here") + """Wraps backend/services/lakebase.py::get_score_history""" + from backend.services.lakebase import get_score_history + rows = await get_score_history(space_id, days=days) + return [dict(r) for r in rows] if rows else [] @scorer.tool( description="Toggle the star (bookmark) status of a Genie Space.", ) async def toggle_star(space_id: str, starred: bool) -> dict: - """Source: backend/services/lakebase.py::star_space""" - raise NotImplementedError("Phase 2: move lakebase.star_space here") + """Wraps backend/services/lakebase.py::star_space""" + from backend.services.lakebase import star_space + await star_space(space_id, starred) + return {"space_id": space_id, "starred": starred} @scorer.tool( @@ -107,28 +88,73 @@ async def list_spaces( starred_only: bool = False, min_score: int | None = None, max_score: int | None = None, + request: AgentRequest = None, ) -> list[dict]: - """Source: backend/routers/spaces.py::list_spaces + """Wraps backend/routers/spaces.py::list_spaces logic""" + with obo_context(request.user_context.access_token): + from backend.services.genie_client import list_genie_spaces + from backend.services.lakebase import get_latest_score, get_starred_spaces + from backend.services.auth import get_workspace_client + + raw_spaces = list_genie_spaces() + client = get_workspace_client() + host = (client.config.host or "").rstrip("/") + starred_ids = set(await get_starred_spaces()) + + items = [] + for space in raw_spaces: + sid = space.get("space_id", "") + title = space.get("display_name", space.get("title", "")) + + if search and search.lower() not in title.lower(): + continue + if starred_only and sid not in starred_ids: + continue - Note (PR #6-#8): API response uses `space_id`/`title` fields (not `id`/`display_name`). - Returns `space_url` per item (host + /genie/rooms/{space_id}). - Uses SP fallback via get_service_principal_client() when OBO token lacks genie scope. 
- """ - raise NotImplementedError("Phase 2: move list_spaces logic here") + score_data = await get_latest_score(sid) + score = score_data.get("score") if score_data else None + + if min_score is not None and (score is None or score < min_score): + continue + if max_score is not None and (score is None or score > max_score): + continue + + items.append({ + "space_id": sid, + "title": title, + "space_url": f"{host}/genie/rooms/{sid}" if host else None, + "score": score, + "maturity": score_data.get("maturity") if score_data else None, + "starred": sid in starred_ids, + }) + + return items @scorer.tool( description="Get detailed space metadata with latest scan result and star status.", ) -async def get_space_detail(space_id: str) -> dict: - """Source: backend/routers/spaces.py::get_space_detail +async def get_space_detail(space_id: str, request: AgentRequest) -> dict: + """Wraps backend/routers/spaces.py::get_space_detail logic""" + with obo_context(request.user_context.access_token): + from backend.services.genie_client import get_genie_space + from backend.services.lakebase import get_latest_score, is_space_starred + + space_info = get_genie_space(space_id) + score_data = await get_latest_score(space_id) + starred = await is_space_starred(space_id) - Note (PR #7): Includes SP fallback (_is_scope_error check) for Genie API calls. - """ - raise NotImplementedError("Phase 2: move get_space_detail logic here") + return { + "space_id": space_id, + "title": space_info.get("display_name", ""), + "score": score_data.get("score") if score_data else None, + "maturity": score_data.get("maturity") if score_data else None, + "starred": starred, + "last_scanned": score_data.get("scanned_at") if score_data else None, + "scan_result": score_data, + } # ── Standalone entry point ─────────────────────────────────────────────────── -# For local development: uvicorn agents.scorer.app:app --port 8001 app = scorer.app