Skip to content

Commit 4611851

Browse files
fix: OBO auth + SQL warehouse auto-detection for Databricks Apps
- Fix middleware to read user token from x-forwarded-access-token header (not Authorization), per Databricks Apps auth docs
- Force auth_type="pat" in OBO client to prevent SDK from preferring oauth-m2m when DATABRICKS_CLIENT_ID is set in environment
- Add SQL warehouse auto-detection: if SQL_WAREHOUSE_ID is not set, discover a running Pro/Serverless warehouse the user has access to
- Update app.yaml to pull SQL_WAREHOUSE_ID from app resource via valueFrom
- Add structured logging to middleware showing OBO vs SP auth path
- Remove frontend/dist/ from git tracking (build artifact, deployed via sync)
- Improve error messages for missing warehouse access
1 parent 0cf767c commit 4611851

20 files changed

Lines changed: 859 additions & 179 deletions

.databricksignore

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,20 @@ README.md
5151
.venv/
5252
venv/
5353

54-
# MLflow local runs
54+
# MLflow local runs and database
5555
mlruns/
56+
mlflow.db
57+
58+
# Other non-runtime files
59+
*.md
60+
!backend/references/schema.md
61+
CODEOWNERS.txt
62+
LICENSE.md
63+
NOTICE.md
64+
SECURITY.md
65+
notebooks/
66+
scripts/
67+
sql/
68+
.databricks/
69+
.gitleaksignore
70+
.gitignore

.gitignore

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ __pycache__/
2020
.Python
2121
build/
2222
develop-eggs/
23-
# NOTE: dist/ is NOT ignored because frontend/dist/ needs to be deployed
23+
# frontend/dist/ is ignored in git (build artifact) but NOT in .databricksignore
24+
# so it still gets synced to the workspace for deployment.
25+
frontend/dist/
2426
# The .databricksignore file controls what gets synced to Databricks Apps
2527
downloads/
2628
eggs/
@@ -209,13 +211,14 @@ sketch
209211

210212
**/uv.lock
211213
**/mlruns/
214+
mlflow.db
212215
**/.vite/
213216
**/.databricks
214217
**/.claude
215218
**/.env.local
216219

217220
# Frontend build artifacts (Vite)
218-
# NOTE: frontend/dist/ is NOT ignored - it must be deployed to Databricks Apps
221+
# frontend/dist/ is deployed via databricks sync (not git)
219222
**/dist-ssr/
220223

221224
# Frontend debug logs

.gitleaksignore

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1-
frontend/dist/assets/index-CZvr7nrv.js:vault-service-token:8
2-
frontend/dist/assets/index-BEGK0X5-.js:vault-service-token:8
3-
frontend/dist/assets/index-DfyblZUj.js:vault-service-token:8
1+
# False positives from Vite-bundled React build artifacts (minified JS)
2+
dba8a46670f71554d7c6f85b8156c6ff8b927154:frontend/dist/assets/index-DfyblZUj.js:vault-service-token:8
3+
dba8a46670f71554d7c6f85b8156c6ff8b927154:frontend/dist/assets/index-BEGK0X5-.js:vault-service-token:8
4+
faff5e63c36691d4cd42c63cdd9c47c7a6e0fc71:frontend/dist/assets/index-BEGK0X5-.js:vault-service-token:8
5+
faff5e63c36691d4cd42c63cdd9c47c7a6e0fc71:frontend/dist/assets/index-Dkgq7VUt.js:vault-service-token:8
6+
fa428d65a8d509b4b544de096ccbcff4ef4a3d1a:frontend/dist/assets/index-Dkgq7VUt.js:vault-service-token:8

app.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,12 @@ env:
3838
value: "databricks-claude-sonnet-4-6"
3939

4040
# ---------------------------------------------------------------------------
41-
# SQL Warehouse (required for GenieIQ benchmark execution)
41+
# SQL Warehouse — pulled from the app resource named "sql-warehouse".
42+
# Configure the warehouse resource in the Databricks Apps UI.
43+
# Falls back to auto-detect if empty.
4244
# ---------------------------------------------------------------------------
4345
- name: SQL_WAREHOUSE_ID
44-
value: "" # Required: your SQL warehouse ID
46+
valueFrom: sql-warehouse
4547

4648
# ---------------------------------------------------------------------------
4749
# Genie Space target directory (where new spaces are created)

backend/genie_creator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,8 @@ def create_genie_space(
308308
warehouse_id = get_sql_warehouse_id()
309309
if not warehouse_id:
310310
raise ValueError(
311-
"SQL_WAREHOUSE_ID must be configured to create Genie Spaces. "
312-
"Set it to your SQL Warehouse ID."
311+
"No SQL warehouse available. Ensure you have access to at least "
312+
"one running Pro or Serverless SQL warehouse."
313313
)
314314

315315
t0 = _time.monotonic()

backend/main.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,51 @@ def _validate_mlflow_experiment() -> bool:
6565
from starlette.requests import Request
6666
from starlette.responses import Response
6767

68-
from backend.services.auth import is_running_on_databricks_apps
68+
from backend.services.auth import is_running_on_databricks_apps, set_obo_user_token, clear_obo_user_token
6969
from backend.routers.analysis import router as analysis_router
7070
from backend.routers.spaces import router as spaces_router
7171
from backend.routers.admin import router as admin_router
7272
from backend.routers.auth import router as auth_router
7373
from backend.routers.create import router as create_router
7474

7575

76+
class OBOAuthMiddleware(BaseHTTPMiddleware):
    """Extract the user's access token and set a per-request OBO client.

    On Databricks Apps the platform forwards the user's OAuth token in the
    ``x-forwarded-access-token`` header (NOT the standard Authorization
    header). For ``/api/`` requests that carry the header, the token is
    handed to ``set_obo_user_token`` so that ``get_workspace_client()``
    calls in the request path return a client authenticated as the user —
    not the service principal.

    Ref: https://docs.databricks.com/aws/en/dev-tools/databricks-apps/auth#user-authorization

    The token (or ``""`` when absent / not an ``/api/`` path) is always
    stashed on ``request.state.user_token``.

    For streaming endpoints (SSE), the OBO client is NOT cleared after
    ``call_next`` because the response body streams lazily. Streaming
    handlers must call ``set_obo_user_token`` themselves from within the
    generator, using the token stashed on ``request.state``.
    """

    async def dispatch(self, request: Request, call_next) -> Response:
        """Set up the OBO context, run the request, then clean up.

        Args:
            request: The incoming Starlette request.
            call_next: Callable that forwards the request down the stack.

        Returns:
            The downstream response, unmodified.
        """
        token = ""
        if request.url.path.startswith("/api/"):
            token = request.headers.get("x-forwarded-access-token", "")
            if token:
                set_obo_user_token(token)
                logger.info("OBO: using user token for %s", request.url.path)
            else:
                logger.info("OBO: no x-forwarded-access-token, using SP for %s", request.url.path)
        request.state.user_token = token

        is_streaming = False
        try:
            response = await call_next(request)
            # The object returned by call_next is Starlette's internal
            # streaming wrapper, which does not carry the endpoint's
            # ``media_type`` attribute; the Content-Type header is the
            # reliable way to recognise an SSE response.
            content_type = response.headers.get("content-type", "")
            is_streaming = content_type.lower().startswith("text/event-stream")
            return response
        finally:
            # Also runs when call_next raises or the request is cancelled,
            # so a failed request never leaves the OBO client set.
            if not is_streaming:
                clear_obo_user_token()
111+
112+
76113
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
77114
"""Add security headers to all responses."""
78115

@@ -99,6 +136,7 @@ async def dispatch(self, request: Request, call_next) -> Response:
99136
logger.warning(f"MLflow git-based version tracking not configured: {e}")
100137

101138
app.add_middleware(SecurityHeadersMiddleware)
139+
app.add_middleware(OBOAuthMiddleware)
102140

103141
if not is_running_on_databricks_apps():
104142
app.add_middleware(

backend/routers/create.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
import json
55
import logging
6-
from fastapi import APIRouter, HTTPException
6+
from fastapi import APIRouter, HTTPException, Request
77
from fastapi.responses import StreamingResponse
88
from pydantic import BaseModel, Field
99

@@ -122,7 +122,7 @@ class AgentChatRequest(BaseModel):
122122

123123

124124
@router.post("/agent/chat")
125-
async def agent_chat(body: AgentChatRequest):
125+
async def agent_chat(body: AgentChatRequest, request: Request):
126126
"""Conversational endpoint for the Create Genie agent.
127127
128128
Returns a streaming SSE response with typed events:
@@ -138,6 +138,7 @@ async def agent_chat(body: AgentChatRequest):
138138
from backend.services.create_agent_session import (
139139
create_session, get_session_async, persist_session,
140140
)
141+
from backend.services.auth import set_obo_user_token, clear_obo_user_token
141142

142143
agent = get_create_agent()
143144

@@ -152,12 +153,21 @@ async def agent_chat(body: AgentChatRequest):
152153
if body.selections:
153154
user_message += f"\n\n[User selections: {json.dumps(body.selections)}]"
154155

156+
# Capture the user token so the streaming generator can re-establish
157+
# the OBO context (ContextVars don't propagate into async generators
158+
# that outlive the middleware's call_next).
159+
user_token = getattr(request.state, "user_token", "")
160+
155161
async def event_stream():
156-
yield _sse_event("session", {"session_id": session.session_id})
157-
async for event in agent.chat(session, user_message):
158-
yield _sse_event(event["event"], event["data"])
159-
# Persist session to Lakebase after the turn completes
160-
await persist_session(session)
162+
if user_token:
163+
set_obo_user_token(user_token)
164+
try:
165+
yield _sse_event("session", {"session_id": session.session_id})
166+
async for event in agent.chat(session, user_message):
167+
yield _sse_event(event["event"], event["data"])
168+
await persist_session(session)
169+
finally:
170+
clear_obo_user_token()
161171

162172
return StreamingResponse(
163173
event_stream(),

backend/services/auth.py

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,71 @@
11
"""
22
Authentication utilities for Databricks Apps deployment.
33
4-
Uses service principal authentication when running on Databricks Apps,
5-
and falls back to PAT token or CLI authentication for local development.
4+
On Databricks Apps, uses OBO (On Behalf Of) — each request creates a
5+
WorkspaceClient with the user's forwarded token so all SDK calls (SQL,
6+
UC, serving endpoints) execute under the user's identity and permissions.
7+
8+
Locally, falls back to PAT token or CLI profile (singleton client).
69
"""
710

811
import logging
912
import os
13+
from contextvars import ContextVar
1014

1115
from databricks.sdk import WorkspaceClient
16+
from databricks.sdk.config import Config
1217

1318
logger = logging.getLogger(__name__)
1419

15-
# Singleton client — avoids re-reading ~/.databrickscfg on every call
20+
# Singleton client for local dev (or fallback when no user token is available)
1621
_client: WorkspaceClient | None = None
1722
_auth_logged = False
1823

24+
# Per-request OBO client stored in a context variable
25+
_obo_client: ContextVar[WorkspaceClient | None] = ContextVar("_obo_client", default=None)
26+
1927

2028
def is_running_on_databricks_apps() -> bool:
    """Report whether the process runs on Databricks Apps.

    The signal is the presence of the ``DATABRICKS_APP_PORT`` environment
    variable; its value is not inspected. Absent means local development.
    """
    return "DATABRICKS_APP_PORT" in os.environ
2331

2432

25-
def get_workspace_client() -> WorkspaceClient:
26-
"""Get a cached Databricks WorkspaceClient with appropriate authentication.
33+
def set_obo_user_token(token: str) -> None:
34+
"""Set the user's OBO token for the current request context.
2735
28-
The client is created once and reused for the lifetime of the process.
29-
On Databricks Apps it uses the service principal; locally it uses
30-
PAT token or CLI profile.
36+
Call this from middleware/dependencies with the user token that
37+
Databricks Apps forwards in the ``x-forwarded-access-token`` header.
38+
Creates a per-request WorkspaceClient that authenticates as the user.
39+
40+
We must explicitly set ``auth_type="pat"`` because the Databricks Apps
41+
environment has DATABRICKS_CLIENT_ID / DATABRICKS_CLIENT_SECRET set,
42+
and the SDK would otherwise use oauth-m2m instead of the user's token.
3143
"""
44+
host = os.environ.get("DATABRICKS_HOST", "")
45+
if not host:
46+
default = _get_default_client()
47+
host = default.config.host or ""
48+
49+
cfg = Config(
50+
host=host,
51+
token=token,
52+
auth_type="pat",
53+
# Prevent the SDK from reading env vars that would override the token
54+
client_id=None,
55+
client_secret=None, # gitleaks:allow
56+
)
57+
client = WorkspaceClient(config=cfg)
58+
_obo_client.set(client)
59+
logger.debug("OBO client set for current request (host=%s, auth=%s)", host, cfg.auth_type)
60+
61+
62+
def clear_obo_user_token() -> None:
    """Drop the OBO client from the current context.

    Resets the ``_obo_client`` context variable to ``None``; intended to be
    called once the request that set it has completed.
    """
    _obo_client.set(None)
65+
66+
67+
def _get_default_client() -> WorkspaceClient:
68+
"""Get the default singleton client (SP on Apps, CLI/PAT locally)."""
3269
global _client, _auth_logged
3370

3471
if _client is None:
@@ -61,15 +98,27 @@ def get_workspace_client() -> WorkspaceClient:
6198
return _client
6299

63100

101+
def get_workspace_client() -> WorkspaceClient:
    """Return the WorkspaceClient that applies to the current context.

    Prefers the per-request OBO (per-user) client held in the
    ``_obo_client`` context variable. When none is set, falls back to the
    default singleton from ``_get_default_client()``.
    """
    per_request_client = _obo_client.get()
    return _get_default_client() if per_request_client is None else per_request_client
112+
113+
64114
def get_databricks_host() -> str:
65115
"""Get the Databricks workspace host URL (without trailing slash)."""
66-
client = get_workspace_client()
116+
client = _get_default_client()
67117
host = client.config.host
68118
return host.rstrip("/") if host else ""
69119

70120

71121
def get_llm_api_key() -> str:
72122
"""Get the API key for LLM serving endpoints."""
73-
if is_running_on_databricks_apps():
74-
return get_workspace_client().config.token or ""
75-
return os.environ.get("DATABRICKS_TOKEN", "")
123+
client = get_workspace_client()
124+
return client.config.token or os.environ.get("DATABRICKS_TOKEN", "")

0 commit comments

Comments
 (0)