Skip to content

Commit d255eee

Browse files
simonfaltum authored and denik committed
auth: extract ?o=/?a= from DATABRICKS_HOST env var (#5337)
## Why

Pasting a SPOG URL from the Databricks UI (e.g. `https://acme.azuredatabricks.net/?o=12345`) into `DATABRICKS_HOST` drops the workspace identifier before any API call goes out. The SDK strips path and query from `Host` in `fixHostIfNeeded` without promoting `?o=` to `WorkspaceID`, so the request goes to the SPOG hostname without an `X-Databricks-Org-Id` header. The server can't route it and answers with the login HTML page, which surfaces as:

```
$ DATABRICKS_HOST=https://acme.azuredatabricks.net/?o=12345 databricks bundle validate
Error: received HTML response instead of JSON
```

The bundle YAML `workspace.host` field is already normalized via `NormalizeHostURL` in `bundle/config/workspace.go`, and `databricks api` handles `?o=` per call (#5137). The env-var path was the remaining gap.

## Changes

Before: `DATABRICKS_HOST=https://acme.databricks.net/?o=12345` reached the SDK with the query intact, the SDK dropped it, and `WorkspaceID` stayed empty.

Now: `auth.NormalizeDatabricksConfigFromEnv` is called on the SDK config in `MustWorkspaceClient`, `MustAccountClient`, and the `databricks api` command. It uses the existing `ExtractHostQueryParams` helper to promote `?o=` / `?workspace_id=` to `cfg.WorkspaceID` and `?a=` / `?account_id=` to `cfg.AccountID` (only when the config field is empty and the `DATABRICKS_WORKSPACE_ID` / `DATABRICKS_ACCOUNT_ID` env var is unset), then sets `cfg.Host` to the URL without the query string. The process environment itself is not modified, and a host that is already set on the config takes priority over the env var.

A follow-up PR will push the same normalization into the SDK's `fixHostIfNeeded` so Python/Java/JS SDK users and any direct Go-SDK callers get the same fix without going through the CLI.

## Test plan

- [x] `go test ./libs/auth/` covers the new helper with table-driven cases: `?o=` promotion, `?a=` + `?o=` together, existing `DATABRICKS_WORKSPACE_ID` is preserved, hosts without query are untouched, non-numeric `?o=` is dropped, unset `DATABRICKS_HOST` is a no-op.
- [x] `go test ./cmd/root/` passes.
- [x] `./task checks`.
- [x] Manual end-to-end repro with a local SPOG-shaped test server: before the fix the SDK sent no `X-Databricks-Org-Id` header and got HTML back; after the fix the header is `258628866953061` and `bundle validate` proceeds to the workspace API.
1 parent f084bd8 commit d255eee

4 files changed

Lines changed: 132 additions & 0 deletions

File tree

cmd/api/api.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ func makeCommand(method string) *cobra.Command {
9494
cfg.Profile = databrickscfg.ResolveDefaultProfile(cmd.Context())
9595
}
9696

97+
auth.NormalizeDatabricksConfigFromEnv(cmd.Context(), cfg)
98+
9799
api, err := client.New(cfg)
98100
if err != nil {
99101
return err

cmd/root/auth.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ func MustAccountClient(cmd *cobra.Command, args []string) error {
152152
}
153153

154154
ctx := cmd.Context()
155+
auth.NormalizeDatabricksConfigFromEnv(ctx, cfg)
155156
ctx = cmdctx.SetConfigUsed(ctx, cfg)
156157
cmd.SetContext(ctx)
157158

@@ -250,6 +251,7 @@ func MustWorkspaceClient(cmd *cobra.Command, args []string) error {
250251
cfg.Profile = profile
251252
}
252253

254+
auth.NormalizeDatabricksConfigFromEnv(ctx, cfg)
253255
resolveDefaultProfile(ctx, cfg)
254256

255257
_, isTargetFlagSet := targetFlagValue(cmd)

libs/auth/host_env.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package auth
2+
3+
import (
4+
"context"
5+
6+
"github.com/databricks/cli/libs/env"
7+
sdkconfig "github.com/databricks/databricks-sdk-go/config"
8+
)
9+
10+
// SPOG URLs from the Databricks UI carry the workspace ID as a ?o= query
11+
// parameter and the account ID as ?a=, e.g.
12+
// https://acme.databricks.net/?o=12345. The SDK strips path and query from
13+
// Host in fixHostIfNeeded without extracting these IDs, so a DATABRICKS_HOST
14+
// env var with such a URL drops the workspace identifier and API calls hit
15+
// the SPOG without an X-Databricks-Org-Id header, which the server answers
16+
// with HTML (a login page) instead of JSON.
17+
//
18+
// TODO: stopgap. The matching SDK fix is databricks/databricks-sdk-go#1699,
19+
// which handles ?o=/?a= directly in fixHostIfNeeded. Delete this helper on
20+
// the next SDK bump that includes that change.
21+
22+
// NormalizeDatabricksConfigFromEnv promotes ?o=/?workspace_id= and
23+
// ?a=/?account_id= query parameters from the DATABRICKS_HOST env var into
24+
// the matching fields on cfg, and sets cfg.Host to the stripped URL. It
25+
// does not mutate process env, so the effect is scoped to the SDK config
26+
// built from this cfg (and any subprocess env derived from it via
27+
// auth.Env).
28+
//
29+
// Only fills in empty fields. If cfg.Host is already set, the query
30+
// params aren't promoted at all (an explicit host takes priority). If a
31+
// dedicated env var (DATABRICKS_WORKSPACE_ID, DATABRICKS_ACCOUNT_ID) is
32+
// set, that more explicit signal wins over the query param.
33+
func NormalizeDatabricksConfigFromEnv(ctx context.Context, cfg *sdkconfig.Config) {
34+
if cfg.Host != "" {
35+
return
36+
}
37+
host, ok := env.Lookup(ctx, "DATABRICKS_HOST")
38+
if !ok || host == "" {
39+
return
40+
}
41+
params := ExtractHostQueryParams(host)
42+
if params.Host == host {
43+
return
44+
}
45+
cfg.Host = params.Host
46+
if cfg.WorkspaceID == "" && params.WorkspaceID != "" && env.Get(ctx, "DATABRICKS_WORKSPACE_ID") == "" {
47+
cfg.WorkspaceID = params.WorkspaceID
48+
}
49+
if cfg.AccountID == "" && params.AccountID != "" && env.Get(ctx, "DATABRICKS_ACCOUNT_ID") == "" {
50+
cfg.AccountID = params.AccountID
51+
}
52+
}

libs/auth/host_env_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package auth
2+
3+
import (
4+
"testing"
5+
6+
"github.com/databricks/cli/libs/env"
7+
sdkconfig "github.com/databricks/databricks-sdk-go/config"
8+
"github.com/stretchr/testify/assert"
9+
)
10+
11+
func TestNormalizeDatabricksConfigFromEnv(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
host string
15+
envWorkspaceID string
16+
envAccountID string
17+
cfgInHost string
18+
wantHost string
19+
wantWorkspaceID string
20+
wantAccountID string
21+
}{
22+
{
23+
name: "spog url promotes workspace id",
24+
host: "https://acme.databricks.net/?o=12345",
25+
wantHost: "https://acme.databricks.net",
26+
wantWorkspaceID: "12345",
27+
},
28+
{
29+
name: "spog url with account id",
30+
host: "https://acme.databricks.net/?a=abc&o=12345",
31+
wantHost: "https://acme.databricks.net",
32+
wantWorkspaceID: "12345",
33+
wantAccountID: "abc",
34+
},
35+
{
36+
name: "host without query is a no-op",
37+
host: "https://acme.databricks.net",
38+
},
39+
{
40+
name: "env workspace id wins over query param",
41+
host: "https://acme.databricks.net/?o=12345",
42+
envWorkspaceID: "99999",
43+
wantHost: "https://acme.databricks.net",
44+
wantWorkspaceID: "",
45+
},
46+
{
47+
name: "cfg host already set leaves env alone",
48+
host: "https://other.databricks.net/?o=12345",
49+
cfgInHost: "https://acme.databricks.net",
50+
wantHost: "https://acme.databricks.net",
51+
},
52+
{
53+
name: "no host env is a no-op",
54+
},
55+
{
56+
name: "non-numeric o is dropped, host trailing slash trimmed",
57+
host: "https://acme.databricks.net/?o=notanumber",
58+
wantHost: "https://acme.databricks.net",
59+
},
60+
}
61+
62+
for _, tt := range tests {
63+
t.Run(tt.name, func(t *testing.T) {
64+
ctx := env.Set(t.Context(), "DATABRICKS_HOST", tt.host)
65+
ctx = env.Set(ctx, "DATABRICKS_WORKSPACE_ID", tt.envWorkspaceID)
66+
ctx = env.Set(ctx, "DATABRICKS_ACCOUNT_ID", tt.envAccountID)
67+
68+
cfg := &sdkconfig.Config{Host: tt.cfgInHost}
69+
NormalizeDatabricksConfigFromEnv(ctx, cfg)
70+
71+
assert.Equal(t, tt.wantHost, cfg.Host)
72+
assert.Equal(t, tt.wantWorkspaceID, cfg.WorkspaceID)
73+
assert.Equal(t, tt.wantAccountID, cfg.AccountID)
74+
})
75+
}
76+
}

0 commit comments

Comments
 (0)