-
Notifications
You must be signed in to change notification settings - Fork 60
Expand file tree
/
Copy pathconnector.go
More file actions
488 lines (432 loc) · 15.5 KB
/
connector.go
File metadata and controls
488 lines (432 loc) · 15.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
package dbsql
import (
"context"
"crypto/tls"
"database/sql/driver"
"fmt"
"net/http"
"net/url"
"strings"
"time"
"github.com/databricks/databricks-sql-go/auth"
"github.com/databricks/databricks-sql-go/auth/oauth/m2m"
"github.com/databricks/databricks-sql-go/auth/pat"
"github.com/databricks/databricks-sql-go/auth/tokenprovider"
"github.com/databricks/databricks-sql-go/driverctx"
dbsqlerr "github.com/databricks/databricks-sql-go/errors"
"github.com/databricks/databricks-sql-go/internal/cli_service"
"github.com/databricks/databricks-sql-go/internal/client"
"github.com/databricks/databricks-sql-go/internal/config"
dbsqlerrint "github.com/databricks/databricks-sql-go/internal/errors"
"github.com/databricks/databricks-sql-go/logger"
"github.com/databricks/databricks-sql-go/telemetry"
)
// connector is the driver's driver.Connector implementation. It bundles the
// driver configuration with the HTTP client that Connect hands to the Thrift
// client and, optionally wrapped, to telemetry.
type connector struct {
// cfg is the driver configuration read by Connect (host, port, HTTP path,
// session parameters, telemetry settings, ...).
cfg *config.Config
// client is the HTTP client reused by every Connect call on this connector.
client *http.Client
}
// Connect opens a new Databricks session and returns it as a driver.Conn.
//
// It builds a Thrift client from the connector's config and HTTP client, sends
// a TOpenSessionReq carrying the configured session parameters and initial
// namespace, and wraps the resulting session in a conn. Telemetry is then
// initialized for the connection and, when a telemetry handle is returned, the
// session-open latency is recorded.
//
// Errors:
//   - a driver error wrapping dbsqlerr.ErrThriftClient when the Thrift client
//     cannot be created;
//   - a request error mentioning host, port and httpPath when OpenSession fails.
func (c *connector) Connect(ctx context.Context) (driver.Conn, error) {
// Catalog and schema stay nil when unset, so an empty string is never sent
// as an identifier in the initial namespace.
var catalogName *cli_service.TIdentifier
var schemaName *cli_service.TIdentifier
if c.cfg.Catalog != "" {
catalogName = cli_service.TIdentifierPtr(cli_service.TIdentifier(c.cfg.Catalog))
}
if c.cfg.Schema != "" {
schemaName = cli_service.TIdentifierPtr(cli_service.TIdentifier(c.cfg.Schema))
}
tclient, err := client.InitThriftClient(c.cfg, c.client)
if err != nil {
return nil, dbsqlerrint.NewDriverError(ctx, dbsqlerr.ErrThriftClient, err)
}
// Prepare session configuration. The configured parameters are copied into
// a fresh map so the metric-view flag added below never modifies
// c.cfg.SessionParams.
sessionParams := make(map[string]string)
for k, v := range c.cfg.SessionParams {
sessionParams[k] = v
}
if c.cfg.EnableMetricViewMetadata {
sessionParams["spark.sql.thriftserver.metadata.metricview.enabled"] = "true"
}
protocolVersion := int64(c.cfg.ThriftProtocolVersion)
// Time only the OpenSession round trip; the value is reported to telemetry
// further down.
sessionStart := time.Now()
session, err := tclient.OpenSession(ctx, &cli_service.TOpenSessionReq{
ClientProtocolI64: &protocolVersion,
Configuration: sessionParams,
InitialNamespace: &cli_service.TNamespace{
CatalogName: catalogName,
SchemaName: schemaName,
},
CanUseMultipleCatalogs: &c.cfg.CanUseMultipleCatalogs,
})
// The latency is computed before the error check, but it is only used on
// the success path below.
sessionLatencyMs := time.Since(sessionStart).Milliseconds()
if err != nil {
return nil, dbsqlerrint.NewRequestError(ctx, fmt.Sprintf("error connecting: host=%s port=%d, httpPath=%s", c.cfg.Host, c.cfg.Port, c.cfg.HTTPPath), err)
}
// The connection id is the string form of the server-issued session GUID.
conn := &conn{
id: client.SprintGuid(session.SessionHandle.GetSessionId().GUID),
cfg: c.cfg,
client: tclient,
session: session,
}
// Logger tagged with the connection id and the correlation id carried by ctx.
log := logger.WithContext(conn.id, driverctx.CorrelationIdFromContext(ctx), "")
// Extract SPOG routing headers from ?o= in HTTPPath. When ?o=<workspaceId>
// is present (Custom URL / SPOG hosts), wrap the HTTP client used for
// telemetry + feature-flag calls with a transport that injects
// x-databricks-org-id. Thrift routes via the URL so its own c.client
// doesn't need wrapping.
telemetryClient := c.client
if spogHeaders := extractSpogHeaders(c.cfg.HTTPPath); len(spogHeaders) > 0 {
telemetryClient = withSpogHeaders(c.client, spogHeaders)
}
// Initialize telemetry: client config overlay decides; if unset, feature flags decide
conn.telemetry = telemetry.InitializeForConnection(ctx, telemetry.TelemetryInitOptions{
Host: c.cfg.Host,
DriverVersion: c.cfg.DriverVersion,
HTTPClient: telemetryClient,
EnableTelemetry: c.cfg.EnableTelemetry,
BatchSize: c.cfg.TelemetryBatchSize,
FlushInterval: c.cfg.TelemetryFlushInterval,
RetryCount: c.cfg.TelemetryRetryCount,
RetryDelay: c.cfg.TelemetryRetryDelay,
})
// A nil telemetry handle is tolerated: the session-creation metric is
// simply not recorded in that case.
if conn.telemetry != nil {
log.Debug().Msg("telemetry initialized for connection")
conn.telemetry.RecordOperation(ctx, conn.id, "", telemetry.OperationTypeCreateSession, sessionLatencyMs, nil)
}
log.Info().Msgf("connect: host=%s port=%d httpPath=%s serverProtocolVersion=0x%X", c.cfg.Host, c.cfg.Port, c.cfg.HTTPPath, session.ServerProtocolVersion)
return conn, nil
}
// Driver returns a new databricksDriver value. It exists so that the
// connector satisfies driver.Connector and sql.DB.Driver() has something to
// report.
func (c *connector) Driver() driver.Driver {
	var drv driver.Driver = &databricksDriver{}
	return drv
}
// Compile-time check that *connector implements driver.Connector.
var _ driver.Connector = (*connector)(nil)
// ConnOption is a functional option: a function that adjusts a *config.Config
// in place. NewConnector applies the options it is given in argument order.
type ConnOption func(*config.Config)
// NewConnector builds a driver.Connector suitable for `sql.OpenDB()`, as an
// alternative to assembling a DSN string by hand.
//
// It starts from the default configuration, stamps it with the driver version,
// applies the supplied options in argument order, and finally creates the
// retryable HTTP client from the resulting configuration. The returned error
// is always nil in this implementation.
func NewConnector(options ...ConnOption) (driver.Connector, error) {
	cfg := config.WithDefaults()
	cfg.DriverVersion = DriverVersion

	for i := range options {
		options[i](cfg)
	}

	return &connector{
		cfg:    cfg,
		client: client.RetryableClient(cfg),
	}, nil
}
// extractSpogHeaders reads the ?o=<workspaceId> query parameter from httpPath
// and turns it into an x-databricks-org-id header map for SPOG routing.
//
// On SPOG (Custom URL) workspaces httpPath looks like
// /sql/1.0/warehouses/<id>?o=<workspaceId>. Thrift requests are routed by the
// URL itself, but telemetry and feature-flag endpoints live on other hosts and
// need the workspace id as a header instead; this helper extracts it once so
// those code paths can inject it.
//
// The result is nil when:
//   - httpPath contains no "?",
//   - the text after the first "?" is not a parseable query string, or
//   - the "o" parameter is absent or empty.
func extractSpogHeaders(httpPath string) map[string]string {
	sep := strings.Index(httpPath, "?")
	if sep < 0 {
		return nil
	}

	// Everything after the first "?" is treated as the query string.
	query, err := url.ParseQuery(httpPath[sep+1:])
	if err != nil {
		logger.Debug().Msgf(
			"SPOG header extraction: malformed query string in httpPath, skipping org-id extraction: %s",
			err)
		return nil
	}

	if orgID := query.Get("o"); orgID != "" {
		logger.Debug().Msgf(
			"SPOG header extraction: injecting x-databricks-org-id=%s (extracted from ?o= in httpPath)",
			orgID)
		return map[string]string{"x-databricks-org-id": orgID}
	}

	logger.Debug().Msg(
		"SPOG header extraction: httpPath has query string but no ?o= param, " +
			"skipping x-databricks-org-id injection")
	return nil
}
// withSpogHeaders derives a new *http.Client from base whose transport adds
// the given headers to every outgoing request. base itself is not modified;
// its CheckRedirect, Jar and Timeout are carried over, and its transport (or
// http.DefaultTransport when base has none) becomes the wrapped transport.
// Headers already present on a request are left as the caller set them.
//
// This lets the driver attach x-databricks-org-id to feature-flag and
// telemetry calls without changing the telemetry package's signatures.
func withSpogHeaders(base *http.Client, headers map[string]string) *http.Client {
	inner := base.Transport
	if inner == nil {
		inner = http.DefaultTransport
	}

	wrapped := &http.Client{
		CheckRedirect: base.CheckRedirect,
		Jar:           base.Jar,
		Timeout:       base.Timeout,
	}
	wrapped.Transport = &headerInjectingTransport{base: inner, headers: headers}
	return wrapped
}
// headerInjectingTransport is an http.RoundTripper decorator that adds a fixed
// set of headers to each request before delegating to base. A header that the
// request already carries with a non-empty value is never replaced.
type headerInjectingTransport struct {
	base    http.RoundTripper // transport that actually performs the request
	headers map[string]string // header name -> value to add when missing
}

// RoundTrip implements http.RoundTripper. The incoming request is cloned
// first, because a RoundTripper must not modify the request it is given; the
// missing headers are set on the clone, which is then sent through base.
func (t *headerInjectingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	outbound := req.Clone(req.Context())
	for name, value := range t.headers {
		if outbound.Header.Get(name) != "" {
			// Caller-supplied value wins.
			continue
		}
		outbound.Header.Set(name, value)
	}
	return t.base.RoundTrip(outbound)
}
// withUserConfig returns an option that replaces the whole UserConfig section
// of the configuration with ucfg.
func withUserConfig(ucfg config.UserConfig) ConnOption {
	return func(cfg *config.Config) {
		cfg.UserConfig = ucfg
	}
}
// WithServerHostname sets the server hostname. Mandatory.
//
// host is passed through parseHostName: the hostname part is always stored in
// Host, and Protocol is overwritten only when a protocol was determined.
func WithServerHostname(host string) ConnOption {
	return func(cfg *config.Config) {
		scheme, name := parseHostName(host)
		cfg.Host = name
		if scheme == "" {
			return
		}
		cfg.Protocol = scheme
	}
}
// parseHostName splits an optional "http"/"https" scheme off host.
//
// A scheme is recognised only when the literal prefix "https" or "http" is
// immediately followed by ":" or "//" (so "https://h", "https:h" and "http://h"
// all yield hostname "h"). Hostnames that merely start with those letters,
// such as "httpbin.example.com", are returned unchanged with an empty
// protocol instead of being truncated.
//
// When no scheme is present and the host is exactly "localhost", the protocol
// defaults to "http". In every other scheme-less case protocol is "" and the
// caller keeps whatever protocol it already had.
func parseHostName(host string) (protocol, hostname string) {
	hostname = host

	// "https" must be tried first: every "https..." string also starts with "http".
	for _, scheme := range []string{"https", "http"} {
		if !strings.HasPrefix(host, scheme) {
			continue
		}
		rest := host[len(scheme):]
		if strings.HasPrefix(rest, ":") || strings.HasPrefix(rest, "//") {
			protocol = scheme
			hostname = strings.TrimPrefix(strings.TrimPrefix(rest, ":"), "//")
			break
		}
	}

	if protocol == "" && hostname == "localhost" {
		protocol = "http"
	}
	return protocol, hostname
}
// WithPort sets the server port. Mandatory.
func WithPort(port int) ConnOption {
	return func(cfg *config.Config) {
		cfg.Port = port
	}
}
// WithRetries configures the retry policy. Sane defaults are provided, and a
// negative retryMax disables retrying.
//
// Defaults:
//   - retryWaitMin = 1 * time.Second
//   - retryWaitMax = 30 * time.Second
//   - retryMax = 4
func WithRetries(retryMax int, retryWaitMin time.Duration, retryWaitMax time.Duration) ConnOption {
	return func(cfg *config.Config) {
		cfg.RetryMax = retryMax
		cfg.RetryWaitMin = retryWaitMin
		cfg.RetryWaitMax = retryWaitMax
	}
}
// WithAccessToken configures Personal Access Token authentication. Mandatory
// for now. An empty token is ignored; otherwise the token is stored in
// AccessToken and a PAT authenticator built from it replaces any authenticator
// set earlier.
func WithAccessToken(token string) ConnOption {
	return func(cfg *config.Config) {
		if token == "" {
			return
		}
		cfg.AccessToken = token
		cfg.Authenticator = &pat.PATAuth{AccessToken: token}
	}
}
// WithHTTPPath sets the endpoint path of the warehouse. Mandatory.
// A leading "/" is added when path does not already start with one.
func WithHTTPPath(path string) ConnOption {
	return func(cfg *config.Config) {
		if strings.HasPrefix(path, "/") {
			cfg.HTTPPath = path
			return
		}
		cfg.HTTPPath = "/" + path
	}
}
// WithMaxRows sets the maximum number of rows fetched per request. Default is
// 10000. Passing 0 leaves the current value untouched.
func WithMaxRows(n int) ConnOption {
	return func(cfg *config.Config) {
		if n == 0 {
			return
		}
		cfg.MaxRows = n
	}
}
// WithTimeout sets the timeout for server-side query execution. Default is no
// timeout.
func WithTimeout(n time.Duration) ConnOption {
	return func(cfg *config.Config) {
		cfg.QueryTimeout = n
	}
}
// WithInitialNamespace sets the catalog and schema the session starts in, so
// that queries can use <select * from foo> instead of
// <select * from catalog.schema.foo>.
func WithInitialNamespace(catalog, schema string) ConnOption {
	return func(cfg *config.Config) {
		cfg.Catalog, cfg.Schema = catalog, schema
	}
}
// WithUserAgentEntry sets the user-agent entry used to identify partners.
// The expected format is <isv-name+product-name>.
func WithUserAgentEntry(entry string) ConnOption {
	return func(cfg *config.Config) {
		cfg.UserAgentEntry = entry
	}
}
// WithSessionParams sets the session parameters, which are passed directly in
// TOpenSessionReq.Configuration during session creation.
//
// The configuration stores its own copy of params rather than the caller's
// map. Maps are reference values, so keeping the caller's map would let later
// writes to the configuration's SessionParams appear in the caller's map, and
// let caller-side mutations race with code that reads the configuration. A
// nil params still results in a nil SessionParams.
//
// If a key equals "timezone" (case-insensitively) its value is additionally
// loaded as a time.Location and stored in Location; an invalid zone name is
// logged and otherwise ignored (the parameter itself is still kept).
func WithSessionParams(params map[string]string) ConnOption {
	return func(c *config.Config) {
		if params == nil {
			c.SessionParams = nil
			return
		}

		copied := make(map[string]string, len(params))
		for k, v := range params {
			copied[k] = v

			if strings.ToLower(k) != "timezone" {
				continue
			}
			loc, err := time.LoadLocation(v)
			if err != nil {
				logger.Error().Msgf("timezone %s is not valid", v)
				continue
			}
			c.Location = loc
		}
		c.SessionParams = copied
	}
}
// WithQueryTags sets session-level query tags from a map.
//
// The tags are serialized with SerializeQueryTags and stored under the
// QUERY_TAGS key of the session parameters, creating the parameter map if it
// does not exist yet. Nothing is stored when the serialized form is empty.
// All queries in the session carry these tags unless overridden at the
// statement level. This is the preferred way to set session-level tags because
// serialization and escaping are handled for the caller, consistent with the
// statement-level API.
func WithQueryTags(tags map[string]string) ConnOption {
	return func(cfg *config.Config) {
		encoded := SerializeQueryTags(tags)
		if encoded == "" {
			return
		}
		if cfg.SessionParams == nil {
			cfg.SessionParams = make(map[string]string)
		}
		cfg.SessionParams["QUERY_TAGS"] = encoded
	}
}
// WithSkipTLSHostVerify turns on tls.Config.InsecureSkipVerify, creating a
// TLS config with a TLS 1.2 minimum when none is set yet.
//
// WARNING:
// InsecureSkipVerify disables verification of the server's certificate chain
// as well as its host name, so TLS becomes susceptible to
// machine-in-the-middle attacks. Please only use this option when the hostname
// is an internal private link hostname.
func WithSkipTLSHostVerify() ConnOption {
	return func(cfg *config.Config) {
		if cfg.TLSConfig != nil {
			cfg.TLSConfig.InsecureSkipVerify = true // #nosec G402
			return
		}
		cfg.TLSConfig = &tls.Config{MinVersion: tls.VersionTLS12, InsecureSkipVerify: true} // #nosec G402
	}
}
// WithAuthenticator installs a custom authenticator. Mandatory if no access
// token is provided. The value is stored as given, including nil.
func WithAuthenticator(authr auth.Authenticator) ConnOption {
	return func(cfg *config.Config) {
		cfg.Authenticator = authr
	}
}
// WithTransport sets the transport to be used by the HTTP client.
//
// The transport is always recorded in Transport. If no HTTPClient has been
// configured yet, one is created around t; an HTTPClient that is already set
// is left as it is.
func WithTransport(t http.RoundTripper) ConnOption {
	return func(cfg *config.Config) {
		cfg.Transport = t
		if cfg.HTTPClient != nil {
			return
		}
		cfg.HTTPClient = &http.Client{Transport: t}
	}
}
// WithCloudFetch enables or disables cloud fetch for query execution. Default
// is false.
func WithCloudFetch(useCloudFetch bool) ConnOption {
	return func(cfg *config.Config) {
		cfg.UseCloudFetch = useCloudFetch
	}
}
// WithMaxDownloadThreads sets the maximum number of download threads used by
// cloud fetch. Default is 10.
func WithMaxDownloadThreads(numThreads int) ConnOption {
	return func(cfg *config.Config) {
		cfg.MaxDownloadThreads = numThreads
	}
}
// WithEnableMetricViewMetadata switches metric view metadata support on or
// off. Default is false. When enabled,
// spark.sql.thriftserver.metadata.metricview.enabled=true is added to the
// session configuration.
func WithEnableMetricViewMetadata(enable bool) ConnOption {
	return func(cfg *config.Config) {
		cfg.EnableMetricViewMetadata = enable
	}
}
// WithClientCredentials sets up OAuth M2M authentication. The option does
// nothing unless both clientID and clientSecret are non-empty.
//
// The authenticator is built with the Host value present in the configuration
// at the moment this option runs, so the hostname option has to be applied
// before this one.
func WithClientCredentials(clientID, clientSecret string) ConnOption {
	return func(cfg *config.Config) {
		if clientID == "" || clientSecret == "" {
			return
		}
		cfg.Authenticator = m2m.NewAuthenticator(clientID, clientSecret, cfg.Host)
	}
}
// WithTokenProvider sets up authentication backed by a custom token provider.
// A nil provider leaves the configuration unchanged.
func WithTokenProvider(provider tokenprovider.TokenProvider) ConnOption {
	return func(cfg *config.Config) {
		if provider == nil {
			return
		}
		cfg.Authenticator = tokenprovider.NewAuthenticator(provider)
	}
}
// WithExternalToken sets up authentication using an external token function
// (passthrough). tokenFunc is wrapped in an external token provider; a nil
// tokenFunc leaves the configuration unchanged.
func WithExternalToken(tokenFunc func() (string, error)) ConnOption {
	return func(cfg *config.Config) {
		if tokenFunc == nil {
			return
		}
		external := tokenprovider.NewExternalTokenProvider(tokenFunc)
		cfg.Authenticator = tokenprovider.NewAuthenticator(external)
	}
}
// WithStaticToken sets up authentication using a fixed token wrapped in a
// static token provider. An empty token leaves the configuration unchanged.
func WithStaticToken(token string) ConnOption {
	return func(cfg *config.Config) {
		if token == "" {
			return
		}
		static := tokenprovider.NewStaticTokenProvider(token)
		cfg.Authenticator = tokenprovider.NewAuthenticator(static)
	}
}
// WithFederatedTokenProvider sets up authentication using token federation.
// baseProvider is wrapped in a federation provider that auto-detects whether a
// token exchange is needed. A nil baseProvider leaves the configuration
// unchanged.
//
// The federation provider is created with the Host value present in the
// configuration when this option runs, so the hostname option has to be
// applied before this one.
func WithFederatedTokenProvider(baseProvider tokenprovider.TokenProvider) ConnOption {
	return func(cfg *config.Config) {
		if baseProvider == nil {
			return
		}
		federated := tokenprovider.NewFederationProvider(baseProvider, cfg.Host)
		cfg.Authenticator = tokenprovider.NewAuthenticator(federated)
	}
}
// WithFederatedTokenProviderAndClientID sets up SP-wide token federation:
// baseProvider is wrapped in a federation provider bound to clientID. A nil
// baseProvider leaves the configuration unchanged.
//
// The federation provider is created with the Host value present in the
// configuration when this option runs, so the hostname option has to be
// applied before this one.
func WithFederatedTokenProviderAndClientID(baseProvider tokenprovider.TokenProvider, clientID string) ConnOption {
	return func(cfg *config.Config) {
		if baseProvider == nil {
			return
		}
		federated := tokenprovider.NewFederationProviderWithClientID(baseProvider, cfg.Host, clientID)
		cfg.Authenticator = tokenprovider.NewAuthenticator(federated)
	}
}