-
Notifications
You must be signed in to change notification settings - Fork 189
Expand file tree
/
Copy pathDatabricksParameters.cs
More file actions
340 lines (292 loc) · 16.1 KB
/
DatabricksParameters.cs
File metadata and controls
340 lines (292 loc) · 16.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System.Collections.Generic;

using Apache.Arrow.Adbc.Drivers.Apache;
using Apache.Arrow.Adbc.Drivers.Apache.Spark;
namespace Apache.Arrow.Adbc.Drivers.Databricks
{
/// <summary>
/// Connection-option keys used when connecting to Databricks data sources.
/// Each constant is the string key callers pass in the ADBC properties dictionary.
/// </summary>
public class DatabricksParameters : SparkParameters
{
    // CloudFetch configuration parameters

    /// <summary>
    /// Whether to use CloudFetch for retrieving results.
    /// Default value is true if not specified.
    /// </summary>
    public const string UseCloudFetch = "adbc.databricks.cloudfetch.enabled";

    /// <summary>
    /// Whether the client can decompress LZ4 compressed results.
    /// Default value is true if not specified.
    /// </summary>
    public const string CanDecompressLz4 = "adbc.databricks.cloudfetch.lz4.enabled";

    /// <summary>
    /// Maximum bytes per file for CloudFetch.
    /// The value can be specified with unit suffixes: B (bytes), KB (kilobytes), MB (megabytes), GB (gigabytes).
    /// If no unit is specified, the value is treated as bytes.
    /// Default value is 20MB if not specified.
    /// </summary>
    public const string MaxBytesPerFile = "adbc.databricks.cloudfetch.max_bytes_per_file";

    /// <summary>
    /// Maximum number of retry attempts for CloudFetch downloads.
    /// Default value is 3 if not specified.
    /// </summary>
    public const string CloudFetchMaxRetries = "adbc.databricks.cloudfetch.max_retries";

    /// <summary>
    /// Delay in milliseconds between CloudFetch retry attempts.
    /// Default value is 500ms if not specified.
    /// </summary>
    public const string CloudFetchRetryDelayMs = "adbc.databricks.cloudfetch.retry_delay_ms";

    /// <summary>
    /// Timeout in minutes for CloudFetch HTTP operations.
    /// Default value is 5 minutes if not specified.
    /// </summary>
    public const string CloudFetchTimeoutMinutes = "adbc.databricks.cloudfetch.timeout_minutes";

    /// <summary>
    /// Buffer time in seconds before URL expiration to trigger refresh.
    /// Default value is 60 seconds if not specified.
    /// </summary>
    public const string CloudFetchUrlExpirationBufferSeconds = "adbc.databricks.cloudfetch.url_expiration_buffer_seconds";

    /// <summary>
    /// Maximum number of URL refresh attempts for CloudFetch downloads.
    /// Default value is 3 if not specified.
    /// </summary>
    public const string CloudFetchMaxUrlRefreshAttempts = "adbc.databricks.cloudfetch.max_url_refresh_attempts";

    /// <summary>
    /// Whether to enable the use of direct results when executing queries.
    /// Default value is true if not specified.
    /// </summary>
    public const string EnableDirectResults = "adbc.databricks.enable_direct_results";

    /// <summary>
    /// Whether to apply service side properties (SSP) with queries. If false, SSP will be applied
    /// by setting the Thrift configuration when the session is opened.
    /// Default value is false if not specified.
    /// </summary>
    public const string ApplySSPWithQueries = "adbc.databricks.apply_ssp_with_queries";

    /// <summary>
    /// Prefix for server-side properties. Properties with this prefix will be passed to the server
    /// by executing a "set key=value" query when opening a session.
    /// For example, a property with key "adbc.databricks.ssp_use_cached_result"
    /// and value "true" will result in executing "set use_cached_result=true" on the server.
    /// </summary>
    public const string ServerSidePropertyPrefix = "adbc.databricks.ssp_";

    /// <summary>
    /// Controls whether to retry requests that receive a 503 response with a Retry-After header.
    /// Default value is true (enabled). Set to false to disable retry behavior.
    /// NOTE(review): key uses the "adbc.spark." prefix — presumably shared with the Spark
    /// driver's retry handling; do not rename to "adbc.databricks." without confirming,
    /// as that would break existing callers.
    /// </summary>
    public const string TemporarilyUnavailableRetry = "adbc.spark.temporarily_unavailable_retry";

    /// <summary>
    /// Maximum total time in seconds to retry 503 responses before failing.
    /// Default value is 900 seconds (15 minutes). Set to 0 to retry indefinitely.
    /// NOTE(review): key intentionally uses the "adbc.spark." prefix, matching
    /// <see cref="TemporarilyUnavailableRetry"/>.
    /// </summary>
    public const string TemporarilyUnavailableRetryTimeout = "adbc.spark.temporarily_unavailable_retry_timeout";

    /// <summary>
    /// Maximum number of parallel downloads for CloudFetch operations.
    /// Default value is 3 if not specified.
    /// </summary>
    public const string CloudFetchParallelDownloads = "adbc.databricks.cloudfetch.parallel_downloads";

    /// <summary>
    /// Number of files to prefetch in CloudFetch operations.
    /// Default value is 2 if not specified.
    /// </summary>
    public const string CloudFetchPrefetchCount = "adbc.databricks.cloudfetch.prefetch_count";

    /// <summary>
    /// Maximum memory buffer size in MB for CloudFetch prefetched files.
    /// Default value is 200MB if not specified.
    /// </summary>
    public const string CloudFetchMemoryBufferSize = "adbc.databricks.cloudfetch.memory_buffer_size_mb";

    /// <summary>
    /// Whether CloudFetch prefetch functionality is enabled.
    /// Default value is true if not specified.
    /// </summary>
    public const string CloudFetchPrefetchEnabled = "adbc.databricks.cloudfetch.prefetch_enabled";

    /// <summary>
    /// Whether to enable straggler download detection and mitigation for CloudFetch operations.
    /// Default value is false if not specified.
    /// </summary>
    public const string CloudFetchStragglerMitigationEnabled = "adbc.databricks.cloudfetch.straggler_mitigation_enabled";

    /// <summary>
    /// Multiplier used to determine straggler threshold based on median throughput.
    /// Default value is 1.5 if not specified.
    /// </summary>
    public const string CloudFetchStragglerMultiplier = "adbc.databricks.cloudfetch.straggler_multiplier";

    /// <summary>
    /// Fraction of downloads that must complete before straggler detection begins.
    /// Valid range: 0.0 to 1.0. Default value is 0.6 (60%) if not specified.
    /// </summary>
    public const string CloudFetchStragglerQuantile = "adbc.databricks.cloudfetch.straggler_quantile";

    /// <summary>
    /// Extra buffer time in seconds added to the straggler threshold calculation.
    /// Default value is 5 seconds if not specified.
    /// </summary>
    public const string CloudFetchStragglerPaddingSeconds = "adbc.databricks.cloudfetch.straggler_padding_seconds";

    /// <summary>
    /// Maximum number of stragglers detected per query before triggering sequential download fallback.
    /// Default value is 10 if not specified.
    /// </summary>
    public const string CloudFetchMaxStragglersPerQuery = "adbc.databricks.cloudfetch.max_stragglers_per_query";

    /// <summary>
    /// Whether to automatically fall back to sequential downloads when max stragglers threshold is exceeded.
    /// Default value is false if not specified.
    /// </summary>
    public const string CloudFetchSynchronousFallbackEnabled = "adbc.databricks.cloudfetch.synchronous_fallback_enabled";

    /// <summary>
    /// Maximum bytes per fetch request when retrieving query results from servers.
    /// The value can be specified with unit suffixes: B (bytes), KB (kilobytes), MB (megabytes), GB (gigabytes).
    /// If no unit is specified, the value is treated as bytes.
    /// Default value is 400MB if not specified.
    /// </summary>
    public const string MaxBytesPerFetchRequest = "adbc.databricks.max_bytes_per_fetch_request";

    /// <summary>
    /// The OAuth grant type to use for authentication.
    /// Supported values:
    /// - "access_token": Use a pre-generated Databricks personal access token (default)
    /// - "client_credentials": Use OAuth client credentials flow for m2m authentication
    /// When using "client_credentials", the driver will automatically handle token acquisition,
    /// renewal, and authentication with the Databricks service.
    /// See <see cref="DatabricksConstants.OAuthGrantTypes"/> for the value constants.
    /// </summary>
    public const string OAuthGrantType = "adbc.databricks.oauth.grant_type";

    /// <summary>
    /// The OAuth client ID for client credentials flow.
    /// Required when grant_type is "client_credentials".
    /// This is the client ID you obtained when registering your application with Databricks.
    /// </summary>
    public const string OAuthClientId = "adbc.databricks.oauth.client_id";

    /// <summary>
    /// The OAuth client secret for client credentials flow.
    /// Required when grant_type is "client_credentials".
    /// This is the client secret you obtained when registering your application with Databricks.
    /// </summary>
    public const string OAuthClientSecret = "adbc.databricks.oauth.client_secret";

    /// <summary>
    /// The OAuth scope for client credentials flow.
    /// Optional when grant_type is "client_credentials".
    /// Default value is "sql" if not specified.
    /// </summary>
    public const string OAuthScope = "adbc.databricks.oauth.scope";

    /// <summary>
    /// Whether to use multiple catalogs.
    /// Default value is true if not specified.
    /// </summary>
    public const string EnableMultipleCatalogSupport = "adbc.databricks.enable_multiple_catalog_support";

    /// <summary>
    /// Whether to enable primary key foreign key metadata call.
    /// Default value is true if not specified.
    /// </summary>
    public const string EnablePKFK = "adbc.databricks.enable_pk_fk";

    /// <summary>
    /// Whether to use query DESC TABLE EXTENDED to get extended column metadata when the current DBR supports it.
    /// Default value is true if not specified.
    /// </summary>
    public const string UseDescTableExtended = "adbc.databricks.use_desc_table_extended";

    /// <summary>
    /// Whether to enable RunAsync flag in Thrift operation.
    /// Default value is true if not specified.
    /// </summary>
    public const string EnableRunAsyncInThriftOp = "adbc.databricks.enable_run_async_thrift";

    /// <summary>
    /// Whether to propagate trace parent headers in HTTP requests.
    /// Default value is true if not specified.
    /// When enabled, the driver will add W3C Trace Context headers to all HTTP requests.
    /// </summary>
    public const string TracePropagationEnabled = "adbc.databricks.trace_propagation.enabled";

    /// <summary>
    /// The name of the HTTP header to use for trace parent propagation.
    /// Default value is "traceparent" (W3C standard) if not specified.
    /// This allows customization for systems that use different header names.
    /// </summary>
    public const string TraceParentHeaderName = "adbc.databricks.trace_propagation.header_name";

    /// <summary>
    /// Whether to include trace state header in HTTP requests.
    /// Default value is false if not specified.
    /// When enabled, the driver will also propagate the tracestate header if available.
    /// </summary>
    public const string TraceStateEnabled = "adbc.databricks.trace_propagation.state_enabled";

    /// <summary>
    /// The minutes before token expiration when we should start renewing the token.
    /// Default value is 0 (disabled) if not specified.
    /// </summary>
    public const string TokenRenewLimit = "adbc.databricks.token_renew_limit";

    /// <summary>
    /// The client ID of the service principal when using workload identity federation.
    /// Default value is empty if not specified.
    /// </summary>
    public const string IdentityFederationClientId = "adbc.databricks.identity_federation_client_id";

    /// <summary>
    /// Controls whether driver configuration takes precedence over passed-in properties during configuration merging.
    /// When "true": Environment/driver config properties override passed-in constructor properties.
    /// When "false" (default): Passed-in constructor properties override environment/driver config properties.
    /// This property can be set either in the environment configuration file or in passed-in properties.
    /// When set in both places, the value in passed-in properties takes precedence.
    /// Default value is false if not specified.
    /// </summary>
    public const string DriverConfigTakePrecedence = "adbc.databricks.driver_config_take_precedence";

    /// <summary>
    /// The interval in seconds for heartbeat polling during long-running operations.
    /// This prevents queries from timing out by periodically checking operation status.
    /// Default value is 60 seconds if not specified.
    /// Must be a positive integer value.
    /// </summary>
    public const string FetchHeartbeatInterval = "adbc.databricks.fetch_heartbeat_interval";

    /// <summary>
    /// The timeout in seconds for operation status polling requests.
    /// This controls how long to wait for each individual polling request to complete.
    /// Default value is 30 seconds if not specified.
    /// Must be a positive integer value.
    /// </summary>
    public const string OperationStatusRequestTimeout = "adbc.databricks.operation_status_request_timeout";
}
/// <summary>
/// Default values and well-known option values used by the Databricks driver.
/// </summary>
public class DatabricksConstants
{
/// <summary>
/// Default interval, in seconds, between operation-status polls (the "fetch heartbeat")
/// while waiting on long-running operations.
/// </summary>
public const int DefaultOperationStatusPollingIntervalSeconds = 60;
/// <summary>
/// Default timeout, in seconds, for a single operation-status polling request.
/// </summary>
public const int DefaultOperationStatusRequestTimeoutSeconds = 30;
/// <summary>
/// Default poll interval, in milliseconds, when waiting for an asynchronously
/// executed statement to complete.
/// </summary>
public const int DefaultAsyncExecPollIntervalMs = 100;
/// <summary>
/// Accepted values for the OAuth grant type connection option
/// ("adbc.databricks.oauth.grant_type").
/// </summary>
public static class OAuthGrantTypes
{
/// <summary>
/// Use a pre-generated Databricks personal access token for authentication.
/// When using this grant type, you must provide the token via the
/// adbc.spark.oauth.access_token parameter.
/// </summary>
public const string AccessToken = "access_token";
/// <summary>
/// Use the OAuth client credentials flow for machine-to-machine (m2m) authentication.
/// When using this grant type, you must provide:
/// - adbc.databricks.oauth.client_id: The OAuth client ID
/// - adbc.databricks.oauth.client_secret: The OAuth client secret
/// The driver will automatically handle token acquisition, renewal, and
/// authentication with the Databricks service.
/// </summary>
public const string ClientCredentials = "client_credentials";
}
}
}