Skip to content

Commit 86ee26b

Browse files
committed
[SEA-NodeJS] Kernel-parity batch: mTLS + custom headers/UA + retry/backoff + operation-status fields
Squashed rebase of the SEA kernel-parity batch onto current main (replaces the prior multi-commit branch, which had diverged with unsigned commits and a merge-commit history that conflicted with the since-merged #424/#426). Net feature set unchanged: - mTLS (clientCertPem/clientKeyPem) + custom HTTP headers + User-Agent entry on the SEA path, with PEM and header-token (CR/LF/NUL) validation in SeaAuth. - Retry/backoff tuning forwarded to the kernel (omitting unset knobs). - Operation-status fields surfaced through op.status(): numModifiedRows, displayMessage, diagnosticInfo, errorDetailsJson (async + sync paths), memoized once terminal; Thrift wire synthesis maps the same fields. - preserveBigNumericPrecision connection option (Thrift + SEA) and DATE/large-number parameter type-inference fix. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore <madhavendra.rathore@databricks.com>
1 parent 7040a95 commit 86ee26b

26 files changed

Lines changed: 1391 additions & 86 deletions

KERNEL_REV

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
80b68e1eef3b613910183a50dfa4dace854d50dd
1+
75bfa526cf70c6c37d565e3cbb19c6d922714174

lib/DBSQLClient.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
152152

153153
useLZ4Compression: true,
154154

155+
preserveBigNumericPrecision: false,
156+
155157
// Telemetry defaults are sourced from DEFAULT_TELEMETRY_CONFIG so
156158
// every component reads from the same single frozen const. Mapping the
157159
// unprefixed TelemetryConfiguration keys to the `telemetry`-prefixed
@@ -604,6 +606,11 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
604606
this.config.enableMetricViewMetadata = options.enableMetricViewMetadata;
605607
}
606608

609+
// Opt-in: preserve DECIMAL (string) / BIGINT (bigint) precision in results.
610+
if (options.preserveBigNumericPrecision !== undefined) {
611+
this.config.preserveBigNumericPrecision = options.preserveBigNumericPrecision;
612+
}
613+
607614
// Override telemetry config if provided in options. Per-key narrowed copy
608615
// preserves the structural type system: `ConnectionOptions` and
609616
// `ClientConfig` declare identical types for these knobs, so a user

lib/DBSQLParameter.ts

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,37 @@ export enum DBSQLParameterType {
2929
INTERVALDAY = 'INTERVAL DAY',
3030
}
3131

32+
// 32-bit signed integer bounds — the range of the Spark `INT` type.
33+
const INT32_MIN = -2147483648;
34+
const INT32_MAX = 2147483647;
35+
36+
/**
37+
* Infer the Spark parameter type for a JS `number` when the caller didn't set
38+
* one explicitly.
39+
*
40+
* A JS `number` is an IEEE-754 double, so a whole-number value can still be far
41+
* outside the `INT` range (e.g. `1e30`). Typing such a value as `INTEGER`
42+
* makes the server reject it (`invalid INT literal "1e+30"`). Pick the
43+
* narrowest type that actually fits:
44+
* - non-integer / non-finite → `DOUBLE`
45+
* - integer within INT (i32) range → `INTEGER`
46+
* - integer within the safe-integer range → `BIGINT`
47+
* - anything larger → `DOUBLE` (can't be represented exactly as an integer
48+
* anyway; callers needing exact 64-bit integers should pass a `bigint`).
49+
*/
50+
function inferNumberType(value: number): DBSQLParameterType {
51+
if (!Number.isInteger(value)) {
52+
return DBSQLParameterType.DOUBLE;
53+
}
54+
if (value >= INT32_MIN && value <= INT32_MAX) {
55+
return DBSQLParameterType.INTEGER;
56+
}
57+
if (Number.isSafeInteger(value)) {
58+
return DBSQLParameterType.BIGINT;
59+
}
60+
return DBSQLParameterType.DOUBLE;
61+
}
62+
3263
interface DBSQLParameterOptions {
3364
type?: DBSQLParameterType;
3465
value: DBSQLParameterValue;
@@ -78,7 +109,7 @@ export class DBSQLParameter {
78109
if (typeof this.value === 'number') {
79110
return new TSparkParameter({
80111
name,
81-
type: wireType ?? (Number.isInteger(this.value) ? DBSQLParameterType.INTEGER : DBSQLParameterType.DOUBLE),
112+
type: wireType ?? inferNumberType(this.value),
82113
value: new TSparkParameterValue({
83114
stringValue: Number(this.value).toString(),
84115
}),
@@ -96,11 +127,17 @@ export class DBSQLParameter {
96127
}
97128

98129
if (this.value instanceof Date) {
130+
// A `Date` bound as `DATE` must project a calendar date (`yyyy-mm-dd`),
131+
// not a full ISO-8601 timestamp: the SEA wire rejects
132+
// `2024-03-14T00:00:00.000Z` as a DATE literal ("trailing input"), and
133+
// Thrift accepts the date-only form just as well. Without an explicit
134+
// DATE type the value still binds as a TIMESTAMP from the full ISO string.
135+
const isDateType = wireType === DBSQLParameterType.DATE;
99136
return new TSparkParameter({
100137
name,
101138
type: wireType ?? DBSQLParameterType.TIMESTAMP,
102139
value: new TSparkParameterValue({
103-
stringValue: this.value.toISOString(),
140+
stringValue: isDateType ? this.value.toISOString().slice(0, 10) : this.value.toISOString(),
104141
}),
105142
});
106143
}

lib/contracts/IClientContext.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ export interface ClientConfig {
2626
useLZ4Compression: boolean;
2727
enableMetricViewMetadata?: boolean;
2828

29+
// When true, DECIMAL values are returned as exact strings and 64-bit
30+
// integers as JS `bigint`, instead of being coerced to a lossy `number`.
31+
// Off by default to preserve the long-standing representation on both the
32+
// Thrift and SEA backends. See `ConnectionOptions.preserveBigNumericPrecision`.
33+
preserveBigNumericPrecision?: boolean;
34+
2935
// Telemetry configuration
3036
telemetryEnabled?: boolean;
3137
telemetryBatchSize?: number;

lib/contracts/IDBSQLClient.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ export type ConnectionOptions = {
5555
proxy?: ProxyOptions;
5656
enableMetricViewMetadata?: boolean;
5757

58+
/**
59+
* Preserve full numeric precision in results. When `true`, DECIMAL columns
60+
* are returned as exact strings and 64-bit integers (BIGINT) as JS `bigint`,
61+
* instead of the default lossy coercion to a JS `number` (which silently
62+
* rounds DECIMALs and integers beyond 2^53). Applies to both the Thrift and
63+
* SEA backends. Defaults to `false` to preserve the existing representation.
64+
*/
65+
preserveBigNumericPrecision?: boolean;
66+
5867
/**
5968
* Extra HTTP headers attached to driver-owned out-of-band requests
6069
* (telemetry POSTs and feature-flag GETs). Not applied to the primary

lib/contracts/InternalConnectionOptions.ts

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,49 @@ export interface InternalConnectionOptions {
2929
/**
3030
* SEA-only: verify the server's TLS certificate. Secure-by-default — omit
3131
* to keep full chain + hostname verification; set `false` only to opt into
32-
* the insecure accept-anything mode.
32+
* the insecure accept-anything mode. This is the master verify toggle:
33+
* `false` also subsumes the hostname check (see
34+
* `checkServerCertificateHostname`). Mirrors the Python connector's
35+
* `_tls_no_verify` (inverted).
3336
* @internal SEA path only.
3437
*/
3538
checkServerCertificate?: boolean;
3639

40+
/**
41+
* SEA-only: verify that the server certificate matches the host
42+
* (hostname-vs-SNI check), independently of full chain validation. Omit
43+
* to keep the secure default (on); set `false` to skip only the hostname
44+
* check while still validating the chain — e.g. connecting via an IP
45+
* literal or a host the cert wasn't issued for. No-op when
46+
* `checkServerCertificate` is `false` (that disables everything). Mirrors
47+
* the Python connector's `_tls_verify_hostname`.
48+
* @internal SEA path only.
49+
*/
50+
checkServerCertificateHostname?: boolean;
51+
3752
/**
3853
* SEA-only: PEM-encoded CA certificate (string or `Buffer`) added to the
3954
* trust store on top of the system roots — for TLS-inspecting proxies or
4055
* on-prem internal CAs. Honoured regardless of `checkServerCertificate`.
4156
* @internal SEA path only.
4257
*/
4358
customCaCert?: Buffer | string;
59+
60+
/**
61+
* SEA-only: PEM-encoded client certificate (string or `Buffer`) for
62+
* mutual TLS (mTLS). Must be supplied together with `clientKeyPem`; a
63+
* leaf cert optionally followed by its intermediate chain is accepted.
64+
* Mirrors the Python connector's `_tls_client_cert_file`.
65+
* @internal SEA path only.
66+
*/
67+
clientCertPem?: Buffer | string;
68+
69+
/**
70+
* SEA-only: PEM-encoded private key (string or `Buffer`) for the mTLS
71+
* client certificate. Must be supplied together with `clientCertPem`.
72+
* For portability supply a PKCS#8 key (`BEGIN PRIVATE KEY`). Mirrors the
73+
* Python connector's `_tls_client_cert_key_file`.
74+
* @internal SEA path only.
75+
*/
76+
clientKeyPem?: Buffer | string;
4477
}

lib/contracts/OperationStatus.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,33 @@ export interface OperationStatus {
5353
* to `WaitUntilReadyOptions.callback` for the consumer to interpret.
5454
*/
5555
progressUpdateResponse?: unknown;
56+
57+
/**
58+
* Number of rows modified by a DML statement (UPDATE / INSERT / DELETE /
59+
* MERGE). `undefined`/`null` for SELECT and on backends/warehouses that do
60+
* not surface the counter. Mirrors Thrift's
61+
* `TGetOperationStatusResp.numModifiedRows`.
62+
*/
63+
numModifiedRows?: number | null;
64+
65+
/**
66+
* Server-supplied user-facing message, when the backend exposes one. Mirrors
67+
* Thrift's `TGetOperationStatusResp.displayMessage`. May contain SQL
68+
* fragments or parameter values — treat as potentially sensitive.
69+
*/
70+
displayMessage?: string | null;
71+
72+
/**
73+
* Server-supplied diagnostic detail (multi-line operator / stack context),
74+
* when available. Mirrors Thrift's `TGetOperationStatusResp.diagnosticInfo`.
75+
* For support surfaces, not user-facing.
76+
*/
77+
diagnosticInfo?: string | null;
78+
79+
/**
80+
* Server-supplied JSON blob with extended error details, when available.
81+
* Mirrors Thrift's `TGetOperationStatusResp.errorDetailsJson`. Pass-through
82+
* string — callers parse with `JSON.parse` if they need structured access.
83+
*/
84+
errorDetailsJson?: string | null;
5685
}

lib/result/ArrowResultConverter.ts

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import {
1515
RecordBatchReader,
1616
util as arrowUtils,
1717
} from 'apache-arrow';
18-
import { TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types';
18+
import { TTableSchema, TColumnDesc, TTypeId } from '../../thrift/TCLIService_types';
1919
import IClientContext from '../contracts/IClientContext';
2020
import HiveDriverError from '../errors/HiveDriverError';
2121
import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
@@ -169,13 +169,41 @@ function formatDayTimeFromTotal(totalNanos: bigint): string {
169169
return `${sign}${days.toString()} ${pad2(hours)}:${pad2(minutes)}:${pad2(seconds)}${fraction}`;
170170
}
171171

172+
/**
173+
* Render an Arrow `Decimal` value — supplied as its unscaled integer (from
174+
* `bigNumToBigInt`) plus the column `scale` — as an exact decimal string,
175+
* e.g. unscaled `1234567890` / scale `5` → `"12345.67890"`. Used by the
176+
* precision-preserving path so high-precision DECIMALs survive the round-trip
177+
* instead of being flattened to an IEEE-754 double.
178+
*/
179+
export function bigNumDecimalToString(unscaled: bigint, scale: number): string {
180+
if (scale <= 0) {
181+
return unscaled.toString();
182+
}
183+
const negative = unscaled < ZERO_BIGINT;
184+
// `padStart(scale + 1)` guarantees at least one digit before the point
185+
// (e.g. unscaled `5` / scale `2` → `"005"` → `"0.05"`).
186+
const digits = (negative ? -unscaled : unscaled).toString().padStart(scale + 1, '0');
187+
const cut = digits.length - scale;
188+
return `${negative ? '-' : ''}${digits.slice(0, cut)}.${digits.slice(cut)}`;
189+
}
190+
172191
export default class ArrowResultConverter implements IResultsProvider<Array<any>> {
173192
private readonly context: IClientContext;
174193

175194
private readonly source: IResultsProvider<ArrowBatch>;
176195

177196
private readonly schema: Array<TColumnDesc>;
178197

198+
// When true, DECIMAL and 64-bit integer values keep full precision —
199+
// DECIMAL as an exact string and BIGINT as a JS `bigint` — instead of being
200+
// coerced to a lossy `number`. Enabled by the SEA backend, which always
201+
// receives native Arrow `Decimal128` / `Int64` from the kernel and has no
202+
// server-side "send as string" escape hatch (the Thrift backend gets the
203+
// string form via `useArrowNativeTypes=false`). Off by default so the Thrift
204+
// path keeps its long-standing `number` representation unchanged.
205+
private readonly preserveBigNumericPrecision: boolean;
206+
179207
private recordBatchReader?: IterableIterator<RecordBatch<TypeMap>>;
180208

181209
// Remaining rows in current Arrow batch (not the record batch!)
@@ -193,10 +221,16 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
193221
// operation backend and the SEA backend's neutral `ResultMetadata` —
194222
// which both carry `schema?: TTableSchema` — can construct the converter
195223
// without an adapter at the call site.
196-
constructor(context: IClientContext, source: IResultsProvider<ArrowBatch>, { schema }: { schema?: TTableSchema }) {
224+
constructor(
225+
context: IClientContext,
226+
source: IResultsProvider<ArrowBatch>,
227+
{ schema }: { schema?: TTableSchema },
228+
{ preserveBigNumericPrecision = false }: { preserveBigNumericPrecision?: boolean } = {},
229+
) {
197230
this.context = context;
198231
this.source = source;
199232
this.schema = getSchemaColumns(schema);
233+
this.preserveBigNumericPrecision = preserveBigNumericPrecision;
200234
}
201235

202236
public async hasMore() {
@@ -374,6 +408,11 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
374408
if (value instanceof Object && value[isArrowBigNumSymbol]) {
375409
const result = bigNumToBigInt(value);
376410
if (DataType.isDecimal(valueType)) {
411+
// Preserve full precision as an exact string when requested (SEA);
412+
// otherwise keep the historical lossy `number` form.
413+
if (this.preserveBigNumericPrecision) {
414+
return bigNumDecimalToString(result, valueType.scale);
415+
}
377416
return Number(result) / 10 ** valueType.scale;
378417
}
379418
// A rewritten Duration Int64 surfaces as a raw `bigint`, not a BigNum
@@ -397,6 +436,12 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
397436
if (durationUnit) {
398437
return formatDurationToIntervalDayTime(value, durationUnit);
399438
}
439+
// Keep the exact `bigint` when precision must be preserved (SEA); the
440+
// default path narrows to `number` for backward compatibility (the
441+
// Thrift backend has always returned BIGINT as a JS `number`).
442+
if (this.preserveBigNumericPrecision) {
443+
return value;
444+
}
400445
return Number(value);
401446
}
402447

@@ -411,7 +456,23 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
411456
const typeDescriptor = column.typeDesc.types[0]?.primitiveEntry;
412457
const field = column.columnName;
413458
const value = record[field];
414-
result[field] = value === null ? null : convertThriftValue(typeDescriptor, value);
459+
if (value === null) {
460+
result[field] = null;
461+
return;
462+
}
463+
// When preserving precision, DECIMAL and BIGINT values were already
464+
// produced in their exact form by `convertArrowTypes` (string / bigint).
465+
// `convertThriftValue` would narrow both back to a lossy `number`
466+
// (DECIMAL_TYPE → `Number(value)`, BIGINT_TYPE → `convertBigInt`), so
467+
// pass them through untouched on this path.
468+
if (
469+
this.preserveBigNumericPrecision &&
470+
(typeDescriptor?.type === TTypeId.DECIMAL_TYPE || typeDescriptor?.type === TTypeId.BIGINT_TYPE)
471+
) {
472+
result[field] = value;
473+
return;
474+
}
475+
result[field] = convertThriftValue(typeDescriptor, value);
415476
});
416477

417478
return result;

0 commit comments

Comments
 (0)