Skip to content

Commit 1f33e94

Browse files
committed
feat(sea): Thrift-parity for intervals, getInfo, and SQL-error class
Three driver-side parity fixes, validated via the node comparator (Thrift vs. SEA on a live warehouse). params/queryTimeout are intentionally out of scope (positional params land via the kernel TypedValue codec).

- intervals: default `intervalsAsString: true` on the SEA connection (SeaAuth) so INTERVAL columns surface as strings like the Thrift driver, and map a physical-Utf8 column carrying INTERVAL type-name metadata to STRING in SeaArrowIpc (the kernel keeps the INTERVAL metadata even when rendering as string). Result: interval columns match Thrift in both type code (7) and value. Requires the kernel napi `intervalsAsString` knob.
- getInfo: synthesize the three TGetInfoTypes the Thrift server answers — CLI_SERVER_NAME / CLI_DBMS_NAME = "Spark SQL", CLI_DBMS_VER = "3.1.1" — client-side (JDBC DatabaseMetaData style; SEA/kernel has no getInfo RPC), and reject the rest like the server does. New SeaServerInfo.ts.
- error class: map kernel `SqlError` (server execution failures — PERMISSION_DENIED, SCHEMA_ALREADY_EXISTS, bad SQL) to OperationStateError(ERROR) instead of the base HiveDriverError, matching the Thrift backend's operation-status error class.

Co-authored-by: Isaac
Signed-off-by: Madhavendra Rathore <madhavendra.rathore@databricks.com>
1 parent f8a86d5 commit 1f33e94

12 files changed

Lines changed: 250 additions & 4 deletions

lib/sea/SeaArrowIpc.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,19 @@ export function patchIpcBytes(ipcBytes: Buffer): Buffer {
112112
function arrowTypeToTTypeId(field: Field<DataType>): TTypeId {
113113
const typeName = field.metadata.get(DATABRICKS_TYPE_NAME)?.toUpperCase();
114114

115+
// `intervals_as_string` (set by the SEA backend for Thrift parity)
116+
// renders INTERVAL columns as physical Arrow `Utf8` while the kernel
117+
// keeps the `INTERVAL …` type_name metadata. The Thrift driver reports
118+
// such string-rendered intervals as STRING (type 7), so honour the
119+
// physical type here rather than the semantic metadata — otherwise the
120+
// SEA path would report INTERVAL (20/21) and diverge from Thrift on a
121+
// column whose values are already identical strings. Native interval
122+
// encodings (the kernel default) are Duration / MonthInterval, never
123+
// Utf8, so this guard is inert unless `intervals_as_string` is on.
124+
if (typeName?.startsWith('INTERVAL') && DataType.isUtf8(field.type)) {
125+
return TTypeId.STRING_TYPE;
126+
}
127+
115128
switch (typeName) {
116129
case 'BOOLEAN':
117130
return TTypeId.BOOLEAN_TYPE;

lib/sea/SeaAuth.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,24 @@ export interface SeaSessionDefaults {
6666
catalog?: string;
6767
schema?: string;
6868
sessionConf?: Record<string, string>;
69+
/**
70+
* Render `INTERVAL` / `DURATION` result columns as strings
71+
* (kernel `ResultConfig.intervals_as_string`). The kernel default is
72+
* native Arrow `month_interval` / `duration[us]`, but the NodeJS
73+
* Thrift driver surfaces intervals as strings, so the SEA path sets
74+
* this to `true` to make its result shape a byte-compatible drop-in for the
75+
* Thrift backend. Omitting it falls back to the kernel's native types.
76+
*/
77+
intervalsAsString?: boolean;
78+
/**
79+
* Render complex (`ARRAY` / `MAP` / `STRUCT` / `VARIANT`) result
80+
* columns as JSON strings (kernel `ResultConfig.complex_types_as_json`).
81+
* Left unset on the SEA path: native Arrow nested types already decode
82+
* identically to the Thrift backend through the shared Arrow converter,
83+
* so forcing JSON here would *introduce* a divergence rather than
84+
* remove one.
85+
*/
86+
complexTypesAsJson?: boolean;
6987
}
7088

7189
export type SeaNativeConnectionOptions = SeaSessionDefaults &
@@ -161,6 +179,13 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative
161179
const base = {
162180
hostName: options.host,
163181
httpPath: prependSlash(options.path),
182+
// Match the NodeJS Thrift driver, which surfaces INTERVAL columns as
183+
// strings. The kernel defaults to native Arrow interval/duration
184+
// types; forcing the string rendering here keeps the SEA path a
185+
// byte-compatible drop-in. Complex types are intentionally left at
186+
// the kernel default (native Arrow) — they already decode identically
187+
// to Thrift via the shared Arrow converter.
188+
intervalsAsString: true,
164189
};
165190

166191
const oauth = options as {

lib/sea/SeaErrorMapping.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,18 @@ export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlSta
147147
error = new ParameterError(message);
148148
break;
149149

150+
case 'SqlError':
151+
// A server-side SQL execution failure (the statement reached an
152+
// ERROR state on the warehouse — bad SQL, PERMISSION_DENIED,
153+
// SCHEMA_ALREADY_EXISTS, …). The Thrift backend surfaces exactly
154+
// this situation as an `OperationStateError(ERROR)` after polling
155+
// the operation status, so we mirror that class here for
156+
// drop-in parity (both extend HiveDriverError, so existing
157+
// `catch (HiveDriverError)` callers are unaffected).
158+
error = new OperationStateError(OperationStateErrorCode.Error);
159+
error.message = message;
160+
break;
161+
150162
// All remaining kernel ErrorCode variants map to the base driver error class.
151163
// M0 intentionally does not introduce new error classes; M1 may add nuance.
152164
case 'NotFound':
@@ -156,7 +168,6 @@ export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlSta
156168
case 'Internal':
157169
case 'InvalidStatementHandle':
158170
case 'NetworkError':
159-
case 'SqlError':
160171
error = new HiveDriverError(message);
161172
break;
162173

lib/sea/SeaServerInfo.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright (c) 2026 Databricks, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
import { TGetInfoType, TGetInfoValue } from '../../thrift/TCLIService_types';
16+
17+
/**
18+
* `getInfo` (JDBC `DatabaseMetaData` / ODBC `SQLGetInfo`) is a Thrift-protocol
19+
* concept: the Thrift backend forwards `TGetInfoReq` to the server's `getInfo`
20+
* RPC. The SEA REST protocol and the Rust kernel have **no** equivalent
21+
* endpoint, so — exactly as JDBC does for `DatabaseMetaData` — we synthesize
22+
* the values client-side.
23+
*
24+
* The Databricks Thrift server itself answers only three `TGetInfoType`s and
25+
* rejects every other value; we mirror that surface byte-for-byte so the SEA
26+
* path is a drop-in equivalent:
27+
*
28+
* | TGetInfoType | Thrift server | SEA (here) |
29+
* |---------------------|---------------|-------------------|
30+
* | CLI_SERVER_NAME (13)| "Spark SQL" | "Spark SQL" |
31+
* | CLI_DBMS_NAME (17)| "Spark SQL" | "Spark SQL" |
32+
* | CLI_DBMS_VER (18)| "3.1.1" | "3.1.1" |
33+
* | (any other) | error | undefined → error |
34+
*/
35+
36+
/** Canonical DBMS product name — identical to the Thrift server's value. */
37+
export const SEA_DBMS_NAME = 'Spark SQL';
38+
39+
/** Server-name answer — identical to the Thrift server's value. */
40+
export const SEA_SERVER_NAME = 'Spark SQL';
41+
42+
/**
43+
* DBMS version string. Mirrors the constant the Databricks Thrift server
44+
* reports for `CLI_DBMS_VER` (the HiveServer2-compat Spark SQL version, not
45+
* the DBR release). Kept in lock-step with Thrift for parity; if the server
46+
* ever changes it the comparator's GET_INFO suite flags the drift.
47+
*/
48+
export const SEA_DBMS_VERSION = '3.1.1';
49+
50+
/**
51+
* Synthesize the `TGetInfoValue` for a `getInfo` request on the SEA path.
52+
* Returns `undefined` for any `TGetInfoType` the (Thrift) server does not
53+
* answer — the caller surfaces that as an error, matching Thrift's
54+
* reject-unsupported-info-type behaviour.
55+
*/
56+
export function seaServerInfoValue(infoType: number): TGetInfoValue | undefined {
57+
switch (infoType) {
58+
case TGetInfoType.CLI_SERVER_NAME:
59+
return new TGetInfoValue({ stringValue: SEA_SERVER_NAME });
60+
case TGetInfoType.CLI_DBMS_NAME:
61+
return new TGetInfoValue({ stringValue: SEA_DBMS_NAME });
62+
case TGetInfoType.CLI_DBMS_VER:
63+
return new TGetInfoValue({ stringValue: SEA_DBMS_VERSION });
64+
default:
65+
return undefined;
66+
}
67+
}

lib/sea/SeaSessionBackend.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import { SeaNativeConnection } from './SeaNativeLoader';
3535
import { decodeNapiKernelError } from './SeaErrorMapping';
3636
import SeaOperationBackend from './SeaOperationBackend';
3737
import SeaTableTypeFilter from './SeaTableTypeFilter';
38+
import { seaServerInfoValue } from './SeaServerInfo';
3839

3940
export interface SeaSessionBackendOptions {
4041
/** The opaque napi `Connection` handle returned by `openSession`. */
@@ -90,8 +91,23 @@ export default class SeaSessionBackend implements ISessionBackend {
9091
return this._id;
9192
}
9293

93-
public async getInfo(_infoType: number): Promise<InfoValue> {
94-
throw new HiveDriverError('SeaSessionBackend.getInfo: not implemented yet (deferred to M1)');
94+
public async getInfo(infoType: number): Promise<InfoValue> {
95+
this.failIfClosed();
96+
// `getInfo` (TGetInfoReq) is a Thrift/JDBC concept with no SEA-protocol or
97+
// kernel equivalent, so — like JDBC's DatabaseMetaData — we synthesize the
98+
// values client-side. `seaServerInfoValue` returns matches for the three
99+
// TGetInfoTypes the Thrift server answers (server name, DBMS name, DBMS
100+
// version) and `undefined` for the rest, which we surface as an error to
101+
// mirror the server's reject-unsupported-info-type behaviour.
102+
const value = seaServerInfoValue(infoType);
103+
if (value === undefined) {
104+
throw new HiveDriverError(
105+
`SEA getInfo: TGetInfoType ${infoType} is not supported. The SEA/kernel protocol ` +
106+
'has no getInfo RPC; only CLI_SERVER_NAME, CLI_DBMS_NAME and CLI_DBMS_VER are ' +
107+
'synthesised (matching the Thrift server, which also rejects all other info types).',
108+
);
109+
}
110+
return new InfoValue(value);
95111
}
96112

97113
/**

tests/unit/sea/SeaIntervalParity.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,11 @@ import {
4040
RecordBatch,
4141
makeData,
4242
Struct,
43+
Utf8,
4344
vectorFromArray,
4445
tableToIPC,
4546
} from 'apache-arrow';
47+
import { arrowSchemaToThriftSchema } from '../../../lib/sea/SeaArrowIpc';
4648

4749
// eslint-disable-next-line import/no-internal-modules
4850
import { Message as FbMessage } from 'apache-arrow/fb/message';
@@ -363,3 +365,29 @@ describe('SeaOperationBackend — INTERVAL parity with thrift', () => {
363365
expect((rows[0] as any).iv).to.equal('1 00:00:00.000000000');
364366
});
365367
});
368+
369+
describe('SeaArrowIpc interval-as-string type mapping (Thrift parity)', () => {
370+
const { TTypeId } = require('../../../thrift/TCLIService_types'); // eslint-disable-line @typescript-eslint/no-var-requires, global-require, import/no-internal-modules
371+
372+
function thriftType(field: Field) {
373+
const cols = arrowSchemaToThriftSchema(new Schema([field])).columns;
374+
return cols[0].typeDesc?.types?.[0]?.primitiveEntry?.type;
375+
}
376+
377+
it('Utf8 + INTERVAL DAY TO SECOND metadata → STRING (matches Thrift, not INTERVAL)', () => {
378+
// intervals_as_string=true renders the column as physical Utf8 while the
379+
// kernel keeps the INTERVAL type_name metadata; we must report STRING.
380+
const f = new Field('dt', new Utf8(), true, new Map([['databricks.type_name', 'INTERVAL DAY TO SECOND']]));
381+
expect(thriftType(f)).to.equal(TTypeId.STRING_TYPE);
382+
});
383+
384+
it('Utf8 + INTERVAL YEAR TO MONTH metadata → STRING', () => {
385+
const f = new Field('ym', new Utf8(), true, new Map([['databricks.type_name', 'INTERVAL YEAR TO MONTH']]));
386+
expect(thriftType(f)).to.equal(TTypeId.STRING_TYPE);
387+
});
388+
389+
it('native Arrow Interval + INTERVAL metadata still maps to INTERVAL_YEAR_MONTH (guard is inert without intervals_as_string)', () => {
390+
const f = new Field('ym', new Interval(IntervalUnit.YEAR_MONTH), true, new Map([['databricks.type_name', 'INTERVAL YEAR TO MONTH']]));
391+
expect(thriftType(f)).to.equal(TTypeId.INTERVAL_YEAR_MONTH_TYPE);
392+
});
393+
});

tests/unit/sea/auth-m2m.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ describe('SeaAuth + SeaBackend — OAuth M2M auth flow', () => {
3838
authMode: 'OAuthM2m',
3939
oauthClientId: 'client-uuid',
4040
oauthClientSecret: 'dose-fake-secret',
41+
intervalsAsString: true,
4142
});
4243
});
4344

@@ -182,6 +183,7 @@ describe('SeaAuth + SeaBackend — OAuth M2M auth flow', () => {
182183
authMode: 'OAuthM2m',
183184
oauthClientId: 'client-uuid',
184185
oauthClientSecret: 'dose-fake-secret',
186+
intervalsAsString: true,
185187
});
186188

187189
await session.close();

tests/unit/sea/auth-pat.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ describe('SeaAuth — PAT auth options builder', () => {
3333
httpPath: '/sql/1.0/warehouses/abc',
3434
authMode: 'Pat',
3535
token: 'dapi-fake-pat',
36+
intervalsAsString: true,
3637
});
3738
});
3839

tests/unit/sea/auth-u2m.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ describe('SeaAuth + SeaBackend — OAuth U2M auth flow', () => {
3535
httpPath: '/sql/1.0/warehouses/abc',
3636
authMode: 'OAuthU2m',
3737
oauthRedirectPort: 8030,
38+
intervalsAsString: true,
3839
});
3940
});
4041

@@ -145,6 +146,7 @@ describe('SeaAuth + SeaBackend — OAuth U2M auth flow', () => {
145146
httpPath: '/sql/1.0/warehouses/abc',
146147
authMode: 'OAuthU2m',
147148
oauthRedirectPort: 8030,
149+
intervalsAsString: true,
148150
});
149151

150152
await session.close();

tests/unit/sea/error-mapping.test.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,15 @@ describe('SeaErrorMapping.mapKernelErrorToJsError', () => {
7676
expectedClass: HiveDriverError,
7777
},
7878
{
79+
// Server-side SQL execution failures surface as OperationStateError(ERROR),
80+
// mirroring the Thrift backend's operation-status-poll error path so the
81+
// two drivers throw the same class. (OperationStateError extends
82+
// HiveDriverError, so base-class catchers still match.)
7983
code: 'SqlError',
80-
expectedClass: HiveDriverError,
84+
expectedClass: OperationStateError,
85+
extra: (err) => {
86+
expect((err as OperationStateError).errorCode).to.equal(OperationStateErrorCode.Error);
87+
},
8188
},
8289
];
8390

0 commit comments

Comments
 (0)