Skip to content

Commit 821c2f6

Browse files
committed
fix(sea): address PR review comments + fix failing unit tests
Failing unit test (TS2737): ArrowResultConverter.test.ts used BigInt literals (`123n`), which don't compile at the tsconfig ES2018 target the unit-test job typechecks against. Replaced with `BigInt('123')` calls (the BigInt function is available at ES2018; only the literal syntax is gated). Review comments: - C1 (rich-status propagation untested): the kernel-statement fakes returned null for all four rich-status accessors, so the propagation through `op.status()` was never exercised with a real value. Parameterized the fakes and added a sync-DML test asserting numModifiedRows / displayMessage / diagnosticInfo / errorDetailsJson all surface, plus an all-null SELECT case. (Verified live: the SEA REST server delivers DML counts as a `num_affected_rows` result column rather than on the status envelope, so the kernel accessor returns null against current SEA warehouses by design; Thrift on the same warehouse surfaces Int64(4) on status. The wiring is correct and will surface values when a server populates the status field.) - C2 (synthesizeThriftStatus rich fields untested): added tests for the Int64 re-boxing of numModifiedRows (including 0 vs absent), the null -> undefined mapping, and the string passthrough fields. - C3 (header injection): customHeaders values/names were forwarded verbatim; CR/LF/NUL enable HTTP header injection and the kernel only rejects them at connect time with an opaque error. Added early validation in buildSeaHttpOptions throwing a clear HiveDriverError that names the offending header (validated before the reserved-name drop). Verified live against pecotesting. - C4 (dead conjunct): simplified `if (this.asyncStatement && !this.cancellableExecution)` to `if (this.asyncStatement)` — the constructor already guarantees the handle kinds are mutually exclusive. - C5 (redundant FFI reads): memoized readRichStatusFields so a re-status() of a terminal operation reuses the read instead of re-hitting the four kernel accessors. Covered by the C1 read-count assertion. Co-authored-by: Isaac
1 parent f258df4 commit 821c2f6

6 files changed

Lines changed: 214 additions & 18 deletions

File tree

lib/sea/SeaAuth.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,12 +359,35 @@ export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions {
359359
*/
360360
const KERNEL_MANAGED_HEADERS = new Set(['authorization', 'x-databricks-org-id']);
361361

362+
// CR / LF / NUL in a header name or value enable request-splitting / header
363+
// injection. The kernel's HTTP client (reqwest) does reject these, but only at
364+
// connect time and with an opaque "Failed to construct HTTP client:
365+
// InvalidArgument: failed to parse header value" error that names neither the
366+
// offending header nor the cause. Reject them here, before the FFI hop, with a
367+
// clear error so a caller gets actionable signal at the point they set the
368+
// header (verified against pecotesting: the kernel otherwise surfaces the
369+
// opaque construction error).
370+
const FORBIDDEN_HEADER_CHARS = /[\r\n\0]/;
371+
372+
function validateHeaderToken(kind: 'name' | 'value', headerName: string, token: string): void {
373+
if (FORBIDDEN_HEADER_CHARS.test(token)) {
374+
throw new HiveDriverError(
375+
`SEA backend: customHeaders ${kind} for \`${headerName}\` contains a forbidden control character ` +
376+
`(CR, LF, or NUL). Such characters enable HTTP header injection and are rejected.`,
377+
);
378+
}
379+
}
380+
362381
export function buildSeaHttpOptions(options: ConnectionOptions): SeaHttpOptions {
363382
const { customHeaders, userAgentEntry } = options;
364383

365384
const headers: Array<{ name: string; value: string }> = [];
366385
if (customHeaders) {
367386
for (const [name, value] of Object.entries(customHeaders)) {
387+
// Reject CR/LF/NUL in either the name or the value before forwarding —
388+
// a clear, early error instead of the kernel's opaque connect-time throw.
389+
validateHeaderToken('name', name, name);
390+
validateHeaderToken('value', name, value);
368391
// Drop kernel-managed reserved names before the FFI hop — same
369392
// double-wall as the Python connector's `_KERNEL_MANAGED_HEADERS`.
370393
if (KERNEL_MANAGED_HEADERS.has(name.toLowerCase())) {

lib/sea/SeaOperationBackend.ts

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,13 @@ export default class SeaOperationBackend implements IOperationBackend {
192192
// sync-execute path once `cancellableExecution.result()` settles.
193193
private blockingStatement?: SeaOperationStatement;
194194

195+
// Memoized rich-status read. `readRichStatusFields()` is only ever invoked
196+
// once the operation is terminal (the Succeeded branches of `status()` and
197+
// the `seaFinished` progress callback), and the kernel's terminal response is
198+
// immutable, so the FFI accessors are read exactly once and the result
199+
// reused — re-`status()`-ing a completed operation is then free.
200+
private richStatusFieldsPromise?: Promise<SeaRichStatusFields>;
201+
195202
// The cancel/close surface — whichever handle backs this operation. Both
196203
// `AsyncStatement` and `Statement` expose `cancel()` / `close()`; the
197204
// sync-execute path uses a composite that routes `cancel()` to the
@@ -474,7 +481,14 @@ export default class SeaOperationBackend implements IOperationBackend {
474481
* status-field read must not turn a successful operation's status query into
475482
* a throw. The fields are best-effort metadata, not the operation outcome.
476483
*/
477-
private async readRichStatusFields(): Promise<SeaRichStatusFields> {
484+
private readRichStatusFields(): Promise<SeaRichStatusFields> {
485+
if (!this.richStatusFieldsPromise) {
486+
this.richStatusFieldsPromise = this.computeRichStatusFields();
487+
}
488+
return this.richStatusFieldsPromise;
489+
}
490+
491+
private async computeRichStatusFields(): Promise<SeaRichStatusFields> {
478492
const empty: SeaRichStatusFields = {
479493
numModifiedRows: null,
480494
displayMessage: null,
@@ -483,8 +497,10 @@ export default class SeaOperationBackend implements IOperationBackend {
483497
};
484498

485499
// The async path never produces a terminal sync `Statement`, so there is
486-
// nothing to read these off of.
487-
if (this.asyncStatement && !this.cancellableExecution) {
500+
// nothing to read these off of. (The constructor guarantees exactly one of
501+
// `asyncStatement` / `statement` / `cancellableExecution`, so `asyncStatement`
502+
// being set already implies `cancellableExecution` is undefined.)
503+
if (this.asyncStatement) {
488504
return empty;
489505
}
490506

tests/unit/result/ArrowResultConverter.test.ts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ describe('ArrowResultConverter', () => {
215215
it('preserves BIGINT precision as bigint when preserveBigNumericPrecision is set', async () => {
216216
// 9007199254740993 = Number.MAX_SAFE_INTEGER + 2 — not exactly
217217
// representable as a JS number.
218-
const table = tableFromArrays({ big_value: BigInt64Array.from([9007199254740993n, 5n]) });
218+
const table = tableFromArrays({ big_value: BigInt64Array.from([BigInt('9007199254740993'), BigInt('5')]) });
219219
const rowSetProvider = new ResultsProviderStub(
220220
[{ batches: [createSampleArrowBatch(table.batches[0])], rowCount: 2 }],
221221
emptyItem,
@@ -227,13 +227,13 @@ describe('ArrowResultConverter', () => {
227227
{ preserveBigNumericPrecision: true },
228228
);
229229
expect(await result.fetchNext({ limit: 10000 })).to.deep.equal([
230-
{ big_value: 9007199254740993n },
231-
{ big_value: 5n },
230+
{ big_value: BigInt('9007199254740993') },
231+
{ big_value: BigInt('5') },
232232
]);
233233
});
234234

235235
it('narrows BIGINT to a (lossy) number by default — preserves the Thrift contract', async () => {
236-
const table = tableFromArrays({ big_value: BigInt64Array.from([9007199254740993n, 5n]) });
236+
const table = tableFromArrays({ big_value: BigInt64Array.from([BigInt('9007199254740993'), BigInt('5')]) });
237237
const rowSetProvider = new ResultsProviderStub(
238238
[{ batches: [createSampleArrowBatch(table.batches[0])], rowCount: 2 }],
239239
emptyItem,
@@ -246,10 +246,10 @@ describe('ArrowResultConverter', () => {
246246
});
247247

248248
it('formats unscaled decimals to exact strings (bigNumDecimalToString)', () => {
249-
expect(bigNumDecimalToString(1234567890n, 5)).to.equal('12345.67890'); // trailing zero kept
250-
expect(bigNumDecimalToString(-1234567890123456789n, 4)).to.equal('-123456789012345.6789');
251-
expect(bigNumDecimalToString(5n, 2)).to.equal('0.05'); // leading zero synthesized
252-
expect(bigNumDecimalToString(-5n, 2)).to.equal('-0.05');
253-
expect(bigNumDecimalToString(12345n, 0)).to.equal('12345'); // scale 0 → integer string
249+
expect(bigNumDecimalToString(BigInt('1234567890'), 5)).to.equal('12345.67890'); // trailing zero kept
250+
expect(bigNumDecimalToString(BigInt('-1234567890123456789'), 4)).to.equal('-123456789012345.6789');
251+
expect(bigNumDecimalToString(BigInt('5'), 2)).to.equal('0.05'); // leading zero synthesized
252+
expect(bigNumDecimalToString(BigInt('-5'), 2)).to.equal('-0.05');
253+
expect(bigNumDecimalToString(BigInt('12345'), 0)).to.equal('12345'); // scale 0 → integer string
254254
});
255255
});

tests/unit/sea/connectionOptions.test.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,39 @@ describe('SeaAuth HTTP options (buildSeaHttpOptions)', () => {
273273
expect(native.customHeaders?.find((h) => h.name === 'X-Trace')?.value).to.equal('abc');
274274
expect(native.customHeaders?.find((h) => h.name === 'User-Agent')?.value).to.contain('MyApp/2.0');
275275
});
276+
277+
describe('rejects header-injection control characters (CR / LF / NUL)', () => {
278+
// The kernel HTTP client does reject these, but only at connect time with an
279+
// opaque "Failed to construct HTTP client: InvalidArgument" error (verified
280+
// against pecotesting). We reject earlier, naming the offending header.
281+
const injections: Array<[string, Record<string, string>]> = [
282+
['CRLF in value', { 'X-Evil': 'ok\r\nInjected-Header: pwned' }],
283+
['bare LF in value', { 'X-Evil': 'a\nb' }],
284+
['bare CR in value', { 'X-Evil': 'a\rb' }],
285+
['NUL in value', { 'X-Evil': 'a\0b' }],
286+
['CRLF in name', { 'X-Ev\r\nil': 'v' }],
287+
['NUL in name', { 'X-Ev\0il': 'v' }],
288+
];
289+
for (const [label, customHeaders] of injections) {
290+
it(`throws HiveDriverError on ${label}`, () => {
291+
expect(() => buildSeaHttpOptions(opts({ customHeaders }))).to.throw(HiveDriverError, /forbidden control character/);
292+
});
293+
}
294+
295+
it('does not throw on a valid header containing spaces, tabs, and punctuation', () => {
296+
expect(() =>
297+
buildSeaHttpOptions(opts({ customHeaders: { 'X-Ok': 'Bearer abc.def-123; q=0.9\tfoo' } })),
298+
).to.not.throw();
299+
});
300+
301+
it('validates a reserved header before dropping it (injection via Authorization is still rejected)', () => {
302+
// Reserved-name drop must not let a CR/LF-laced reserved header slip past
303+
// validation — validate first, then drop.
304+
expect(() =>
305+
buildSeaHttpOptions(opts({ customHeaders: { Authorization: 'Bearer x\r\nInjected: 1' } })),
306+
).to.throw(HiveDriverError, /forbidden control character/);
307+
});
308+
});
276309
});
277310

278311
describe('SeaAuth retry options — buildSeaRetryOptions', () => {

tests/unit/sea/execution.test.ts

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,24 +59,50 @@ class FakeNativeStatement implements SeaStatement {
5959
this.closed = true;
6060
}
6161

62-
// Status accessors added by the kernel's status-fields surface.
62+
// Status accessors added by the kernel's status-fields surface. The values
63+
// are configurable so a test can assert non-null rich-status (e.g. a DML
64+
// `numModifiedRows`) propagates through `op.status()`; they default to all-
65+
// null, matching a SELECT / metadata statement that carries none.
66+
public rich: SeaRichStatusValues = {
67+
numModifiedRows: null,
68+
displayMessage: null,
69+
diagnosticInfo: null,
70+
errorDetailsJson: null,
71+
};
72+
73+
// Counts every rich-field accessor call so a test can assert the backend
74+
// memoizes the read on a terminal statement (re-`status()` must not re-hit
75+
// the FFI accessors).
76+
public richReads = 0;
77+
6378
public async numModifiedRows(): Promise<number | null> {
64-
return null;
79+
this.richReads += 1;
80+
return this.rich.numModifiedRows;
6581
}
6682

6783
public async displayMessage(): Promise<string | null> {
68-
return null;
84+
this.richReads += 1;
85+
return this.rich.displayMessage;
6986
}
7087

7188
public async diagnosticInfo(): Promise<string | null> {
72-
return null;
89+
this.richReads += 1;
90+
return this.rich.diagnosticInfo;
7391
}
7492

7593
public async errorDetailsJson(): Promise<string | null> {
76-
return null;
94+
this.richReads += 1;
95+
return this.rich.errorDetailsJson;
7796
}
7897
}
7998

99+
interface SeaRichStatusValues {
100+
numModifiedRows: number | null;
101+
displayMessage: string | null;
102+
diagnosticInfo: string | null;
103+
errorDetailsJson: string | null;
104+
}
105+
80106
/**
81107
* Fake `AsyncStatement` (the `submitStatement` return). `status()` reports a
82108
* configurable state (default Succeeded); `awaitResult()` yields a fetch handle
@@ -223,6 +249,10 @@ class FakeNativeConnection implements SeaConnection {
223249
return this.statementToReturn;
224250
}
225251

252+
// Rich status the next sync-execute's terminal Statement should report
253+
// (e.g. a DML `numModifiedRows`). Defaults to all-null (a SELECT).
254+
public richStatus?: SeaRichStatusValues;
255+
226256
// Sync (`runAsync: false`, the DEFAULT) query path: records sql + options and
227257
// returns a pending CancellableExecution whose result() drives the execute.
228258
public async executeStatementCancellable(sql: string, options?: unknown): Promise<any> {
@@ -231,7 +261,11 @@ class FakeNativeConnection implements SeaConnection {
231261
}
232262
this.lastSql = sql;
233263
this.lastOptions = options;
234-
this.lastCancellableExecution = new FakeCancellableExecution();
264+
const resultHandle = new FakeNativeStatement();
265+
if (this.richStatus) {
266+
resultHandle.rich = this.richStatus;
267+
}
268+
this.lastCancellableExecution = new FakeCancellableExecution(resultHandle);
235269
return this.lastCancellableExecution;
236270
}
237271

@@ -1148,4 +1182,45 @@ describe('SeaOperationBackend — sync (executeStatementCancellable) path', () =
11481182
expect(status.isSuccess).to.equal(true);
11491183
expect(exec.resultHandle.closed).to.equal(false);
11501184
});
1185+
1186+
it('surfaces the kernel rich-status fields (numModifiedRows etc.) through op.status() and memoizes the read', async () => {
1187+
// A DML statement's terminal kernel `Statement` carries numModifiedRows /
1188+
// displayMessage / diagnosticInfo / errorDetailsJson. Drive the sync execute
1189+
// to terminal and assert each non-null field propagates through op.status()
1190+
// (previously the fakes returned all-null, so this propagation was untested).
1191+
const resultHandle = new FakeNativeStatement();
1192+
resultHandle.rich = {
1193+
numModifiedRows: 42,
1194+
displayMessage: 'INSERT 0 42',
1195+
diagnosticInfo: 'stage 1/1 finished',
1196+
errorDetailsJson: '{"detail":"none"}',
1197+
};
1198+
const exec = new FakeCancellableExecution(resultHandle);
1199+
const op = makeSyncOp(exec);
1200+
await op.waitUntilReady();
1201+
1202+
const status = await op.status(false);
1203+
expect(status.state).to.equal(OperationState.Succeeded);
1204+
expect(status.numModifiedRows).to.equal(42);
1205+
expect(status.displayMessage).to.equal('INSERT 0 42');
1206+
expect(status.diagnosticInfo).to.equal('stage 1/1 finished');
1207+
expect(status.errorDetailsJson).to.equal('{"detail":"none"}');
1208+
1209+
// C5: re-status()-ing a completed op reuses the memoized read — the four FFI
1210+
// accessors fire exactly once across both status() calls (4 reads, not 8).
1211+
await op.status(false);
1212+
expect(resultHandle.richReads).to.equal(4);
1213+
});
1214+
1215+
it('reports all-null rich-status for a SELECT (no rows modified) — the default', async () => {
1216+
// A read-only statement carries no numModifiedRows; the backend surfaces
1217+
// null rather than fabricating a value.
1218+
const op = makeSyncOp(new FakeCancellableExecution());
1219+
await op.waitUntilReady();
1220+
const status = await op.status(false);
1221+
expect(status.numModifiedRows).to.equal(null);
1222+
expect(status.displayMessage).to.equal(null);
1223+
expect(status.diagnosticInfo).to.equal(null);
1224+
expect(status.errorDetailsJson).to.equal(null);
1225+
});
11511226
});

tests/unit/thrift-backend/wireSynthesis.test.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { expect } from 'chai';
2+
import Int64 from 'node-int64';
23
import { TOperationState, TSparkRowSetType, TStatusCode } from '../../../thrift/TCLIService_types';
34
import { OperationState, OperationStatus } from '../../../lib/contracts/OperationStatus';
45
import { ResultFormat, ResultMetadata } from '../../../lib/contracts/ResultMetadata';
@@ -68,6 +69,54 @@ describe('wireSynthesis', () => {
6869
expect(resp.errorMessage).to.equal('should-not-elevate-to-error-but-still-passed-through');
6970
expect(resp.sqlState).to.equal('01000');
7071
});
72+
73+
describe('rich status fields (numModifiedRows / displayMessage / diagnosticInfo / errorDetailsJson)', () => {
74+
it('re-boxes numModifiedRows as a Thrift Int64 (matching the Thrift deserializer wire shape)', () => {
75+
const resp = synthesizeThriftStatus({ ...baseStatus, numModifiedRows: 5 });
76+
expect(resp.numModifiedRows, 'should be a node-int64 Int64').to.be.instanceOf(Int64);
77+
expect((resp.numModifiedRows as Int64).toNumber()).to.equal(5);
78+
});
79+
80+
it('re-boxes numModifiedRows: 0 as Int64(0) — a real zero-row DML result, not "absent"', () => {
81+
const resp = synthesizeThriftStatus({ ...baseStatus, numModifiedRows: 0 });
82+
expect(resp.numModifiedRows, '0 is a value, not a missing field').to.be.instanceOf(Int64);
83+
expect((resp.numModifiedRows as Int64).toNumber()).to.equal(0);
84+
});
85+
86+
it('maps a null numModifiedRows (server did not supply) to undefined, matching the Thrift path', () => {
87+
const resp = synthesizeThriftStatus({ ...baseStatus, numModifiedRows: null });
88+
expect(resp.numModifiedRows).to.equal(undefined);
89+
});
90+
91+
it('maps an absent numModifiedRows to undefined', () => {
92+
const resp = synthesizeThriftStatus({ ...baseStatus });
93+
expect(resp.numModifiedRows).to.equal(undefined);
94+
});
95+
96+
it('passes displayMessage / diagnosticInfo / errorDetailsJson through as strings', () => {
97+
const resp = synthesizeThriftStatus({
98+
...baseStatus,
99+
displayMessage: 'INSERT 0 5',
100+
diagnosticInfo: 'stage 1/1 finished',
101+
errorDetailsJson: '{"detail":"none"}',
102+
});
103+
expect(resp.displayMessage).to.equal('INSERT 0 5');
104+
expect(resp.diagnosticInfo).to.equal('stage 1/1 finished');
105+
expect(resp.errorDetailsJson).to.equal('{"detail":"none"}');
106+
});
107+
108+
it('maps null string fields to undefined (absent, not the literal null)', () => {
109+
const resp = synthesizeThriftStatus({
110+
...baseStatus,
111+
displayMessage: null,
112+
diagnosticInfo: null,
113+
errorDetailsJson: null,
114+
});
115+
expect(resp.displayMessage).to.equal(undefined);
116+
expect(resp.diagnosticInfo).to.equal(undefined);
117+
expect(resp.errorDetailsJson).to.equal(undefined);
118+
});
119+
});
71120
});
72121

73122
describe('synthesizeThriftResultSetMetadata', () => {

0 commit comments

Comments
 (0)