Skip to content

Commit 54dbcff

Browse files
committed
fix(sea): address #411 review — fail-loud duration unit, interval test gaps
Addresses the P2 review comment on #411 (the interval/duration layer) and cascades the #410 fixes underneath (this branch was rebased onto the updated #410 tip). Validated against a live warehouse (interval-duration + interval-edge e2e) and unit tests. - ArrowResultConverter.toNanoseconds: throw on an unrecognized Arrow duration unit instead of silently defaulting to NANOSECOND. The four units are exactly what SeaArrowIpcDurationFix stamps, so an unknown unit means the two sides drifted — fail loud, matching formatArrowInterval. - SeaIntervalParity tests: add DAY-TIME via Duration(MILLISECOND) and Duration(SECOND) (only MICROSECOND/NANOSECOND were exercised), and a test asserting the converter throws (HiveDriverError) on a native non-YEAR_MONTH Arrow Interval rather than misreading it. - interval-duration-e2e: flip the stale "raw Int64 on this layer" assertion to expect the formatted thrift string "1 02:03:04.000000000". That test was written on #410 (pre-formatter) and explicitly noted "#411 flips this to the formatted string" — now that #411's formatter is wired, the value is the byte-identical thrift DAY-TIME string. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore <madhavendra.rathore@databricks.com>
1 parent 33ad9b5 commit 54dbcff

3 files changed

Lines changed: 70 additions & 7 deletions

File tree

lib/result/ArrowResultConverter.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,14 @@ function formatDurationToIntervalDayTime(value: bigint | number, unit: string):
116116
* MILLISECOND → × 1_000_000
117117
* MICROSECOND → × 1_000
118118
* NANOSECOND → × 1
119+
*
120+
* Throws on any other unit rather than silently treating it as
121+
* NANOSECOND. The four units above are exactly what
122+
* `SeaArrowIpcDurationFix` enumerates when it stamps the
123+
* `databricks.arrow.duration_unit` metadata, so an unrecognized unit
124+
* here means the two sides have drifted — fail loud (matching
125+
* `formatArrowInterval`'s stance) instead of emitting a confidently
126+
* wrong value.
119127
*/
120128
function toNanoseconds(value: bigint, unit: string): bigint {
121129
switch (unit) {
@@ -126,8 +134,12 @@ function toNanoseconds(value: bigint, unit: string): bigint {
126134
case 'MICROSECOND':
127135
return value * NS_PER_MICRO;
128136
case 'NANOSECOND':
129-
default:
130137
return value;
138+
default:
139+
throw new HiveDriverError(
140+
`SEA INTERVAL DAY-TIME: unrecognized Arrow duration unit ${JSON.stringify(unit)}; ` +
141+
`expected one of SECOND / MILLISECOND / MICROSECOND / NANOSECOND`,
142+
);
131143
}
132144
}
133145

tests/e2e/sea/interval-duration-e2e.test.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,13 @@ describe('SEA INTERVAL DAY-TIME (Arrow Duration rewriter) end-to-end', function
9393
expect(row.dt, 'INTERVAL DAY-TIME value should be present').to.not.equal(undefined);
9494
});
9595

96-
it('surfaces the value as a raw Int64 on this layer (formatter is PR 3/3)', async () => {
96+
it('surfaces the value as the formatted thrift INTERVAL DAY-TIME string (#411 formatter wired)', async () => {
9797
const row = await fetchOneRow(true, secrets as PecoSecrets);
98-
// Documents the M0/2-of-3 contract: the rewriter makes the column
99-
// *decodable* but the duration_unit formatter is not wired here yet, so the
100-
// value is the raw integer microsecond/nanosecond count, not the thrift
101-
// "1 02:03:04.000000000" string. (#411 flips this to the formatted string.)
102-
expect(['number', 'bigint']).to.include(typeof row.dt);
98+
// #411 wires the duration_unit formatter, so the raw Int64 the rewriter
99+
// produces is rendered as the thrift "D HH:mm:ss.fffffffff" string —
100+
// byte-identical to the Thrift path. (On the #410 layer this surfaced as
101+
// the raw integer count.)
102+
expect(typeof row.dt).to.equal('string');
103+
expect(row.dt).to.equal('1 02:03:04.000000000');
103104
});
104105
});

tests/unit/sea/SeaIntervalParity.test.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ import { TimeUnit as FbTimeUnit } from 'apache-arrow/fb/time-unit';
6161

6262
import SeaOperationBackend from '../../../lib/sea/SeaOperationBackend';
6363
import ClientContextStub from '../.stubs/ClientContextStub';
64+
import HiveDriverError from '../../../lib/errors/HiveDriverError';
6465
import { DURATION_UNIT_METADATA_KEY as CONVERTER_DURATION_KEY } from '../../../lib/result/ArrowResultConverter';
6566
import { DURATION_UNIT_METADATA_KEY as REWRITER_DURATION_KEY } from '../../../lib/sea/SeaArrowIpcDurationFix';
6667

@@ -306,6 +307,55 @@ describe('SeaOperationBackend — INTERVAL parity with thrift', () => {
306307
expect((rows[0] as any).iv).to.equal('1 02:03:04.123456789');
307308
});
308309

310+
it('DAY-TIME via Arrow Duration(MILLISECOND) scales correctly', async () => {
311+
// 1 day + 2h + 3min + 4s = 93784 s = 93_784_000 ms.
312+
const millis = BigInt(93_784) * BigInt(1_000);
313+
const ipc = buildDurationIpc('iv', FbTimeUnit.MILLISECOND, [millis], 'INTERVAL');
314+
const schemaIpc = ipcWithDurationSchema('iv', FbTimeUnit.MILLISECOND, 'INTERVAL');
315+
316+
const stub = new StatementStub(schemaIpc, [ipc]);
317+
const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() });
318+
const rows = await backend.fetchChunk({ limit: 100 });
319+
expect(rows).to.have.length(1);
320+
expect((rows[0] as any).iv).to.equal('1 02:03:04.000000000');
321+
});
322+
323+
it('DAY-TIME via Arrow Duration(SECOND) scales correctly', async () => {
324+
// 1 day + 2h + 3min + 4s = 93784 s.
325+
const seconds = BigInt(93_784);
326+
const ipc = buildDurationIpc('iv', FbTimeUnit.SECOND, [seconds], 'INTERVAL');
327+
const schemaIpc = ipcWithDurationSchema('iv', FbTimeUnit.SECOND, 'INTERVAL');
328+
329+
const stub = new StatementStub(schemaIpc, [ipc]);
330+
const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() });
331+
const rows = await backend.fetchChunk({ limit: 100 });
332+
expect(rows).to.have.length(1);
333+
expect((rows[0] as any).iv).to.equal('1 02:03:04.000000000');
334+
});
335+
336+
it('native non-YEAR_MONTH Arrow Interval is rejected (fail-loud, not misread as [days, ms])', async () => {
337+
// The kernel only emits YEAR_MONTH as a native Arrow Interval; DAY-TIME
338+
// arrives as Duration. A native DAY_TIME Interval reaching the converter
339+
// means the kernel contract drifted — formatArrowInterval must throw
340+
// rather than silently misread the value.
341+
const fields = [withTypeName(new Field('iv', new Interval(IntervalUnit.DAY_TIME), true), 'INTERVAL')];
342+
const schema = new Schema(fields);
343+
const schemaIpc = ipcSchemaOnly(schema);
344+
// One DAY_TIME value: [days=1, milliseconds=1000] packed as Int32Array(2).
345+
const dataIpc = ipcFromColumns(schema, { iv: [Int32Array.from([1, 1000])] });
346+
347+
const stub = new StatementStub(schemaIpc, [dataIpc]);
348+
const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() });
349+
let thrown: unknown;
350+
try {
351+
await backend.fetchChunk({ limit: 100 });
352+
} catch (err) {
353+
thrown = err;
354+
}
355+
expect(thrown).to.be.instanceOf(HiveDriverError);
356+
expect((thrown as Error).message).to.match(/YEAR_MONTH/);
357+
});
358+
309359
it('DAY-TIME zero → "0 00:00:00.000000000"', async () => {
310360
const ipc = buildDurationIpc('iv', FbTimeUnit.MICROSECOND, [BigInt(0)], 'INTERVAL');
311361
const schemaIpc = ipcWithDurationSchema('iv', FbTimeUnit.MICROSECOND, 'INTERVAL');

0 commit comments

Comments
 (0)