Skip to content

Commit ff84993

Browse files
committed
[SEA-NodeJS] report INTERVAL columns as STRING_TYPE (Thrift / Python kernel parity)
The SEA Arrow→Thrift type synthesis surfaced interval columns with their true INTERVAL_YEAR_MONTH / INTERVAL_DAY_TIME type ids, whereas the Thrift backend and the Python kernel connector both report interval columns with a STRING type code; as a result, the comparator flagged every interval column as a type-code mismatch. This commit maps INTERVAL to STRING_TYPE on all three detection paths: the databricks.type_name switch, the rewritten-duration Int64 path, and the native Arrow interval fallback. The cell value is already rendered to the canonical interval string ("2-6" for year-month, "3 12:30:15.000000000" for day-time) by ArrowResultConverter, which keys off the Arrow value type — not this synthesized TTypeId — so value formatting is unchanged. Verified against the comparator warehouse: the STATEMENT_SELECT / EXTREME_VALUES interval columns now match the Thrift backend (type code 7, i.e. STRING_TYPE, plus an identical string value).
1 parent 3d8ac30 commit ff84993

1 file changed

Lines changed: 16 additions & 7 deletions

File tree

lib/sea/SeaArrowIpc.ts

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,12 @@ function arrowTypeToTTypeId(field: Field<DataType>): TTypeId {
162162
return TTypeId.TIMESTAMP_TYPE;
163163
case 'DECIMAL':
164164
return TTypeId.DECIMAL_TYPE;
165+
// INTERVAL — surface as STRING_TYPE to match the Thrift backend and the
166+
// Python kernel connector, both of which report interval columns with a
167+
// string type code. The cell value is already rendered to the canonical
168+
// interval string (e.g. "2-6" / "3 12:30:15.000000000") by
169+
// ArrowResultConverter, which keys off the Arrow value type (not this
170+
// synthesized TTypeId), so value formatting is unaffected.
165171
case 'INTERVAL':
166172
case 'INTERVAL DAY':
167173
case 'INTERVAL DAY TO HOUR':
@@ -173,11 +179,10 @@ function arrowTypeToTTypeId(field: Field<DataType>): TTypeId {
173179
case 'INTERVAL MINUTE':
174180
case 'INTERVAL MINUTE TO SECOND':
175181
case 'INTERVAL SECOND':
176-
return TTypeId.INTERVAL_DAY_TIME_TYPE;
177182
case 'INTERVAL YEAR':
178183
case 'INTERVAL YEAR TO MONTH':
179184
case 'INTERVAL MONTH':
180-
return TTypeId.INTERVAL_YEAR_MONTH_TYPE;
185+
return TTypeId.STRING_TYPE;
181186
case 'ARRAY':
182187
return TTypeId.ARRAY_TYPE;
183188
case 'MAP':
@@ -198,10 +203,12 @@ function arrowTypeToTTypeId(field: Field<DataType>): TTypeId {
198203
if (DataType.isInt(arrowType)) {
199204
// Duration columns are rewritten to Int64 with a
200205
// `databricks.arrow.duration_unit` metadata marker (see
201-
// `SeaArrowIpcDurationFix.ts`). Surface them as INTERVAL_DAY_TIME
202-
// so the converter formats them back into the thrift string form.
206+
// `SeaArrowIpcDurationFix.ts`). Surface them as STRING_TYPE (matching the
207+
// Thrift backend and Python kernel) — the converter still formats the
208+
// value into the thrift INTERVAL DAY-TIME string via the duration_unit
209+
// metadata, independent of this type code.
203210
if (arrowType.bitWidth === 64 && field.metadata.has(DURATION_UNIT_METADATA_KEY)) {
204-
return TTypeId.INTERVAL_DAY_TIME_TYPE;
211+
return TTypeId.STRING_TYPE;
205212
}
206213
switch (arrowType.bitWidth) {
207214
case 8:
@@ -233,8 +240,10 @@ function arrowTypeToTTypeId(field: Field<DataType>): TTypeId {
233240
// pairs which the converter formats to thrift's `"Y-M"` / day-time
234241
// strings.
235242
if (DataType.isInterval(arrowType)) {
236-
// unit 0 = YEAR_MONTH, unit 1 = DAY_TIME, unit 2 = MONTH_DAY_NANO
237-
return arrowType.unit === 0 ? TTypeId.INTERVAL_YEAR_MONTH_TYPE : TTypeId.INTERVAL_DAY_TIME_TYPE;
243+
// Surface native Arrow interval types as STRING_TYPE too (Thrift / Python
244+
// kernel parity). The converter formats the value to the thrift "Y-M" /
245+
// day-time string from the Arrow value, independent of this type code.
246+
return TTypeId.STRING_TYPE;
238247
}
239248
if (DataType.isList(arrowType)) return TTypeId.ARRAY_TYPE;
240249
if (DataType.isMap(arrowType)) return TTypeId.MAP_TYPE;

0 commit comments

Comments
 (0)