Skip to content

Commit 45142c9

Browse files
committed
refactor: round 2 of PR feedback — fix CHAR/VARCHAR roundtrip, docs
Addresses the second round of review comments on #174. - HyperTypeToArrow.toFieldType now stamps Hyper-compatible field metadata for string kinds: hyper:type=Char1 for CHAR(1), hyper:type=Char + hyper:max_string_length for CHAR(n), and hyper:max_string_length alone for bounded VARCHAR(n). The decode path in ArrowToHyperTypeMapper already reads these keys (it was written for Hyper's inbound Arrow stream), so CHAR(1), CHAR(n) and VARCHAR(n) now round-trip through Arrow without loss. Unbounded VARCHAR stays metadata-free. - HyperTypeArrowRoundtripTest expanded: CHAR(1), CHAR(16), VARCHAR(255) and VARCHAR(1024) are now positive parameterised cases. TIME_TZ stays explicitly lossy — Arrow Time has no timezone slot, time-with-time-zone stores a per-value offset that cannot ride on per-column metadata, and the driver's read-side accessor factory also drops TZ on ingress. Comment rewritten to document why fixing it needs either a synthetic Arrow schema or Hyper protocol coordination, rather than a metadata stamp. - HyperTypes.toJdbcType: inline comment on the OID branch explaining why it surfaces as BIGINT (pgjdbc parity; preserves getLong() on OID columns; internal HyperType.oid keeps precision accurate). - SimpleResultSet: class-level javadoc now calls out that this implementation deliberately only covers the JDBC-metadata type slice (INT32 for columns like DATA_TYPE, CHAR/VARCHAR, integer family via isIntegerLike) and is planned for removal once DataCloudMetadataResultSet migrates to StreamingResultSet. Also worth flagging as an observable behavior alongside the ones listed in the first commit: DataCloudDatabaseMetadata.getTypeInfo() returns a real 18-column result derived from HyperTypeKind (was empty before). Mentioning here since the reviewer asked for it in the "Observable behaviour changes" section and I'm avoiding amending the first commit on a published PR.
1 parent c25a003 commit 45142c9

4 files changed

Lines changed: 93 additions & 49 deletions

File tree

jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/resultset/SimpleResultSet.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@
3939
*
4040
* Access to SQL values is provided via {@link ColumnAccessor} instances. This class
4141
* already takes care of casting from SQL types to the compatible Java types.
42+
*
43+
* <p><b>Planned for removal.</b> This implementation only covers the narrow slice of
44+
* {@link HyperTypeKind} used by JDBC metadata result sets (INT32 for columns like
45+
* {@code DATA_TYPE}, CHAR/VARCHAR for text columns, and INT8-INT64/OID via the
46+
* {@code isIntegerLike} helper) — not the full query-result type universe. Rather
47+
* than expanding the {@link #getLong}, {@link #getDouble}, {@link #getBigDecimal},
48+
* {@link #getObject} switches to cover every kind, we intend to migrate
49+
* {@code DataCloudMetadataResultSet} to build on {@link StreamingResultSet} so there
50+
* is only one result-set implementation in the driver. Treat this class as
51+
* maintenance-only until that refactor lands.
4252
*/
4353
@AllArgsConstructor
4454
public abstract class SimpleResultSet<SELF>

jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/types/HyperTypes.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,13 @@ public static JDBCType toJdbcType(HyperType t) {
5454
case INT32:
5555
return JDBCType.INTEGER;
5656
case INT64:
57+
return JDBCType.BIGINT;
5758
case OID:
59+
// Hyper's OID is an unsigned 32-bit internal type. We surface it as BIGINT to
60+
// match the pgjdbc reference (see JDBCReferenceTest): pgjdbc reports oid with
61+
// Types.BIGINT, which keeps `getLong("oid_col")` working for integer-treating
62+
// callers. HyperType.oid stays a distinct kind so the pg_catalog path can keep
63+
// precision/display accurate (10 digits for the u32 range).
5864
return JDBCType.BIGINT;
5965
case FLOAT4:
6066
return JDBCType.REAL;

jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/protocol/data/HyperTypeToArrow.java

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
package com.salesforce.datacloud.jdbc.protocol.data;
66

77
import java.util.Collections;
8+
import java.util.HashMap;
9+
import java.util.Map;
810
import org.apache.arrow.vector.types.DateUnit;
911
import org.apache.arrow.vector.types.FloatingPointPrecision;
1012
import org.apache.arrow.vector.types.TimeUnit;
@@ -35,10 +37,46 @@ public static Field toField(String name, HyperType type) {
3537
return new Field(name, toFieldType(type), null);
3638
}
3739

38-
/** Map a {@link HyperType} to an Arrow {@link FieldType}. */
40+
/**
41+
* Map a {@link HyperType} to an Arrow {@link FieldType}.
42+
*
43+
* <p>For string kinds Arrow only has the unparameterised {@code Utf8}, so we stamp the
44+
* Hyper-side length on the field as metadata using the same keys Hyper's own server emits
45+
* ({@code hyper:type} and {@code hyper:max_string_length}). {@link ArrowToHyperTypeMapper}
46+
* reads those keys back, letting CHAR(n) / CHAR(1) / VARCHAR(n) round-trip through Arrow
47+
* without loss.
48+
*/
3949
public static FieldType toFieldType(HyperType type) {
4050
ArrowType arrowType = toArrowType(type);
41-
return type.isNullable() ? FieldType.nullable(arrowType) : FieldType.notNullable(arrowType);
51+
Map<String, String> metadata = metadataFor(type);
52+
if (type.isNullable()) {
53+
return new FieldType(true, arrowType, null, metadata);
54+
}
55+
return new FieldType(false, arrowType, null, metadata);
56+
}
57+
58+
/** Hyper-compatible field metadata for types whose length is not carried in the ArrowType. */
59+
private static Map<String, String> metadataFor(HyperType type) {
60+
switch (type.getKind()) {
61+
case CHAR:
62+
if (type.getPrecision() == 1) {
63+
return Collections.singletonMap("hyper:type", "Char1");
64+
}
65+
// A general CHAR(n). Stamp both the discriminator and the length.
66+
Map<String, String> charMeta = new HashMap<>();
67+
charMeta.put("hyper:type", "Char");
68+
charMeta.put("hyper:max_string_length", Integer.toString(type.getPrecision()));
69+
return charMeta;
70+
case VARCHAR:
71+
if (type.getPrecision() == HyperType.UNLIMITED_LENGTH) {
72+
// No length to stamp — unbounded VARCHAR is indistinguishable from "text" in
73+
// Arrow and that's fine; both decode to varcharUnlimited.
74+
return null;
75+
}
76+
return Collections.singletonMap("hyper:max_string_length", Integer.toString(type.getPrecision()));
77+
default:
78+
return null;
79+
}
4280
}
4381

4482
/** Map a {@link HyperType} to an Arrow {@link ArrowType} (no nullability). */

jdbc-core/src/test/java/com/salesforce/datacloud/jdbc/protocol/data/HyperTypeArrowRoundtripTest.java

Lines changed: 37 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@
66

77
import static org.assertj.core.api.Assertions.assertThat;
88

9+
import java.util.Collections;
910
import java.util.stream.Stream;
10-
import org.apache.arrow.vector.types.pojo.ArrowType;
1111
import org.apache.arrow.vector.types.pojo.Field;
12-
import org.apache.arrow.vector.types.pojo.FieldType;
1312
import org.junit.jupiter.api.Test;
1413
import org.junit.jupiter.params.ParameterizedTest;
1514
import org.junit.jupiter.params.provider.Arguments;
@@ -27,8 +26,10 @@ class HyperTypeArrowRoundtripTest {
2726

2827
/**
2928
* Core positive cases. Every HyperType listed here must round-trip back to an equal
30-
* HyperType. Kinds with documented lossy encodings (CHAR/bounded VARCHAR, TIME_TZ) live in
31-
* {@link #deviationsFromRoundtrip()} instead.
29+
* HyperType. CHAR(n) / CHAR(1) / VARCHAR(n) round-trip via {@code hyper:*} field metadata
30+
* stamped by {@link HyperTypeToArrow#toFieldType} and read back by
31+
* {@link ArrowToHyperTypeMapper}. TIME_TZ lives in {@link #lossyRoundtrip_timeTzCollapsesToTime}
32+
* instead (see comment there for why it stays lossy).
3233
*/
3334
static Stream<Arguments> encodableTypes() {
3435
return Stream.of(
@@ -42,6 +43,10 @@ static Stream<Arguments> encodableTypes() {
4243
Arguments.of(HyperType.float8(false)),
4344
Arguments.of(HyperType.decimal(10, 2, true)),
4445
Arguments.of(HyperType.decimal(38, 0, false)),
46+
Arguments.of(HyperType.fixedChar(1, true)),
47+
Arguments.of(HyperType.fixedChar(16, true)),
48+
Arguments.of(HyperType.varchar(255, false)),
49+
Arguments.of(HyperType.varchar(1024, true)),
4550
Arguments.of(HyperType.varcharUnlimited(true)),
4651
Arguments.of(HyperType.binary(16, true)),
4752
Arguments.of(HyperType.varbinary(false)),
@@ -56,21 +61,16 @@ static Stream<Arguments> encodableTypes() {
5661
@ParameterizedTest
5762
@MethodSource("encodableTypes")
5863
void roundtripReproducesHyperType(HyperType original) {
59-
// Forward: HyperType -> Arrow.
60-
ArrowType arrowType = HyperTypeToArrow.toArrowType(original);
61-
Field field = HyperTypeKind.ARRAY == original.getKind()
62-
// Arrow list Fields need a child field whose type matches the list element.
63-
? new Field(
64-
"col",
65-
new FieldType(original.isNullable(), arrowType, null),
66-
java.util.Collections.singletonList(new Field(
67-
"",
68-
new FieldType(
69-
original.getElement().isNullable(),
70-
HyperTypeToArrow.toArrowType(original.getElement()),
71-
null),
72-
java.util.Collections.emptyList())))
73-
: new Field("col", new FieldType(original.isNullable(), arrowType, null), null);
64+
// Forward via the full toField helper so any field-level metadata (hyper:type,
65+
// hyper:max_string_length) travels alongside the ArrowType.
66+
Field field;
67+
if (original.getKind() == HyperTypeKind.ARRAY) {
68+
// Arrow list Fields need a child field whose type matches the list element.
69+
Field child = new Field("", HyperTypeToArrow.toFieldType(original.getElement()), Collections.emptyList());
70+
field = new Field("col", HyperTypeToArrow.toFieldType(original), Collections.singletonList(child));
71+
} else {
72+
field = new Field("col", HyperTypeToArrow.toFieldType(original), null);
73+
}
7474

7575
// Reverse: Arrow Field -> HyperType.
7676
HyperType roundTripped = ArrowToHyperTypeMapper.toHyperType(field);
@@ -96,37 +96,27 @@ void nonEncodableKinds() {
9696
assertThat(HyperTypeKind.NULL).isNotNull();
9797
}
9898

99-
/**
100-
* Known, documented lossy round-trips. Each is something Arrow cannot preserve without
101-
* additional Hyper-specific field metadata, which the Arrow stream Hyper emits today does
102-
* not include on the parameter-binding side. Fixing any of these needs
103-
* {@link HyperTypeToArrow} to stamp hyper:* metadata on the Field and
104-
* {@link ArrowToHyperTypeMapper} to read it back.
105-
*/
106-
@Test
107-
void lossyRoundtrip_fixedCharCollapsesToVarcharUnlimited() {
108-
// HyperTypeToArrow maps CHAR(n) to Utf8 (Arrow has no fixed-length string). The reverse
109-
// mapping has no length hint and reports unlimited VARCHAR.
110-
HyperType original = HyperType.fixedChar(16, true);
111-
Field field = new Field("col", new FieldType(true, HyperTypeToArrow.toArrowType(original), null), null);
112-
assertThat(ArrowToHyperTypeMapper.toHyperType(field)).isEqualTo(HyperType.varcharUnlimited(true));
113-
}
114-
115-
@Test
116-
void lossyRoundtrip_boundedVarcharCollapsesToUnlimited() {
117-
// HyperTypeToArrow maps VARCHAR(n) to Utf8 with no length metadata; the reverse cannot
118-
// reconstruct the declared bound.
119-
HyperType original = HyperType.varchar(255, false);
120-
Field field = new Field("col", new FieldType(false, HyperTypeToArrow.toArrowType(original), null), null);
121-
assertThat(ArrowToHyperTypeMapper.toHyperType(field)).isEqualTo(HyperType.varcharUnlimited(false));
122-
}
123-
12499
@Test
125100
void lossyRoundtrip_timeTzCollapsesToTime() {
126-
// HyperTypeToArrow maps both TIME and TIME_TZ to Arrow Time(MICROSECOND, 64); Arrow Time
127-
// has no timezone slot, so the distinction is lost on the way back.
101+
// TIME WITH TIME ZONE cannot round-trip through Arrow today, and fixing it cleanly needs
102+
// more than a metadata stamp:
103+
//
104+
// - Arrow's Time type has no timezone slot (unlike Timestamp, whose Timestamp.timezone
105+
// field we already use for TIMESTAMP_TZ). So a per-value offset cannot ride on the
106+
// ArrowType itself.
107+
// - SQL `time with time zone` stores a per-*value* UTC offset (e.g. "14:00+02" vs
108+
// "14:00-05" in the same column). Field metadata is per-column, so it cannot carry
109+
// a value-varying offset.
110+
// - The driver's read-side QueryJDBCAccessorFactory has no TIME_TZ branch either —
111+
// server-returned `timetz` columns already collapse to the plain TimeVectorAccessor,
112+
// so the offset is dropped on ingress as well. Fixing the write-side alone would
113+
// create an asymmetry worse than the current uniform loss.
114+
//
115+
// Proper round-trip needs a synthetic schema (e.g. struct{time, offset_seconds}) or a
116+
// Hyper-protocol coordination. Leaving TIME_TZ explicitly pinned as lossy until that
117+
// design lands.
128118
HyperType original = HyperType.timeTz(true);
129-
Field field = new Field("col", new FieldType(true, HyperTypeToArrow.toArrowType(original), null), null);
119+
Field field = new Field("col", HyperTypeToArrow.toFieldType(original), null);
130120
assertThat(ArrowToHyperTypeMapper.toHyperType(field)).isEqualTo(HyperType.time(true));
131121
}
132122
}

0 commit comments

Comments
 (0)