Skip to content

Commit 3dcb486

Browse files
committed
Add variant spec version to Variant type
1 parent 707a0a0 commit 3dcb486

8 files changed

Lines changed: 69 additions & 27 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
import org.apache.parquet.Preconditions;
4444

4545
public abstract class LogicalTypeAnnotation {
46+
public static final byte VARIANT_SPEC_VERSION = 1;
47+
4648
enum LogicalTypeToken {
4749
MAP {
4850
@Override
@@ -60,9 +62,8 @@ protected LogicalTypeAnnotation fromString(List<String> params) {
6062
@Override
6163
protected LogicalTypeAnnotation fromString(List<String> params) {
6264
Preconditions.checkArgument(
63-
params.isEmpty(), "Expecting 0 parameter for variant logical type, got %d", params.size());
64-
65-
return variantType();
65+
params.size() == 1, "Expecting 1 parameter for variant logical type, got %d", params.size());
66+
return variantType(Byte.parseByte(params.get(0)));
6667
}
6768
},
6869
STRING {
@@ -278,8 +279,8 @@ public static ListLogicalTypeAnnotation listType() {
278279
return ListLogicalTypeAnnotation.INSTANCE;
279280
}
280281

281-
public static VariantLogicalTypeAnnotation variantType() {
282-
return VariantLogicalTypeAnnotation.INSTANCE;
282+
public static VariantLogicalTypeAnnotation variantType(byte specificationVersion) {
283+
return new VariantLogicalTypeAnnotation(specificationVersion);
283284
}
284285

285286
public static EnumLogicalTypeAnnotation enumType() {
@@ -1142,9 +1143,11 @@ public int hashCode() {
11421143
}
11431144

11441145
public static class VariantLogicalTypeAnnotation extends LogicalTypeAnnotation {
1145-
private static final VariantLogicalTypeAnnotation INSTANCE = new VariantLogicalTypeAnnotation();
1146+
private byte specificationVersion;
11461147

1147-
private VariantLogicalTypeAnnotation() {}
1148+
private VariantLogicalTypeAnnotation(byte specificationVersion) {
1149+
this.specificationVersion = specificationVersion;
1150+
}
11481151

11491152
@Override
11501153
public OriginalType toOriginalType() {
@@ -1161,6 +1164,24 @@ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> logicalTypeAnnotat
11611164
LogicalTypeToken getType() {
11621165
return LogicalTypeToken.VARIANT;
11631166
}
1167+
1168+
public byte getSpecificationVersion() {
1169+
return this.specificationVersion;
1170+
}
1171+
1172+
@Override
1173+
protected String typeParametersAsString() {
1174+
return "(" + specificationVersion + ")";
1175+
}
1176+
1177+
@Override
1178+
public boolean equals(Object obj) {
1179+
if (!(obj instanceof VariantLogicalTypeAnnotation)) {
1180+
return false;
1181+
}
1182+
VariantLogicalTypeAnnotation other = (VariantLogicalTypeAnnotation) obj;
1183+
return specificationVersion == other.specificationVersion;
1184+
}
11641185
}
11651186

11661187
/**

parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,17 +124,30 @@ private static void addGroupType(Tokenizer st, Repetition r, GroupBuilder<?> bui
124124
t = st.nextToken();
125125
if (isLogicalType(t)) {
126126
LogicalTypeAnnotation.LogicalTypeToken logicalType = LogicalTypeAnnotation.LogicalTypeToken.valueOf(t);
127-
LogicalTypeAnnotation logicalTypeAnnotation = logicalType.fromString(new ArrayList<>());
127+
t = st.nextToken();
128+
List<String> tokens = new ArrayList<>();
129+
if ("(".equals(t)) {
130+
while (!")".equals(t)) {
131+
if (!(",".equals(t) || "(".equals(t) || ")".equals(t))) {
132+
tokens.add(t);
133+
}
134+
t = st.nextToken();
135+
}
136+
t = st.nextToken();
137+
}
138+
139+
LogicalTypeAnnotation logicalTypeAnnotation = logicalType.fromString(tokens);
128140
childBuilder.as(logicalTypeAnnotation);
129141
annotation = logicalTypeAnnotation.toString();
130142
} else {
131143
// Try to parse as OriginalType
132144
OriginalType originalType = OriginalType.valueOf(t);
133145
childBuilder.as(originalType);
134146
annotation = originalType.toString();
147+
t = st.nextToken();
135148
}
136149

137-
check(st.nextToken(), ")", "logical type ended by )", st);
150+
check(t, ")", "logical type ended by )", st);
138151
t = st.nextToken();
139152
}
140153
if (t.equals("=")) {

parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -452,15 +452,15 @@ public void testEmbeddedAnnotations() {
452452
@Test
453453
public void testVARIANTAnnotation() {
454454
String message = "message Message {\n"
455-
+ " required group aVariant (VARIANT) {\n"
455+
+ " required group aVariant (VARIANT(2)) {\n"
456456
+ " required binary metadata;\n"
457457
+ " required binary value;\n"
458458
+ " }\n"
459459
+ "}\n";
460460

461461
MessageType expected = buildMessage()
462462
.requiredGroup()
463-
.as(LogicalTypeAnnotation.variantType())
463+
.as(LogicalTypeAnnotation.variantType((byte) 2))
464464
.required(BINARY)
465465
.named("metadata")
466466
.required(BINARY)

parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
2222
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
23+
import static org.apache.parquet.schema.LogicalTypeAnnotation.VARIANT_SPEC_VERSION;
2324
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
2425
import static org.apache.parquet.schema.OriginalType.BSON;
2526
import static org.apache.parquet.schema.OriginalType.DATE;
@@ -1421,15 +1422,15 @@ public void testVariantLogicalType() {
14211422
GroupType variantExpected = new GroupType(
14221423
REQUIRED,
14231424
name,
1424-
LogicalTypeAnnotation.variantType(),
1425+
LogicalTypeAnnotation.variantType(VARIANT_SPEC_VERSION),
14251426
new PrimitiveType(REQUIRED, BINARY, "metadata"),
14261427
new PrimitiveType(REQUIRED, BINARY, "value"));
14271428

14281429
GroupType variantActual = Types.buildGroup(REQUIRED)
14291430
.addFields(
14301431
Types.required(BINARY).named("metadata"),
14311432
Types.required(BINARY).named("value"))
1432-
.as(LogicalTypeAnnotation.variantType())
1433+
.as(LogicalTypeAnnotation.variantType(VARIANT_SPEC_VERSION))
14331434
.named(name);
14341435

14351436
assertEquals(variantExpected, variantActual);
@@ -1441,7 +1442,7 @@ public void testVariantLogicalTypeWithShredded() {
14411442
GroupType variantExpected = new GroupType(
14421443
REQUIRED,
14431444
name,
1444-
LogicalTypeAnnotation.variantType(),
1445+
LogicalTypeAnnotation.variantType(VARIANT_SPEC_VERSION),
14451446
new PrimitiveType(REQUIRED, BINARY, "metadata"),
14461447
new PrimitiveType(OPTIONAL, BINARY, "value"),
14471448
new PrimitiveType(OPTIONAL, BINARY, "typed_value", LogicalTypeAnnotation.stringType()));
@@ -1453,7 +1454,7 @@ public void testVariantLogicalTypeWithShredded() {
14531454
Types.optional(BINARY)
14541455
.as(LogicalTypeAnnotation.stringType())
14551456
.named("typed_value"))
1456-
.as(LogicalTypeAnnotation.variantType())
1457+
.as(LogicalTypeAnnotation.variantType(VARIANT_SPEC_VERSION))
14571458
.named(name);
14581459

14591460
assertEquals(variantExpected, variantActual);

parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
2222
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
2323
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
24+
import static org.apache.parquet.schema.LogicalTypeAnnotation.VARIANT_SPEC_VERSION;
2425
import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType;
2526
import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
2627
import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
@@ -481,7 +482,7 @@ public void testVariantLogicalType() {
481482
GroupType variant = new GroupType(
482483
REQUIRED,
483484
name,
484-
LogicalTypeAnnotation.variantType(),
485+
LogicalTypeAnnotation.variantType(VARIANT_SPEC_VERSION),
485486
Types.required(BINARY).named("metadata"),
486487
Types.required(BINARY).named("value"));
487488

@@ -504,7 +505,7 @@ public void testVariantLogicalTypeWithShredded() {
504505
GroupType variant = new GroupType(
505506
REQUIRED,
506507
name,
507-
LogicalTypeAnnotation.variantType(),
508+
LogicalTypeAnnotation.variantType(VARIANT_SPEC_VERSION),
508509
Types.required(BINARY).named("metadata"),
509510
Types.optional(BINARY).named("value"),
510511
Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("typed_value"));
@@ -522,6 +523,7 @@ public void testVariantLogicalTypeWithShredded() {
522523
assertNull(annotation.toOriginalType());
523524
assertTrue(annotation instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation);
524525
}
526+
525527
/**
526528
* A convenience method to avoid a large number of @Test(expected=...) tests
527529
*

parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ public static LogicalType DECIMAL(int scale, int precision) {
3232
return LogicalType.DECIMAL(new DecimalType(scale, precision));
3333
}
3434

35+
public static LogicalType VARIANT(byte specificationVersion) {
36+
VariantType type = new VariantType();
37+
type.setSpecification_version(specificationVersion);
38+
return LogicalType.VARIANT(type);
39+
}
40+
3541
public static final LogicalType UTF8 = LogicalType.STRING(new StringType());
3642
public static final LogicalType MAP = LogicalType.MAP(new MapType());
3743
public static final LogicalType LIST = LogicalType.LIST(new ListType());
@@ -54,11 +60,4 @@ public static LogicalType DECIMAL(int scale, int precision) {
5460
public static final LogicalType BSON = LogicalType.BSON(new BsonType());
5561
public static final LogicalType FLOAT16 = LogicalType.FLOAT16(new Float16Type());
5662
public static final LogicalType UUID = LogicalType.UUID(new UUIDType());
57-
public static final LogicalType VARIANT = LogicalType.VARIANT(variant());
58-
59-
private static final VariantType variant() {
60-
VariantType type = new VariantType();
61-
type.setSpecification_version((byte) 1);
62-
return type;
63-
}
6463
}

parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
import org.apache.parquet.format.Type;
104104
import org.apache.parquet.format.TypeDefinedOrder;
105105
import org.apache.parquet.format.Uncompressed;
106+
import org.apache.parquet.format.VariantType;
106107
import org.apache.parquet.format.XxHash;
107108
import org.apache.parquet.hadoop.metadata.BlockMetaData;
108109
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -517,7 +518,7 @@ public Optional<LogicalType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnno
517518

518519
@Override
519520
public Optional<LogicalType> visit(LogicalTypeAnnotation.VariantLogicalTypeAnnotation variantLogicalType) {
520-
return of(LogicalTypes.VARIANT);
521+
return of(LogicalTypes.VARIANT(variantLogicalType.getSpecificationVersion()));
521522
}
522523
}
523524

@@ -1183,7 +1184,8 @@ LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) {
11831184
case FLOAT16:
11841185
return LogicalTypeAnnotation.float16Type();
11851186
case VARIANT:
1186-
return LogicalTypeAnnotation.variantType();
1187+
VariantType variant = type.getVARIANT();
1188+
return LogicalTypeAnnotation.variantType(variant.getSpecification_version());
11871189
default:
11881190
throw new RuntimeException("Unknown logical type " + type);
11891191
}

parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
3030
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
3131
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
32+
import static org.apache.parquet.schema.LogicalTypeAnnotation.VARIANT_SPEC_VERSION;
3233
import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType;
3334
import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
3435
import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
@@ -1594,7 +1595,7 @@ public void testMapConvertedTypeReadWrite() throws Exception {
15941595
public void testVariantLogicalType() {
15951596
MessageType expected = Types.buildMessage()
15961597
.requiredGroup()
1597-
.as(variantType())
1598+
.as(variantType(VARIANT_SPEC_VERSION))
15981599
.required(PrimitiveTypeName.BINARY)
15991600
.named("metadata")
16001601
.required(PrimitiveTypeName.BINARY)
@@ -1606,6 +1607,9 @@ public void testVariantLogicalType() {
16061607
List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
16071608
MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
16081609
assertEquals(expected, schema);
1610+
LogicalTypeAnnotation logicalType = schema.getType("v").getLogicalTypeAnnotation();
1611+
assertEquals(LogicalTypeAnnotation.variantType(VARIANT_SPEC_VERSION), logicalType);
1612+
assertEquals(VARIANT_SPEC_VERSION, ((LogicalTypeAnnotation.VariantLogicalTypeAnnotation) logicalType).getSpecificationVersion());
16091613
}
16101614

16111615
private void verifyMapMessageType(final MessageType messageType, final String keyValueName) throws IOException {

0 commit comments

Comments
 (0)