Skip to content

Commit f29600d

Browse files
authored
Bump parquet from 1.16.0 to 1.17.0 (#17504)
1 parent d0a6aba commit f29600d

2 files changed

Lines changed: 13 additions & 9 deletions

File tree

pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import java.util.Set;
2222
import javax.annotation.Nullable;
2323
import org.apache.avro.Schema;
24-
import org.apache.parquet.schema.PrimitiveType;
2524
import org.apache.pinot.plugin.inputformat.avro.AvroRecordExtractor;
2625
import org.apache.pinot.spi.data.readers.RecordExtractorConfig;
2726

@@ -40,21 +39,26 @@ protected Object transformValue(Object value, Schema.Field field) {
4039

4140
Object handleDeprecatedTypes(Object value, Schema.Field field) {
4241
Schema.Type avroColumnType = field.schema().getType();
43-
if (avroColumnType == org.apache.avro.Schema.Type.UNION) {
44-
org.apache.avro.Schema nonNullSchema = null;
45-
for (org.apache.avro.Schema childFieldSchema : field.schema().getTypes()) {
46-
if (childFieldSchema.getType() != org.apache.avro.Schema.Type.NULL) {
42+
if (avroColumnType == Schema.Type.UNION) {
43+
Schema nonNullSchema = null;
44+
for (Schema childFieldSchema : field.schema().getTypes()) {
45+
if (childFieldSchema.getType() != Schema.Type.NULL) {
4746
if (nonNullSchema == null) {
4847
nonNullSchema = childFieldSchema;
4948
} else {
5049
throw new IllegalStateException("More than one non-null schema in UNION schema");
5150
}
5251
}
5352
}
53+
assert nonNullSchema != null;
5454

55-
//INT96 is deprecated. We convert to long as we do in the native parquet extractor.
56-
if (nonNullSchema.getName().equals(PrimitiveType.PrimitiveTypeName.INT96.name())) {
57-
return ParquetNativeRecordExtractor.convertInt96ToLong((byte[]) value);
55+
// NOTE:
56+
// INT96 is deprecated. We convert to long as we do in the native parquet extractor.
57+
// See org.apache.parquet.avro.AvroSchemaConverter about how INT96 is converted into Avro schema.
58+
// We have to rely on the doc to determine whether a field is INT96.
59+
if (nonNullSchema.getType() == Schema.Type.FIXED && nonNullSchema.getFixedSize() == 12
60+
&& "INT96 represented as byte[12]".equals(nonNullSchema.getDoc())) {
61+
return ParquetNativeRecordExtractor.convertInt96ToLong((byte[]) value);
5862
}
5963
}
6064
return value;

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@
140140

141141
<arrow.version>18.3.0</arrow.version>
142142
<avro.version>1.12.1</avro.version>
143-
<parquet.version>1.16.0</parquet.version>
143+
<parquet.version>1.17.0</parquet.version>
144144
<orc.version>1.9.8</orc.version>
145145
<hive.version>2.8.1</hive.version>
146146
<helix.version>1.3.2</helix.version>

0 commit comments

Comments
 (0)