2121import java .util .Set ;
2222import javax .annotation .Nullable ;
2323import org .apache .avro .Schema ;
24- import org .apache .parquet .schema .PrimitiveType ;
2524import org .apache .pinot .plugin .inputformat .avro .AvroRecordExtractor ;
2625import org .apache .pinot .spi .data .readers .RecordExtractorConfig ;
2726
@@ -40,21 +39,26 @@ protected Object transformValue(Object value, Schema.Field field) {
4039
4140 Object handleDeprecatedTypes (Object value , Schema .Field field ) {
4241 Schema .Type avroColumnType = field .schema ().getType ();
43- if (avroColumnType == org . apache . avro . Schema .Type .UNION ) {
44- org . apache . avro . Schema nonNullSchema = null ;
45- for (org . apache . avro . Schema childFieldSchema : field .schema ().getTypes ()) {
46- if (childFieldSchema .getType () != org . apache . avro . Schema .Type .NULL ) {
42+ if (avroColumnType == Schema .Type .UNION ) {
43+ Schema nonNullSchema = null ;
44+ for (Schema childFieldSchema : field .schema ().getTypes ()) {
45+ if (childFieldSchema .getType () != Schema .Type .NULL ) {
4746 if (nonNullSchema == null ) {
4847 nonNullSchema = childFieldSchema ;
4948 } else {
5049 throw new IllegalStateException ("More than one non-null schema in UNION schema" );
5150 }
5251 }
5352 }
53+ assert nonNullSchema != null ;
5454
55- //INT96 is deprecated. We convert to long as we do in the native parquet extractor.
56- if (nonNullSchema .getName ().equals (PrimitiveType .PrimitiveTypeName .INT96 .name ())) {
57- return ParquetNativeRecordExtractor .convertInt96ToLong ((byte []) value );
55+ // NOTE:
56+ // INT96 is deprecated. We convert to long as we do in the native parquet extractor.
57+ // See org.apache.parquet.avro.AvroSchemaConverter about how INT96 is converted into Avro schema.
58+ // We have to rely on the doc to determine whether a field is INT96.
59+ if (nonNullSchema .getType () == Schema .Type .FIXED && nonNullSchema .getFixedSize () == 12
60+ && "INT96 represented as byte[12]" .equals (nonNullSchema .getDoc ())) {
61+ return ParquetNativeRecordExtractor .convertInt96ToLong ((byte []) value );
5862 }
5963 }
6064 return value ;
0 commit comments