@@ -450,6 +450,9 @@ def construct_test_table_primitive_types() -> Tuple[pq.FileMetaData, Union[Table
450450 {"id" : 13 , "name" : "decimal8" , "required" : False , "type" : "decimal(5, 2)" },
451451 {"id" : 14 , "name" : "decimal16" , "required" : False , "type" : "decimal(16, 6)" },
452452 {"id" : 15 , "name" : "decimal32" , "required" : False , "type" : "decimal(19, 6)" },
453+ {"id" : 16 , "name" : "empty_decimal8" , "required" : False , "type" : "decimal(5, 2)" },
454+ {"id" : 17 , "name" : "empty_decimal16" , "required" : False , "type" : "decimal(16, 6)" },
455+ {"id" : 18 , "name" : "empty_decimal32" , "required" : False , "type" : "decimal(19, 6)" },
453456 ],
454457 },
455458 ],
@@ -477,6 +480,9 @@ def construct_test_table_primitive_types() -> Tuple[pq.FileMetaData, Union[Table
477480 decimal8 = pa .array ([Decimal ("123.45" ), Decimal ("678.91" )], pa .decimal128 (8 , 2 ))
478481 decimal16 = pa .array ([Decimal ("12345679.123456" ), Decimal ("67891234.678912" )], pa .decimal128 (16 , 6 ))
479482 decimal32 = pa .array ([Decimal ("1234567890123.123456" ), Decimal ("9876543210703.654321" )], pa .decimal128 (19 , 6 ))
483+ empty_decimal8 = pa .array ([None , None ], pa .decimal128 (8 , 2 ))
484+ empty_decimal16 = pa .array ([None , None ], pa .decimal128 (16 , 6 ))
485+ empty_decimal32 = pa .array ([None , None ], pa .decimal128 (19 , 6 ))
480486
481487 table = pa .Table .from_pydict (
482488 {
@@ -495,6 +501,9 @@ def construct_test_table_primitive_types() -> Tuple[pq.FileMetaData, Union[Table
495501 "decimal8" : decimal8 ,
496502 "decimal16" : decimal16 ,
497503 "decimal32" : decimal32 ,
504+ "empty_decimal8" : empty_decimal8 ,
505+ "empty_decimal16" : empty_decimal16 ,
506+ "empty_decimal32" : empty_decimal32 ,
498507 },
499508 schema = arrow_schema ,
500509 )
@@ -520,8 +529,14 @@ def test_metrics_primitive_types() -> None:
520529 )
521530 datafile = DataFile .from_args (** statistics .to_serialized_dict ())
522531
523- assert len (datafile .value_counts ) == 15
524- assert len (datafile .null_value_counts ) == 15
532+ assert len (datafile .value_counts ) == 18
533+ assert len (datafile .null_value_counts ) == 18
534+ assert datafile .null_value_counts [13 ] != datafile .value_counts [13 ]
535+ assert datafile .null_value_counts [14 ] != datafile .value_counts [14 ]
536+ assert datafile .null_value_counts [15 ] != datafile .value_counts [15 ]
537+ assert datafile .null_value_counts [16 ] == datafile .value_counts [16 ]
538+ assert datafile .null_value_counts [17 ] == datafile .value_counts [17 ]
539+ assert datafile .null_value_counts [18 ] == datafile .value_counts [18 ]
525540 assert len (datafile .nan_value_counts ) == 0
526541
527542 tz = timezone (timedelta (seconds = 19800 ))
@@ -542,6 +557,7 @@ def test_metrics_primitive_types() -> None:
542557 assert datafile .lower_bounds [13 ][::- 1 ].ljust (4 , b"\x00 " ) == STRUCT_INT32 .pack (12345 )
543558 assert datafile .lower_bounds [14 ][::- 1 ].ljust (8 , b"\x00 " ) == STRUCT_INT64 .pack (12345679123456 )
544559 assert str (int .from_bytes (datafile .lower_bounds [15 ], byteorder = "big" , signed = True )).encode ("utf-8" ) == b"1234567890123123456"
560+ assert not any (key in datafile .lower_bounds .keys () for key in [16 , 17 , 18 ])
545561
546562 assert len (datafile .upper_bounds ) == 15
547563 assert datafile .upper_bounds [1 ] == STRUCT_BOOL .pack (True )
@@ -559,6 +575,7 @@ def test_metrics_primitive_types() -> None:
559575 assert datafile .upper_bounds [13 ][::- 1 ].ljust (4 , b"\x00 " ) == STRUCT_INT32 .pack (67891 )
560576 assert datafile .upper_bounds [14 ][::- 1 ].ljust (8 , b"\x00 " ) == STRUCT_INT64 .pack (67891234678912 )
561577 assert str (int .from_bytes (datafile .upper_bounds [15 ], byteorder = "big" , signed = True )).encode ("utf-8" ) == b"9876543210703654321"
578+ assert not any (key in datafile .upper_bounds .keys () for key in [16 , 17 , 18 ])
562579
563580
564581def construct_test_table_invalid_upper_bound () -> Tuple [pq .FileMetaData , Union [TableMetadataV1 , TableMetadataV2 ]]:
0 commit comments