@@ -253,13 +253,24 @@ TEST(AggregateTest, AggregatesFromDataFileMetrics) {
253253
254254 ICEBERG_UNWRAP_OR_FAIL (auto lower, Literal::Int (5 ).Serialize ());
255255 ICEBERG_UNWRAP_OR_FAIL (auto upper, Literal::Int (50 ).Serialize ());
256- DataFile file{
257- .record_count = 10 ,
258- .value_counts = {{1 , 10 }, {2 , 10 }},
259- .null_value_counts = {{1 , 2 }, {2 , 0 }},
260- .lower_bounds = {{2 , lower}},
261- .upper_bounds = {{2 , upper}},
262- };
256+ DataFile file{.file_path = " " ,
257+ .partition = {},
258+ .record_count = 10 ,
259+ .column_sizes = {},
260+ .value_counts = {{1 , 10 }, {2 , 10 }},
261+ .null_value_counts = {{1 , 2 }, {2 , 0 }},
262+ .nan_value_counts = {},
263+ .lower_bounds = {{2 , lower}},
264+ .upper_bounds = {{2 , upper}},
265+ .key_metadata = {},
266+ .split_offsets = {},
267+ .equality_ids = {},
268+ .sort_order_id = std::nullopt ,
269+ .first_row_id = std::nullopt ,
270+ .referenced_data_file = std::nullopt ,
271+ .content_offset = std::nullopt ,
272+ .content_size_in_bytes = std::nullopt ,
273+ .partition_spec_id = std::nullopt };
263274
264275 ASSERT_TRUE (evaluator->Update (file).has_value ());
265276
@@ -286,7 +297,24 @@ TEST(AggregateTest, AggregatesFromDataFileMissingMetricsReturnNull) {
286297 count_bound, count_null_bound, count_star_bound, max_bound, min_bound};
287298 ICEBERG_UNWRAP_OR_FAIL (auto evaluator, AggregateEvaluator::Make (aggregates));
288299
289- DataFile file{.record_count = -1 }; // missing/invalid
300+ DataFile file{.file_path = " " ,
301+ .partition = {},
302+ .record_count = -1 ,
303+ .column_sizes = {},
304+ .value_counts = {},
305+ .null_value_counts = {},
306+ .nan_value_counts = {},
307+ .lower_bounds = {},
308+ .upper_bounds = {},
309+ .key_metadata = {},
310+ .split_offsets = {},
311+ .equality_ids = {},
312+ .sort_order_id = std::nullopt ,
313+ .first_row_id = std::nullopt ,
314+ .referenced_data_file = std::nullopt ,
315+ .content_offset = std::nullopt ,
316+ .content_size_in_bytes = std::nullopt ,
317+ .partition_spec_id = std::nullopt };
290318
291319 ASSERT_TRUE (evaluator->Update (file).has_value ());
292320
@@ -309,13 +337,24 @@ TEST(AggregateTest, AggregatesFromDataFileWithTransform) {
309337
310338 ICEBERG_UNWRAP_OR_FAIL (auto lower, Literal::Int (5 ).Serialize ());
311339 ICEBERG_UNWRAP_OR_FAIL (auto upper, Literal::Int (23 ).Serialize ());
312- DataFile file{
313- .record_count = 5 ,
314- .value_counts = {{1 , 5 }},
315- .null_value_counts = {{1 , 0 }},
316- .lower_bounds = {{1 , lower}},
317- .upper_bounds = {{1 , upper}},
318- };
340+ DataFile file{.file_path = " " ,
341+ .partition = {},
342+ .record_count = 5 ,
343+ .column_sizes = {},
344+ .value_counts = {{1 , 5 }},
345+ .null_value_counts = {{1 , 0 }},
346+ .nan_value_counts = {},
347+ .lower_bounds = {{1 , lower}},
348+ .upper_bounds = {{1 , upper}},
349+ .key_metadata = {},
350+ .split_offsets = {},
351+ .equality_ids = {},
352+ .sort_order_id = std::nullopt ,
353+ .first_row_id = std::nullopt ,
354+ .referenced_data_file = std::nullopt ,
355+ .content_offset = std::nullopt ,
356+ .content_size_in_bytes = std::nullopt ,
357+ .partition_spec_id = std::nullopt };
319358
320359 ASSERT_TRUE (evaluator->Update (file).has_value ());
321360
@@ -344,34 +383,64 @@ TEST(AggregateTest, DataFileAggregatorParity) {
344383 };
345384
346385 auto [b1_lower, b1_upper] = make_bounds (1 , 33 , 2345 );
347- DataFile file{
348- .file_path = " file.avro" ,
349- .record_count = 50 ,
350- .value_counts = {{1 , 50 }, {3 , 50 }, {4 , 50 }},
351- .null_value_counts = {{1 , 10 }, {3 , 50 }, {4 , 10 }},
352- .lower_bounds = std::move (b1_lower),
353- .upper_bounds = std::move (b1_upper),
354- };
386+ DataFile file{.file_path = " file.avro" ,
387+ .partition = {},
388+ .record_count = 50 ,
389+ .column_sizes = {},
390+ .value_counts = {{1 , 50 }, {3 , 50 }, {4 , 50 }},
391+ .null_value_counts = {{1 , 10 }, {3 , 50 }, {4 , 10 }},
392+ .nan_value_counts = {},
393+ .lower_bounds = std::move (b1_lower),
394+ .upper_bounds = std::move (b1_upper),
395+ .key_metadata = {},
396+ .split_offsets = {},
397+ .equality_ids = {},
398+ .sort_order_id = std::nullopt ,
399+ .first_row_id = std::nullopt ,
400+ .referenced_data_file = std::nullopt ,
401+ .content_offset = std::nullopt ,
402+ .content_size_in_bytes = std::nullopt ,
403+ .partition_spec_id = std::nullopt };
355404
356405 auto [b2_lower, b2_upper] = make_bounds (1 , 33 , 100 );
357- DataFile missing_some_nulls_1{
358- .file_path = " file_2.avro" ,
359- .record_count = 20 ,
360- .value_counts = {{1 , 20 }, {3 , 20 }},
361- .null_value_counts = {{1 , 0 }, {3 , 20 }},
362- .lower_bounds = std::move (b2_lower),
363- .upper_bounds = std::move (b2_upper),
364- };
406+ DataFile missing_some_nulls_1{.file_path = " file_2.avro" ,
407+ .partition = {},
408+ .record_count = 20 ,
409+ .column_sizes = {},
410+ .value_counts = {{1 , 20 }, {3 , 20 }},
411+ .null_value_counts = {{1 , 0 }, {3 , 20 }},
412+ .nan_value_counts = {},
413+ .lower_bounds = std::move (b2_lower),
414+ .upper_bounds = std::move (b2_upper),
415+ .key_metadata = {},
416+ .split_offsets = {},
417+ .equality_ids = {},
418+ .sort_order_id = std::nullopt ,
419+ .first_row_id = std::nullopt ,
420+ .referenced_data_file = std::nullopt ,
421+ .content_offset = std::nullopt ,
422+ .content_size_in_bytes = std::nullopt ,
423+ .partition_spec_id = std::nullopt };
365424
366425 auto [b3_lower, b3_upper] = make_bounds (1 , -33 , 3333 );
367- DataFile missing_some_nulls_2{
368- .file_path = " file_3.avro" ,
369- .record_count = 20 ,
370- .value_counts = {{1 , 20 }, {3 , 20 }},
371- .null_value_counts = {{1 , 20 }, {3 , 20 }},
372- .lower_bounds = std::move (b3_lower),
373- .upper_bounds = std::move (b3_upper),
374- };
426+ DataFile missing_some_nulls_2{.file_path = " file_3.avro" ,
427+ .partition = {},
428+ .record_count = 20 ,
429+ .column_sizes = {},
430+ .value_counts = {{1 , 20 }, {3 , 20 }},
431+ .null_value_counts = {{1 , 20 }, {3 , 20 }},
432+ .nan_value_counts = {},
433+ .lower_bounds = std::move (b3_lower),
434+ .upper_bounds = std::move (b3_upper),
435+ .key_metadata = {},
436+ .split_offsets = {},
437+ .equality_ids = {},
438+ .sort_order_id = std::nullopt ,
439+ .first_row_id = std::nullopt ,
440+ .referenced_data_file = std::nullopt ,
441+ .content_offset = std::nullopt ,
442+ .content_size_in_bytes = std::nullopt ,
443+ .partition_spec_id = std::nullopt };
375444
376445 DataFile missing_some_stats{
377446 .file_path = " file_missing_stats.avro" ,
@@ -382,11 +451,24 @@ TEST(AggregateTest, DataFileAggregatorParity) {
382451 missing_some_stats.lower_bounds = std::move (b4_lower);
383452 missing_some_stats.upper_bounds = std::move (b4_upper);
384453
385- DataFile missing_all_optional_stats{
386- .file_path = " file_null_stats.avro" ,
387- .record_count = 20 ,
388- };
389-
454+ DataFile missing_all_optional_stats{.file_path = " file_null_stats.avro" ,
455+ .partition = {},
456+ .record_count = 20 ,
457+ .column_sizes = {},
458+ .value_counts = {},
459+ .null_value_counts = {},
460+ .nan_value_counts = {},
461+ .lower_bounds = {},
462+ .upper_bounds = {},
463+ .key_metadata = {},
464+ .split_offsets = {},
465+ .equality_ids = {},
466+ .sort_order_id = std::nullopt ,
467+ .first_row_id = std::nullopt ,
468+ .referenced_data_file = std::nullopt ,
469+ .content_offset = std::nullopt ,
470+ .content_size_in_bytes = std::nullopt ,
471+ .partition_spec_id = std::nullopt };
390472 auto run_case = [&](const std::vector<std::shared_ptr<Expression>>& exprs,
391473 const std::vector<DataFile>& files,
392474 const std::vector<std::optional<Scalar>>& expected,
0 commit comments