Skip to content

Commit 71d6af5

Browse files
committed
refactor: explicitly initialize all fields of DataFile in aggregate tests and of SelectTestParam in schema tests
1 parent 60af2dd commit 71d6af5

File tree

2 files changed

+161
-57
lines changed

2 files changed

+161
-57
lines changed

src/iceberg/test/aggregate_test.cc

Lines changed: 126 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,24 @@ TEST(AggregateTest, AggregatesFromDataFileMetrics) {
253253

254254
ICEBERG_UNWRAP_OR_FAIL(auto lower, Literal::Int(5).Serialize());
255255
ICEBERG_UNWRAP_OR_FAIL(auto upper, Literal::Int(50).Serialize());
256-
DataFile file{
257-
.record_count = 10,
258-
.value_counts = {{1, 10}, {2, 10}},
259-
.null_value_counts = {{1, 2}, {2, 0}},
260-
.lower_bounds = {{2, lower}},
261-
.upper_bounds = {{2, upper}},
262-
};
256+
DataFile file{.file_path = "",
257+
.partition = {},
258+
.record_count = 10,
259+
.column_sizes = {},
260+
.value_counts = {{1, 10}, {2, 10}},
261+
.null_value_counts = {{1, 2}, {2, 0}},
262+
.nan_value_counts = {},
263+
.lower_bounds = {{2, lower}},
264+
.upper_bounds = {{2, upper}},
265+
.key_metadata = {},
266+
.split_offsets = {},
267+
.equality_ids = {},
268+
.sort_order_id = std::nullopt,
269+
.first_row_id = std::nullopt,
270+
.referenced_data_file = std::nullopt,
271+
.content_offset = std::nullopt,
272+
.content_size_in_bytes = std::nullopt,
273+
.partition_spec_id = std::nullopt};
263274

264275
ASSERT_TRUE(evaluator->Update(file).has_value());
265276

@@ -286,7 +297,24 @@ TEST(AggregateTest, AggregatesFromDataFileMissingMetricsReturnNull) {
286297
count_bound, count_null_bound, count_star_bound, max_bound, min_bound};
287298
ICEBERG_UNWRAP_OR_FAIL(auto evaluator, AggregateEvaluator::Make(aggregates));
288299

289-
DataFile file{.record_count = -1}; // missing/invalid
300+
DataFile file{.file_path = "",
301+
.partition = {},
302+
.record_count = -1,
303+
.column_sizes = {},
304+
.value_counts = {},
305+
.null_value_counts = {},
306+
.nan_value_counts = {},
307+
.lower_bounds = {},
308+
.upper_bounds = {},
309+
.key_metadata = {},
310+
.split_offsets = {},
311+
.equality_ids = {},
312+
.sort_order_id = std::nullopt,
313+
.first_row_id = std::nullopt,
314+
.referenced_data_file = std::nullopt,
315+
.content_offset = std::nullopt,
316+
.content_size_in_bytes = std::nullopt,
317+
.partition_spec_id = std::nullopt};
290318

291319
ASSERT_TRUE(evaluator->Update(file).has_value());
292320

@@ -309,13 +337,24 @@ TEST(AggregateTest, AggregatesFromDataFileWithTransform) {
309337

310338
ICEBERG_UNWRAP_OR_FAIL(auto lower, Literal::Int(5).Serialize());
311339
ICEBERG_UNWRAP_OR_FAIL(auto upper, Literal::Int(23).Serialize());
312-
DataFile file{
313-
.record_count = 5,
314-
.value_counts = {{1, 5}},
315-
.null_value_counts = {{1, 0}},
316-
.lower_bounds = {{1, lower}},
317-
.upper_bounds = {{1, upper}},
318-
};
340+
DataFile file{.file_path = "",
341+
.partition = {},
342+
.record_count = 5,
343+
.column_sizes = {},
344+
.value_counts = {{1, 5}},
345+
.null_value_counts = {{1, 0}},
346+
.nan_value_counts = {},
347+
.lower_bounds = {{1, lower}},
348+
.upper_bounds = {{1, upper}},
349+
.key_metadata = {},
350+
.split_offsets = {},
351+
.equality_ids = {},
352+
.sort_order_id = std::nullopt,
353+
.first_row_id = std::nullopt,
354+
.referenced_data_file = std::nullopt,
355+
.content_offset = std::nullopt,
356+
.content_size_in_bytes = std::nullopt,
357+
.partition_spec_id = std::nullopt};
319358

320359
ASSERT_TRUE(evaluator->Update(file).has_value());
321360

@@ -344,34 +383,64 @@ TEST(AggregateTest, DataFileAggregatorParity) {
344383
};
345384

346385
auto [b1_lower, b1_upper] = make_bounds(1, 33, 2345);
347-
DataFile file{
348-
.file_path = "file.avro",
349-
.record_count = 50,
350-
.value_counts = {{1, 50}, {3, 50}, {4, 50}},
351-
.null_value_counts = {{1, 10}, {3, 50}, {4, 10}},
352-
.lower_bounds = std::move(b1_lower),
353-
.upper_bounds = std::move(b1_upper),
354-
};
386+
DataFile file{.file_path = "file.avro",
387+
.partition = {},
388+
.record_count = 50,
389+
.column_sizes = {},
390+
.value_counts = {{1, 50}, {3, 50}, {4, 50}},
391+
.null_value_counts = {{1, 10}, {3, 50}, {4, 10}},
392+
.nan_value_counts = {},
393+
.lower_bounds = std::move(b1_lower),
394+
.upper_bounds = std::move(b1_upper),
395+
.key_metadata = {},
396+
.split_offsets = {},
397+
.equality_ids = {},
398+
.sort_order_id = std::nullopt,
399+
.first_row_id = std::nullopt,
400+
.referenced_data_file = std::nullopt,
401+
.content_offset = std::nullopt,
402+
.content_size_in_bytes = std::nullopt,
403+
.partition_spec_id = std::nullopt};
355404

356405
auto [b2_lower, b2_upper] = make_bounds(1, 33, 100);
357-
DataFile missing_some_nulls_1{
358-
.file_path = "file_2.avro",
359-
.record_count = 20,
360-
.value_counts = {{1, 20}, {3, 20}},
361-
.null_value_counts = {{1, 0}, {3, 20}},
362-
.lower_bounds = std::move(b2_lower),
363-
.upper_bounds = std::move(b2_upper),
364-
};
406+
DataFile missing_some_nulls_1{.file_path = "file_2.avro",
407+
.partition = {},
408+
.record_count = 20,
409+
.column_sizes = {},
410+
.value_counts = {{1, 20}, {3, 20}},
411+
.null_value_counts = {{1, 0}, {3, 20}},
412+
.nan_value_counts = {},
413+
.lower_bounds = std::move(b2_lower),
414+
.upper_bounds = std::move(b2_upper),
415+
.key_metadata = {},
416+
.split_offsets = {},
417+
.equality_ids = {},
418+
.sort_order_id = std::nullopt,
419+
.first_row_id = std::nullopt,
420+
.referenced_data_file = std::nullopt,
421+
.content_offset = std::nullopt,
422+
.content_size_in_bytes = std::nullopt,
423+
.partition_spec_id = std::nullopt};
365424

366425
auto [b3_lower, b3_upper] = make_bounds(1, -33, 3333);
367-
DataFile missing_some_nulls_2{
368-
.file_path = "file_3.avro",
369-
.record_count = 20,
370-
.value_counts = {{1, 20}, {3, 20}},
371-
.null_value_counts = {{1, 20}, {3, 20}},
372-
.lower_bounds = std::move(b3_lower),
373-
.upper_bounds = std::move(b3_upper),
374-
};
426+
DataFile missing_some_nulls_2{.file_path = "file_3.avro",
427+
.partition = {},
428+
.record_count = 20,
429+
.column_sizes = {},
430+
.value_counts = {{1, 20}, {3, 20}},
431+
.null_value_counts = {{1, 20}, {3, 20}},
432+
.nan_value_counts = {},
433+
.lower_bounds = std::move(b3_lower),
434+
.upper_bounds = std::move(b3_upper),
435+
.key_metadata = {},
436+
.split_offsets = {},
437+
.equality_ids = {},
438+
.sort_order_id = std::nullopt,
439+
.first_row_id = std::nullopt,
440+
.referenced_data_file = std::nullopt,
441+
.content_offset = std::nullopt,
442+
.content_size_in_bytes = std::nullopt,
443+
.partition_spec_id = std::nullopt};
375444

376445
DataFile missing_some_stats{
377446
.file_path = "file_missing_stats.avro",
@@ -382,11 +451,24 @@ TEST(AggregateTest, DataFileAggregatorParity) {
382451
missing_some_stats.lower_bounds = std::move(b4_lower);
383452
missing_some_stats.upper_bounds = std::move(b4_upper);
384453

385-
DataFile missing_all_optional_stats{
386-
.file_path = "file_null_stats.avro",
387-
.record_count = 20,
388-
};
389-
454+
DataFile missing_all_optional_stats{.file_path = "file_null_stats.avro",
455+
.partition = {},
456+
.record_count = 20,
457+
.column_sizes = {},
458+
.value_counts = {},
459+
.null_value_counts = {},
460+
.nan_value_counts = {},
461+
.lower_bounds = {},
462+
.upper_bounds = {},
463+
.key_metadata = {},
464+
.split_offsets = {},
465+
.equality_ids = {},
466+
.sort_order_id = std::nullopt,
467+
.first_row_id = std::nullopt,
468+
.referenced_data_file = std::nullopt,
469+
.content_offset = std::nullopt,
470+
.content_size_in_bytes = std::nullopt,
471+
.partition_spec_id = std::nullopt};
390472
auto run_case = [&](const std::vector<std::shared_ptr<Expression>>& exprs,
391473
const std::vector<DataFile>& files,
392474
const std::vector<std::optional<Scalar>>& expected,

src/iceberg/test/schema_test.cc

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -709,9 +709,9 @@ struct SelectTestParam {
709709
std::function<std::unique_ptr<iceberg::Schema>()> create_schema;
710710
std::vector<std::string> select_fields;
711711
std::function<std::unique_ptr<iceberg::Schema>()> expected_schema;
712-
bool should_succeed = false;
713-
std::string expected_error_message = "";
714-
bool case_sensitive = true;
712+
bool should_succeed;
713+
std::string expected_error_message;
714+
bool case_sensitive;
715715
};
716716

717717
class SelectParamTest : public ::testing::TestWithParam<SelectTestParam> {};
@@ -738,38 +738,49 @@ INSTANTIATE_TEST_SUITE_P(
738738
.create_schema = []() { return BasicSchema(); },
739739
.select_fields = {"*"},
740740
.expected_schema = []() { return BasicSchema(); },
741-
.should_succeed = true},
741+
.should_succeed = true,
742+
.expected_error_message = "",
743+
.case_sensitive = false},
742744

743745
SelectTestParam{.test_name = "SelectSingleField",
744746
.create_schema = []() { return BasicSchema(); },
745747
.select_fields = {"name"},
746748
.expected_schema = []() { return MakeSchema(Name()); },
747-
.should_succeed = true},
749+
.should_succeed = true,
750+
.expected_error_message = "",
751+
.case_sensitive = false},
748752

749753
SelectTestParam{
750754
.test_name = "SelectMultipleFields",
751755
.create_schema = []() { return BasicSchema(); },
752756
.select_fields = {"id", "name", "age"},
753757
.expected_schema = []() { return MakeSchema(Id(), Name(), Age()); },
754-
.should_succeed = true},
758+
.should_succeed = true,
759+
.expected_error_message = "",
760+
.case_sensitive = false},
755761

756762
SelectTestParam{.test_name = "SelectNonExistentField",
757763
.create_schema = []() { return BasicSchema(); },
758764
.select_fields = {"nonexistent"},
759765
.expected_schema = []() { return MakeSchema(); },
760-
.should_succeed = true},
766+
.should_succeed = true,
767+
.expected_error_message = "",
768+
.case_sensitive = false},
761769

762770
SelectTestParam{.test_name = "SelectCaseSensitive",
763771
.create_schema = []() { return BasicSchema(); },
764772
.select_fields = {"Name"}, // case-sensitive
765773
.expected_schema = []() { return MakeSchema(); },
766-
.should_succeed = true},
774+
.should_succeed = true,
775+
.expected_error_message = "",
776+
.case_sensitive = true},  // was the struct default before this commit; must differ from SelectCaseInsensitive, which selects the same "Name" but expects MakeSchema(Name())
767777

768778
SelectTestParam{.test_name = "SelectCaseInsensitive",
769779
.create_schema = []() { return BasicSchema(); },
770780
.select_fields = {"Name"}, // case-insensitive
771781
.expected_schema = []() { return MakeSchema(Name()); },
772782
.should_succeed = true,
783+
.expected_error_message = "",
773784
.case_sensitive = false}));
774785

775786
INSTANTIATE_TEST_SUITE_P(
@@ -779,7 +790,9 @@ INSTANTIATE_TEST_SUITE_P(
779790
.create_schema = []() { return AddressSchema(); },
780791
.select_fields = {"id", "name"},
781792
.expected_schema = []() { return MakeSchema(Id(), Name()); },
782-
.should_succeed = true},
793+
.should_succeed = true,
794+
.expected_error_message = "",
795+
.case_sensitive = false},
783796

784797
SelectTestParam{.test_name = "SelectNestedField",
785798
.create_schema = []() { return AddressSchema(); },
@@ -792,7 +805,10 @@ INSTANTIATE_TEST_SUITE_P(
792805
true};
793806
return MakeSchema(address_field);
794807
},
795-
.should_succeed = true}));
808+
809+
.should_succeed = true,
810+
.expected_error_message = "",
811+
.case_sensitive = false}));
796812

797813
INSTANTIATE_TEST_SUITE_P(
798814
SelectMultiLevelTestCases, SelectParamTest,
@@ -810,7 +826,9 @@ INSTANTIATE_TEST_SUITE_P(
810826
17, "user", std::move(user_type), true};
811827
return MakeSchema(Id(), user_field);
812828
},
813-
.should_succeed = true},
829+
.should_succeed = true,
830+
.expected_error_message = "",
831+
.case_sensitive = false},
814832

815833
SelectTestParam{.test_name = "SelectNestedFieldsAtDifferentLevels",
816834
.create_schema = []() { return MultiLevelSchema(); },
@@ -831,7 +849,9 @@ INSTANTIATE_TEST_SUITE_P(
831849
26, "user", std::move(user_type), true};
832850
return MakeSchema(user_field);
833851
},
834-
.should_succeed = true},
852+
.should_succeed = true,
853+
.expected_error_message = "",
854+
.case_sensitive = false},
835855

836856
SelectTestParam{.test_name = "SelectListAndNestedFields",
837857
.create_schema = []() { return ListSchema(); },
@@ -843,7 +863,9 @@ INSTANTIATE_TEST_SUITE_P(
843863
45, "user", std::move(user_type), true};
844864
return MakeSchema(Id(), user_field);
845865
},
846-
.should_succeed = true}));
866+
.should_succeed = true,
867+
.expected_error_message = "",
868+
.case_sensitive = false}));
847869

848870
struct ProjectTestParam {
849871
std::string test_name;

0 commit comments

Comments
 (0)