Skip to content

Commit 1e7d401

Browse files
committed
refactor: Initialize parquet_logical_type to nullptr and add empty metadata in WriteArray
1 parent 0f3cc90 commit 1e7d401

File tree

2 files changed

+120
-26
lines changed

2 files changed

+120
-26
lines changed

src/iceberg/test/table_scan_test.cc

Lines changed: 119 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -79,28 +79,36 @@ class TableScanTest : public testing::TestWithParam<int8_t> {
7979
.sequence_number = kSequenceNumber,
8080
.timestamp_ms = kTimestampMs,
8181
.manifest_list = "/tmp/metadata/snap-1000-1-manifest-list.avro",
82+
.summary = {},
8283
.schema_id = schema_->schema_id()});
8384

84-
table_metadata_ = std::make_shared<TableMetadata>(
85-
TableMetadata{.format_version = 2,
86-
.table_uuid = "test-table-uuid",
87-
.location = "/tmp/table",
88-
.last_sequence_number = kSequenceNumber,
89-
.last_updated_ms = kTimestampMs,
90-
.last_column_id = 2,
91-
.schemas = {schema_},
92-
.current_schema_id = schema_->schema_id(),
93-
.partition_specs = {partitioned_spec_, unpartitioned_spec_},
94-
.default_spec_id = partitioned_spec_->spec_id(),
95-
.last_partition_id = 1000,
96-
.current_snapshot_id = kSnapshotId,
97-
.snapshots = {snapshot},
98-
.snapshot_log = {SnapshotLogEntry{.timestamp_ms = kTimestampMs,
99-
.snapshot_id = kSnapshotId}},
100-
.default_sort_order_id = 0,
101-
.refs = {{"main", std::make_shared<SnapshotRef>(SnapshotRef{
102-
.snapshot_id = kSnapshotId,
103-
.retention = SnapshotRef::Branch{}})}}});
85+
table_metadata_ = std::make_shared<TableMetadata>(TableMetadata{
86+
.format_version = 2,
87+
.table_uuid = "test-table-uuid",
88+
.location = "/tmp/table",
89+
.last_sequence_number = kSequenceNumber,
90+
.last_updated_ms = kTimestampMs,
91+
.last_column_id = 2,
92+
.schemas = {schema_},
93+
.current_schema_id = schema_->schema_id(),
94+
.partition_specs = {partitioned_spec_, unpartitioned_spec_},
95+
.default_spec_id = partitioned_spec_->spec_id(),
96+
.last_partition_id = 1000,
97+
.properties = {},
98+
.current_snapshot_id = kSnapshotId,
99+
.snapshots = {snapshot},
100+
.snapshot_log = {SnapshotLogEntry{.timestamp_ms = kTimestampMs,
101+
.snapshot_id = kSnapshotId}},
102+
.metadata_log = {},
103+
.sort_orders = {},
104+
.default_sort_order_id = 0,
105+
.refs = {{"main",
106+
std::make_shared<SnapshotRef>(SnapshotRef{
107+
.snapshot_id = kSnapshotId, .retention = SnapshotRef::Branch{}})}},
108+
.statistics = {},
109+
.partition_statistics = {},
110+
.next_row_id = 0,
111+
});
104112
}
105113

106114
std::shared_ptr<DataFile> MakePositionDeleteFile(
@@ -113,7 +121,20 @@ class TableScanTest : public testing::TestWithParam<int8_t> {
113121
.partition = partition,
114122
.record_count = 1,
115123
.file_size_in_bytes = 10,
124+
.column_sizes = {},
125+
.value_counts = {},
126+
.null_value_counts = {},
127+
.nan_value_counts = {},
128+
.lower_bounds = {},
129+
.upper_bounds = {},
130+
.key_metadata = {},
131+
.split_offsets = {},
132+
.equality_ids = {},
133+
.sort_order_id = std::nullopt,
134+
.first_row_id = std::nullopt,
116135
.referenced_data_file = referenced_file,
136+
.content_offset = std::nullopt,
137+
.content_size_in_bytes = std::nullopt,
117138
.partition_spec_id = spec_id,
118139
});
119140
}
@@ -129,7 +150,20 @@ class TableScanTest : public testing::TestWithParam<int8_t> {
129150
.partition = partition,
130151
.record_count = 1,
131152
.file_size_in_bytes = 10,
153+
.column_sizes = {},
154+
.value_counts = {},
155+
.null_value_counts = {},
156+
.nan_value_counts = {},
157+
.lower_bounds = {},
158+
.upper_bounds = {},
159+
.key_metadata = {},
160+
.split_offsets = {},
132161
.equality_ids = std::move(equality_ids),
162+
.sort_order_id = std::nullopt,
163+
.first_row_id = std::nullopt,
164+
.referenced_data_file = std::nullopt,
165+
.content_offset = std::nullopt,
166+
.content_size_in_bytes = std::nullopt,
133167
.partition_spec_id = spec_id,
134168
});
135169
}
@@ -151,7 +185,20 @@ class TableScanTest : public testing::TestWithParam<int8_t> {
151185
.partition = partition,
152186
.record_count = record_count,
153187
.file_size_in_bytes = 10,
188+
.column_sizes = {},
189+
.value_counts = {},
190+
.null_value_counts = {},
191+
.nan_value_counts = {},
192+
.lower_bounds = {},
193+
.upper_bounds = {},
194+
.key_metadata = {},
195+
.split_offsets = {},
196+
.equality_ids = {},
154197
.sort_order_id = 0,
198+
.first_row_id = std::nullopt,
199+
.referenced_data_file = std::nullopt,
200+
.content_offset = std::nullopt,
201+
.content_size_in_bytes = std::nullopt,
155202
.partition_spec_id = spec_id,
156203
});
157204
// Set lower/upper bounds for field_id=1 ("id" column) if provided
@@ -351,13 +398,27 @@ TEST_P(TableScanTest, TableScanBuilderValidationErrors) {
351398
TEST_P(TableScanTest, DataTableScanPlanFilesEmpty) {
352399
auto empty_metadata = std::make_shared<TableMetadata>(
353400
TableMetadata{.format_version = 2,
401+
.table_uuid = "",
402+
.location = "",
403+
.last_sequence_number = 0,
404+
.last_updated_ms = {},
405+
.last_column_id = 0,
354406
.schemas = {schema_},
355407
.current_schema_id = schema_->schema_id(),
356408
.partition_specs = {unpartitioned_spec_},
357409
.default_spec_id = unpartitioned_spec_->spec_id(),
410+
.last_partition_id = 0,
411+
.properties = {},
358412
.current_snapshot_id = -1,
359413
.snapshots = {},
360-
.refs = {}});
414+
.snapshot_log = {},
415+
.metadata_log = {},
416+
.sort_orders = {},
417+
.default_sort_order_id = 0,
418+
.refs = {},
419+
.statistics = {},
420+
.partition_statistics = {},
421+
.next_row_id = 0});
361422

362423
ICEBERG_UNWRAP_OR_FAIL(auto builder,
363424
DataTableScanBuilder::Make(empty_metadata, file_io_));
@@ -407,15 +468,22 @@ TEST_P(TableScanTest, PlanFilesWithDataManifests) {
407468
.partition_specs = {partitioned_spec_, unpartitioned_spec_},
408469
.default_spec_id = partitioned_spec_->spec_id(),
409470
.last_partition_id = 1000,
471+
.properties = {},
410472
.current_snapshot_id = kSnapshotId,
411473
.snapshots = {snapshot_with_manifest},
412474
.snapshot_log = {SnapshotLogEntry{.timestamp_ms = timestamp_ms,
413475
.snapshot_id = kSnapshotId}},
476+
477+
.metadata_log = {},
478+
.sort_orders = {},
414479
.default_sort_order_id = 0,
415480
.refs = {{"main", std::make_shared<SnapshotRef>(SnapshotRef{
416481
.snapshot_id = kSnapshotId,
417482
.retention = SnapshotRef::Branch{},
418-
})}}});
483+
})}},
484+
.statistics = {},
485+
.partition_statistics = {},
486+
.next_row_id = 0});
419487

420488
ICEBERG_UNWRAP_OR_FAIL(auto builder,
421489
DataTableScanBuilder::Make(metadata_with_manifest, file_io_));
@@ -474,15 +542,21 @@ TEST_P(TableScanTest, PlanFilesWithMultipleManifests) {
474542
.partition_specs = {partitioned_spec_, unpartitioned_spec_},
475543
.default_spec_id = partitioned_spec_->spec_id(),
476544
.last_partition_id = 1000,
545+
.properties = {},
477546
.current_snapshot_id = 1000L,
478547
.snapshots = {snapshot_with_manifests},
479548
.snapshot_log = {SnapshotLogEntry{.timestamp_ms = timestamp_ms,
480549
.snapshot_id = 1000L}},
550+
.metadata_log = {},
551+
.sort_orders = {},
481552
.default_sort_order_id = 0,
482553
.refs = {{"main", std::make_shared<SnapshotRef>(SnapshotRef{
483554
.snapshot_id = 1000L,
484555
.retention = SnapshotRef::Branch{},
485-
})}}});
556+
})}},
557+
.statistics = {},
558+
.partition_statistics = {},
559+
.next_row_id = 0});
486560

487561
ICEBERG_UNWRAP_OR_FAIL(auto builder,
488562
DataTableScanBuilder::Make(metadata_with_manifests, file_io_));
@@ -522,6 +596,7 @@ TEST_P(TableScanTest, PlanFilesWithFilter) {
522596
.sequence_number = 1L,
523597
.timestamp_ms = timestamp_ms,
524598
.manifest_list = manifest_list_path,
599+
.summary = {},
525600
.schema_id = schema_->schema_id()});
526601

527602
auto metadata = std::make_shared<TableMetadata>(TableMetadata{
@@ -536,14 +611,20 @@ TEST_P(TableScanTest, PlanFilesWithFilter) {
536611
.partition_specs = {partitioned_spec_, unpartitioned_spec_},
537612
.default_spec_id = partitioned_spec_->spec_id(),
538613
.last_partition_id = 1000,
614+
.properties = {},
539615
.current_snapshot_id = kSnapshotId,
540616
.snapshots = {snapshot},
541617
.snapshot_log = {SnapshotLogEntry{.timestamp_ms = timestamp_ms,
542618
.snapshot_id = kSnapshotId}},
619+
.metadata_log = {},
620+
.sort_orders = {},
543621
.default_sort_order_id = 0,
544622
.refs = {{"main",
545623
std::make_shared<SnapshotRef>(SnapshotRef{
546-
.snapshot_id = kSnapshotId, .retention = SnapshotRef::Branch{}})}}});
624+
.snapshot_id = kSnapshotId, .retention = SnapshotRef::Branch{}})}},
625+
.statistics = {},
626+
.partition_statistics = {},
627+
.next_row_id = 0});
547628

548629
// Test 1: Filter matches only data1.parquet (id=25 is in range [1, 50])
549630
{
@@ -643,15 +724,21 @@ TEST_P(TableScanTest, PlanFilesWithDeleteFiles) {
643724
.partition_specs = {partitioned_spec_, unpartitioned_spec_},
644725
.default_spec_id = partitioned_spec_->spec_id(),
645726
.last_partition_id = 1000,
727+
.properties = {},
646728
.current_snapshot_id = kSnapshotId,
647729
.snapshots = {snapshot_with_manifests},
648730
.snapshot_log = {SnapshotLogEntry{.timestamp_ms = timestamp_ms,
649731
.snapshot_id = kSnapshotId}},
732+
.metadata_log = {},
733+
.sort_orders = {},
650734
.default_sort_order_id = 0,
651735
.refs = {{"main", std::make_shared<SnapshotRef>(SnapshotRef{
652736
.snapshot_id = kSnapshotId,
653737
.retention = SnapshotRef::Branch{},
654-
})}}});
738+
})}},
739+
.statistics = {},
740+
.partition_statistics = {},
741+
.next_row_id = 0});
655742

656743
ICEBERG_UNWRAP_OR_FAIL(auto builder,
657744
DataTableScanBuilder::Make(metadata_with_manifests, file_io_));
@@ -684,22 +771,29 @@ TEST_P(TableScanTest, SchemaWithSelectedColumnsAndFilter) {
684771
.partition_specs = {unpartitioned_spec_},
685772
.default_spec_id = unpartitioned_spec_->spec_id(),
686773
.last_partition_id = 1000,
774+
.properties = {},
687775
.current_snapshot_id = 1000L,
688776
.snapshots = {std::make_shared<Snapshot>(Snapshot{
689777
.snapshot_id = 1000L,
690778
.parent_snapshot_id = std::nullopt,
691779
.sequence_number = 1L,
692780
.timestamp_ms = timestamp_ms,
693781
.manifest_list = "/tmp/metadata/snap-1000-1-manifest-list.avro",
782+
.summary = {},
694783
.schema_id = schema->schema_id(),
695784
})},
696785
.snapshot_log = {SnapshotLogEntry{.timestamp_ms = timestamp_ms,
697786
.snapshot_id = 1000L}},
787+
.metadata_log = {},
788+
.sort_orders = {},
698789
.default_sort_order_id = 0,
699790
.refs = {{"main", std::make_shared<SnapshotRef>(SnapshotRef{
700791
.snapshot_id = 1000L,
701792
.retention = SnapshotRef::Branch{},
702793
})}},
794+
.statistics = {},
795+
.partition_statistics = {},
796+
.next_row_id = 0,
703797
});
704798

705799
// Select "data" column, filter on "id" column

src/iceberg/test/update_test_base.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ class UpdateTestBase : public ::testing::Test {
4646
virtual std::string TableName() const { return "test_table"; }
4747

4848
void SetUp() override {
49-
table_ident_ = TableIdentifier{.name = TableName()};
49+
table_ident_ = TableIdentifier{.ns = {}, .name = TableName()};
5050
table_location_ = "/warehouse/" + TableName();
5151

5252
InitializeFileIO();

0 commit comments

Comments
 (0)