@@ -34,9 +34,22 @@ namespace iceberg {
3434
3535namespace {
3636
37+ const std::string& TaskFilePath (const std::shared_ptr<ChangelogScanTask>& task) {
38+ if (auto added = std::dynamic_pointer_cast<AddedRowsScanTask>(task)) {
39+ return added->data_file ()->file_path ;
40+ }
41+ if (auto deleted = std::dynamic_pointer_cast<DeletedDataFileScanTask>(task)) {
42+ return deleted->data_file ()->file_path ;
43+ }
44+
45+ static const std::string empty_path;
46+ return empty_path;
47+ }
48+
/// \brief Sort changelog scan tasks for deterministic ordering.
/// Sorts by change_ordinal, then by operation type (numeric enum value), then by file path.
39- void SortTasks (std::vector<std::shared_ptr<ChangelogScanTask>>& tasks) {
51+ template <typename TaskType>
52+ void SortTasks (std::vector<std::shared_ptr<TaskType>>& tasks) {
4053 std::ranges::sort (tasks, [](const auto & t1, const auto & t2) {
4154 if (t1->change_ordinal () != t2->change_ordinal ()) {
4255 return t1->change_ordinal () < t2->change_ordinal ();
@@ -45,7 +58,8 @@ void SortTasks(std::vector<std::shared_ptr<ChangelogScanTask>>& tasks) {
4558 return static_cast <uint8_t >(t1->operation ()) <
4659 static_cast <uint8_t >(t2->operation ());
4760 }
48- return t1->data_file ()->file_path < t2->data_file ()->file_path ;
61+ return TaskFilePath (std::static_pointer_cast<ChangelogScanTask>(t1)) <
62+ TaskFilePath (std::static_pointer_cast<ChangelogScanTask>(t2));
4963 });
5064}
5165
@@ -68,6 +82,10 @@ TEST_P(IncrementalChangelogScanTest, DataFilters) {
6882 auto snapshot_a = MakeAppendSnapshotWithPartitionValues (
6983 version, 1000L , std::nullopt , 1L , {{" /path/to/file_a.parquet" , partition_a}},
7084 partitioned_spec_);
85+ SnapshotCache cache_a (snapshot_a.get ());
86+ ICEBERG_UNWRAP_OR_FAIL (auto manifests_a, cache_a.DataManifests (file_io_));
87+ ASSERT_EQ (manifests_a.size (), 1 );
88+ const auto & manifest_a = manifests_a[0 ];
7189
7290 // Create snapshot 2 with file_b (separate manifest list, not inheriting from snap1)
7391 auto snapshot_b = MakeAppendSnapshotWithPartitionValues (
@@ -81,6 +99,10 @@ TEST_P(IncrementalChangelogScanTest, DataFilters) {
8199 .snapshot_id = 2000L , .retention = SnapshotRef::Branch{}})}},
82100 partitioned_spec_);
83101
102+ // Make the first manifest unavailable. Planning should still succeed because the
103+ // partition filter can skip reading file_a's manifest entirely.
104+ EXPECT_THAT (file_io_->DeleteFile (manifest_a.manifest_path ), IsOk ());
105+
84106 // Filter by data="k" which should match only file_b (bucket("k", 16) = 1)
85107 ICEBERG_UNWRAP_OR_FAIL (auto builder, IncrementalChangelogScanBuilder::Make (
86108 partitioned_metadata, file_io_));
@@ -94,8 +116,10 @@ TEST_P(IncrementalChangelogScanTest, DataFilters) {
94116 EXPECT_EQ (t1->change_ordinal (), 1 );
95117 EXPECT_EQ (t1->commit_snapshot_id (), 2000L );
96118 EXPECT_EQ (t1->operation (), ChangelogOperation::kInsert );
97- EXPECT_EQ (t1->data_file ()->file_path , " /path/to/file_b.parquet" );
98- EXPECT_TRUE (t1->delete_files ().empty ());
119+ auto insert_t1 = std::dynamic_pointer_cast<AddedRowsScanTask>(t1);
120+ ASSERT_NE (insert_t1, nullptr );
121+ EXPECT_EQ (insert_t1->data_file ()->file_path , " /path/to/file_b.parquet" );
122+ EXPECT_TRUE (insert_t1->delete_files ().empty ());
99123}
100124
101125TEST_P (IncrementalChangelogScanTest, Overwrites) {
@@ -130,16 +154,20 @@ TEST_P(IncrementalChangelogScanTest, Overwrites) {
130154 EXPECT_EQ (t1->change_ordinal (), 0 );
131155 EXPECT_EQ (t1->commit_snapshot_id (), 2000L );
132156 EXPECT_EQ (t1->operation (), ChangelogOperation::kInsert );
133- EXPECT_EQ (t1->data_file ()->file_path , " /path/to/file_a2.parquet" );
134- EXPECT_TRUE (t1->delete_files ().empty ());
157+ auto insert_t1 = std::dynamic_pointer_cast<AddedRowsScanTask>(t1);
158+ ASSERT_NE (insert_t1, nullptr );
159+ EXPECT_EQ (insert_t1->data_file ()->file_path , " /path/to/file_a2.parquet" );
160+ EXPECT_TRUE (insert_t1->delete_files ().empty ());
135161
136162 // Second task: deleted file (DELETE operation)
137163 auto t2 = tasks[1 ];
138164 EXPECT_EQ (t2->change_ordinal (), 0 );
139165 EXPECT_EQ (t2->commit_snapshot_id (), 2000L );
140166 EXPECT_EQ (t2->operation (), ChangelogOperation::kDelete );
141- EXPECT_EQ (t2->data_file ()->file_path , " /path/to/file_a.parquet" );
142- EXPECT_TRUE (t2->delete_files ().empty ());
167+ auto delete_t2 = std::dynamic_pointer_cast<DeletedDataFileScanTask>(t2);
168+ ASSERT_NE (delete_t2, nullptr );
169+ EXPECT_EQ (delete_t2->data_file ()->file_path , " /path/to/file_a.parquet" );
170+ EXPECT_TRUE (delete_t2->existing_deletes ().empty ());
143171}
144172
145173TEST_P (IncrementalChangelogScanTest, DuplicatedManifests) {
@@ -190,10 +218,14 @@ TEST_P(IncrementalChangelogScanTest, DuplicatedManifests) {
190218 ASSERT_EQ (tasks.size (), 2 );
191219 SortTasks (tasks);
192220
193- EXPECT_EQ (tasks[0 ]->data_file ()->file_path , " /path/to/file_a.parquet" );
221+ auto insert_t1 = std::dynamic_pointer_cast<AddedRowsScanTask>(tasks[0 ]);
222+ ASSERT_NE (insert_t1, nullptr );
223+ EXPECT_EQ (insert_t1->data_file ()->file_path , " /path/to/file_a.parquet" );
194224 EXPECT_EQ (tasks[0 ]->commit_snapshot_id (), 1000L );
195225
196- EXPECT_EQ (tasks[1 ]->data_file ()->file_path , " /path/to/file_b.parquet" );
226+ auto insert_t2 = std::dynamic_pointer_cast<AddedRowsScanTask>(tasks[1 ]);
227+ ASSERT_NE (insert_t2, nullptr );
228+ EXPECT_EQ (insert_t2->data_file ()->file_path , " /path/to/file_b.parquet" );
197229 EXPECT_EQ (tasks[1 ]->commit_snapshot_id (), 2000L );
198230}
199231
@@ -225,8 +257,10 @@ TEST_P(IncrementalChangelogScanTest, FileDeletes) {
225257 EXPECT_EQ (t1->change_ordinal (), 0 );
226258 EXPECT_EQ (t1->commit_snapshot_id (), 2000L );
227259 EXPECT_EQ (t1->operation (), ChangelogOperation::kDelete );
228- EXPECT_EQ (t1->data_file ()->file_path , " /path/to/file_a.parquet" );
229- EXPECT_TRUE (t1->delete_files ().empty ());
260+ auto delete_t1 = std::dynamic_pointer_cast<DeletedDataFileScanTask>(t1);
261+ ASSERT_NE (delete_t1, nullptr );
262+ EXPECT_EQ (delete_t1->data_file ()->file_path , " /path/to/file_a.parquet" );
263+ EXPECT_TRUE (delete_t1->existing_deletes ().empty ());
230264}
231265
232266TEST_P (IncrementalChangelogScanTest, ExistingEntriesInNewDataManifestsAreIgnored) {
@@ -278,8 +312,10 @@ TEST_P(IncrementalChangelogScanTest, ExistingEntriesInNewDataManifestsAreIgnored
278312 EXPECT_EQ (t1->change_ordinal (), 0 );
279313 EXPECT_EQ (t1->commit_snapshot_id (), 3000L );
280314 EXPECT_EQ (t1->operation (), ChangelogOperation::kInsert );
281- EXPECT_EQ (t1->data_file ()->file_path , " /path/to/file_c.parquet" );
282- EXPECT_TRUE (t1->delete_files ().empty ());
315+ auto insert_t1 = std::dynamic_pointer_cast<AddedRowsScanTask>(t1);
316+ ASSERT_NE (insert_t1, nullptr );
317+ EXPECT_EQ (insert_t1->data_file ()->file_path , " /path/to/file_c.parquet" );
318+ EXPECT_TRUE (insert_t1->delete_files ().empty ());
283319}
284320
285321TEST_P (IncrementalChangelogScanTest, DataFileRewrites) {
@@ -330,13 +366,17 @@ TEST_P(IncrementalChangelogScanTest, DataFileRewrites) {
330366 EXPECT_EQ (t1->change_ordinal (), 0 );
331367 EXPECT_EQ (t1->commit_snapshot_id (), 1000L );
332368 EXPECT_EQ (t1->operation (), ChangelogOperation::kInsert );
333- EXPECT_EQ (t1->data_file ()->file_path , " /path/to/file_a.parquet" );
369+ auto insert_t1 = std::dynamic_pointer_cast<AddedRowsScanTask>(t1);
370+ ASSERT_NE (insert_t1, nullptr );
371+ EXPECT_EQ (insert_t1->data_file ()->file_path , " /path/to/file_a.parquet" );
334372
335373 auto t2 = tasks[1 ];
336374 EXPECT_EQ (t2->change_ordinal (), 1 );
337375 EXPECT_EQ (t2->commit_snapshot_id (), 2000L );
338376 EXPECT_EQ (t2->operation (), ChangelogOperation::kInsert );
339- EXPECT_EQ (t2->data_file ()->file_path , " /path/to/file_b.parquet" );
377+ auto insert_t2 = std::dynamic_pointer_cast<AddedRowsScanTask>(t2);
378+ ASSERT_NE (insert_t2, nullptr );
379+ EXPECT_EQ (insert_t2->data_file ()->file_path , " /path/to/file_b.parquet" );
340380}
341381
342382TEST_P (IncrementalChangelogScanTest, ManifestRewritesAreIgnored) {
@@ -393,19 +433,25 @@ TEST_P(IncrementalChangelogScanTest, ManifestRewritesAreIgnored) {
393433 EXPECT_EQ (t1->change_ordinal (), 0 );
394434 EXPECT_EQ (t1->commit_snapshot_id (), 1000L );
395435 EXPECT_EQ (t1->operation (), ChangelogOperation::kInsert );
396- EXPECT_EQ (t1->data_file ()->file_path , " /path/to/file_a.parquet" );
436+ auto insert_t1 = std::dynamic_pointer_cast<AddedRowsScanTask>(t1);
437+ ASSERT_NE (insert_t1, nullptr );
438+ EXPECT_EQ (insert_t1->data_file ()->file_path , " /path/to/file_a.parquet" );
397439
398440 auto t2 = tasks[1 ];
399441 EXPECT_EQ (t2->change_ordinal (), 1 );
400442 EXPECT_EQ (t2->commit_snapshot_id (), 2000L );
401443 EXPECT_EQ (t2->operation (), ChangelogOperation::kInsert );
402- EXPECT_EQ (t2->data_file ()->file_path , " /path/to/file_b.parquet" );
444+ auto insert_t2 = std::dynamic_pointer_cast<AddedRowsScanTask>(t2);
445+ ASSERT_NE (insert_t2, nullptr );
446+ EXPECT_EQ (insert_t2->data_file ()->file_path , " /path/to/file_b.parquet" );
403447
404448 auto t3 = tasks[2 ];
405449 EXPECT_EQ (t3->change_ordinal (), 2 );
406450 EXPECT_EQ (t3->commit_snapshot_id (), 4000L );
407451 EXPECT_EQ (t3->operation (), ChangelogOperation::kInsert );
408- EXPECT_EQ (t3->data_file ()->file_path , " /path/to/file_c.parquet" );
452+ auto insert_t3 = std::dynamic_pointer_cast<AddedRowsScanTask>(t3);
453+ ASSERT_NE (insert_t3, nullptr );
454+ EXPECT_EQ (insert_t3->data_file ()->file_path , " /path/to/file_c.parquet" );
409455}
410456
411457TEST_P (IncrementalChangelogScanTest, DeleteFilesAreNotSupported) {
0 commit comments