Skip to content

Commit 7b80518

Browse files
committed
fix: deduplicate manifest files for multi snapshots
1 parent 4c4ffff commit 7b80518

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

src/iceberg/manifest/manifest_list.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -272,3 +272,12 @@ ICEBERG_EXPORT inline constexpr Result<ManifestContent> ManifestContentFromStrin
272272
}
273273

274274
} // namespace iceberg
275+
276+
namespace std {
277+
template <>
278+
struct std::hash<iceberg::ManifestFile> {
279+
size_t operator()(const iceberg::ManifestFile& manifest_file) const {
280+
return std::hash<std::string>{}(manifest_file.manifest_path);
281+
}
282+
};
283+
} // namespace std

src/iceberg/table_scan.cc

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -674,11 +674,11 @@ Result<std::vector<std::shared_ptr<FileScanTask>>> IncrementalAppendScan::PlanFi
674674
std::inserter(snapshot_ids, snapshot_ids.end()),
675675
[](const auto& snapshot) { return snapshot->snapshot_id; });
676676

677-
std::vector<ManifestFile> data_manifests;
677+
std::unordered_set<ManifestFile> data_manifests;
678678
for (const auto& snapshot : append_snapshots) {
679679
SnapshotCache snapshot_cache(snapshot.get());
680680
ICEBERG_ASSIGN_OR_RAISE(auto manifests, snapshot_cache.DataManifests(io_));
681-
std::ranges::copy_if(manifests, std::back_inserter(data_manifests),
681+
std::ranges::copy_if(manifests, std::inserter(data_manifests, data_manifests.end()),
682682
[&snapshot_ids](const ManifestFile& manifest) {
683683
return snapshot_ids.contains(manifest.added_snapshot_id);
684684
});
@@ -692,7 +692,9 @@ Result<std::vector<std::shared_ptr<FileScanTask>>> IncrementalAppendScan::PlanFi
692692

693693
ICEBERG_ASSIGN_OR_RAISE(
694694
auto manifest_group,
695-
ManifestGroup::Make(io_, schema_, specs_by_id, std::move(data_manifests), {}));
695+
ManifestGroup::Make(
696+
io_, schema_, specs_by_id,
697+
std::vector<ManifestFile>(data_manifests.begin(), data_manifests.end()), {}));
696698

697699
manifest_group->CaseSensitive(context_.case_sensitive)
698700
.Select(ScanColumns())

0 commit comments

Comments
 (0)