@@ -117,12 +117,24 @@ bool ManifestFilterManager::ContainsDeletes() const {
117117 !drop_partitions_.empty ();
118118}
119119
120+ void ManifestFilterManager::DropDeleteFilesOlderThan (int64_t sequence_number) {
121+ min_sequence_number_ = sequence_number;
122+ }
123+
124+ void ManifestFilterManager::RemoveDanglingDeletesFor (const DataFileSet& deleted_files) {
125+ for (const auto & file : deleted_files) {
126+ removed_data_file_paths_.insert (file->file_path );
127+ }
128+ }
129+
120130Result<bool > ManifestFilterManager::CanContainDroppedFiles (const ManifestFile&) const {
121131 // TODO(Guotao): Use the manifest descriptor to skip unrelated object-delete
122132 // manifests once object-delete partitions are tracked separately.
123133 // Currently, DeleteFile(std::shared_ptr<DataFile>) degrades to a path-based delete,
124134 // which forces scanning all manifests.
125- return !delete_paths_.empty ();
135+ // Also open delete manifests when a minimum sequence number is set for cleanup.
136+ return !delete_paths_.empty () || !removed_data_file_paths_.empty () ||
137+ (manifest_content_ == ManifestContent::kDeletes && min_sequence_number_ > 0 );
126138}
127139
128140Result<bool > ManifestFilterManager::CanContainDroppedPartitions (
@@ -219,6 +231,25 @@ Result<bool> ManifestFilterManager::ShouldDelete(const ManifestEntry& entry,
219231 return true ;
220232 }
221233
234+ // Delete-manifest-specific cleanup (only for ManifestContent::kDeletes).
235+ if (manifest_content_ == ManifestContent::kDeletes ) {
236+ // Drop delete files whose data sequence number is older than the minimum
237+ // retained by the table (they can no longer match any live data rows).
238+ // seq == 0 (kInitialSequenceNumber / nullopt) is intentionally excluded:
239+ // those entries predate sequence number assignment and must not be pruned.
240+ int64_t seq = entry.sequence_number .value_or (0 );
241+ if (min_sequence_number_ > 0 && seq > 0 && seq < min_sequence_number_) {
242+ return true ;
243+ }
244+
245+ // Drop DVs that reference a data file that has been removed (dangling DV).
246+ if (!removed_data_file_paths_.empty () && file.IsDeletionVector () &&
247+ file.referenced_data_file .has_value () &&
248+ removed_data_file_paths_.count (*file.referenced_data_file )) {
249+ return true ;
250+ }
251+ }
252+
222253 if (HasRowFilterExpression (delete_expr_)) {
223254 ICEBERG_ASSIGN_OR_RAISE (auto * residual_eval,
224255 GetResidualEvaluator (schema, specs_by_id, spec_id));
@@ -403,6 +434,7 @@ Result<std::vector<ManifestFile>> ManifestFilterManager::FilterManifests(
403434 bool trust_manifest_references = CanTrustManifestReferences (manifests);
404435 manifest_evaluator_cache_.clear ();
405436 residual_evaluator_cache_.clear ();
437+ replaced_manifests_count_ = 0 ;
406438
407439 // TODO(Guotao): Parallelize manifest filtering with per-manifest results, then
408440 // merge found paths and deleted files after the loop.
@@ -413,6 +445,9 @@ Result<std::vector<ManifestFile>> ManifestFilterManager::FilterManifests(
413445 auto filtered_manifest,
414446 FilterManifest (schema, specs_by_id, *manifest_ptr, trust_manifest_references,
415447 writer_factory, found_paths));
448+ if (filtered_manifest.manifest_path != manifest_ptr->manifest_path ) {
449+ ++replaced_manifests_count_;
450+ }
416451 filtered.push_back (std::move (filtered_manifest));
417452 }
418453
0 commit comments