@@ -695,8 +695,7 @@ def store_list(namespace):
695695 logger .error ("Repository index is corrupted and must be repaired; skipping the pack check." )
696696 objs_errors = index_errors + pack_errors
697697 logger .info (
698- f"Checked { index_files } index files ({ index_errors } errors) "
699- f"and { pack_files } packs ({ pack_errors } errors)."
698+ f"Checked { index_files } index files ({ index_errors } errors) and { pack_files } packs ({ pack_errors } errors)."
700699 )
701700 if objs_errors == 0 :
702701 logger .info (f"Finished { mode } repository check, no problems found." )
@@ -811,6 +810,70 @@ def delete(self, id):
811810 raise self .ObjectNotFound (id , str (self ._location ))
812811 logger .warning ("ignoring deletion of %s in %s" , bin_to_hex (id ), bin_to_hex (entry .pack_id ))
813812
def compact_pack(self, pack_id, *, keep_ids: set, drop_ids: set):
    """Rewrite pack <pack_id>, keeping <keep_ids> and dropping <drop_ids>, then delete the old pack.

    keep_ids and drop_ids are sets of chunk ids that must together cover the whole pack: their
    ranges tile it with no gap or overlap, and their intersection is empty. Kept objects are
    copied into a new pack via store.defrag and repointed in the chunk index; dropped objects'
    index entries are removed.

    Updates the in-memory chunk index only. The caller holds the exclusive lock and owns index
    durability: invalidate the cached index before calling, write it back after, as compact does.

    Args:
        pack_id: binary id of the pack to rewrite.
        keep_ids: chunk ids whose bytes are copied forward into the new pack.
        drop_ids: chunk ids whose bytes are discarded and whose index entries are removed.

    Returns:
        The new pack_id, None if nothing is kept (pack dropped), or <pack_id> unchanged if the
        kept objects reproduce the old pack (same sha256 name, nothing to delete).

    Raises:
        AssertionError: if an id is in both sets, if an id is not located in <pack_id>, or if the
            ranges of keep_ids + drop_ids do not tile the pack exactly. Nothing has been modified
            when this is raised.
    """
    self._lock_refresh()
    pack_hex = bin_to_hex(pack_id)
    pack_key = "packs/" + pack_hex

    # NOTE: the checks below guard a destructive operation (the old pack gets deleted at the end).
    # They are raised explicitly instead of being written as assert statements, because assert
    # statements are removed when Python runs with -O and the pack would then be rewritten
    # without any validation. The exception type (AssertionError) is kept for callers.
    if keep_ids & drop_ids:
        raise AssertionError("an id cannot appear in both keep_ids and drop_ids")

    # collect every object's range, tagged with whether it is kept, ordered by offset.
    located = []  # (obj_offset, obj_id, obj_size, keep)
    for obj_id in keep_ids | drop_ids:
        entry = self.chunks[obj_id]
        if entry.pack_id != pack_id:
            raise AssertionError(f"{bin_to_hex(obj_id)} is not in pack {pack_hex}")
        located.append((entry.obj_offset, obj_id, entry.obj_size, obj_id in keep_ids))
    located.sort()

    # keep + drop must tile the whole pack; collect the objects to keep in the same pass.
    kept = []  # (obj_offset, obj_id, obj_size), offset-ordered
    covered = 0
    for offset, obj_id, size, keep in located:
        if offset != covered:
            raise AssertionError(f"gap or overlap in pack {pack_hex} at offset {covered}")
        covered += size
        if keep:
            kept.append((offset, obj_id, size))
    if covered != self.store.info(pack_key).size:
        raise AssertionError(f"pack {pack_hex} not fully covered")

    # all validation passed; from here on the index and the store are modified.
    for drop_id in drop_ids:  # remove dropped objects from the index; their bytes are not copied forward
        del self.chunks[drop_id]

    if not kept:  # nothing kept: drop the pack, no replacement
        self.store_delete(pack_key)
        return None

    # copy kept objects into a new pack (named sha256 of its content)
    sources = [(pack_hex, offset, size) for offset, _, size in kept]
    new_pack_id = hex_to_bin(self.store.defrag(sources, algorithm="sha256", namespace="packs"))

    # repoint kept objects at the new pack; new offset is the running sum of kept sizes
    new_locations = []
    new_offset = 0
    for _, keep_id, size in kept:
        new_locations.append((keep_id, new_pack_id, new_offset, size))
        new_offset += size
    self.chunks.update_pack_info(new_locations)

    # delete the old pack last, after the new one is stored and indexed, so kept bytes are never the
    # only copy. if every object was kept in order, defrag reproduced the pack (new_pack_id == pack_id)
    # and deleting it would drop what we kept, so skip.
    if new_pack_id != pack_id:
        self.store_delete(pack_key)
    return new_pack_id
876+
def break_lock(self):
    """Break the lock on this repository's store."""
    store_lock = Lock(self.store)
    store_lock.break_lock()
816879
0 commit comments