77from typing_extensions import override
88
99from prime_backup .action import Action
10+ from prime_backup .action .helpers .chunk_grouper import ChunkGrouper
1011from prime_backup .compressors import Compressor
1112from prime_backup .db import schema
1213from prime_backup .db .access import DbAccess
@@ -177,22 +178,32 @@ def __collect_chunk_moves(self, session: DbSession) -> List[_HashMove[schema.Chu
177178 self .__ensure_paths_can_migrate (moves , chunk_utils .get_chunk_path )
178179 return _changed_moves (moves )
179180
180- def __collect_chunk_group_moves (self , session : DbSession ) -> List [_HashMove [schema .ChunkGroup ]]:
181- moves : List [_HashMove [schema .ChunkGroup ]] = []
182- chunk_groups = session .list_chunk_groups ()
183- chunks_by_group_id = session .get_chunk_group_chunks_batch ([chunk_group .id for chunk_group in chunk_groups ])
184- total = len (chunk_groups )
185- for i , chunk_group in enumerate (chunk_groups ):
186- new_hash = chunk_utils .create_chunk_group_hash (
187- offset_chunk .chunk .hash
188- for offset_chunk in chunks_by_group_id [chunk_group .id ]
189- )
190- moves .append (_HashMove (object = chunk_group , old_hash = chunk_group .hash , new_hash = new_hash ))
191- if (i + 1 ) % 5000 == 0 or i + 1 == total :
192- self .logger .info ('Calculated chunk group hashes {} / {}' .format (i + 1 , total ))
193-
194- self .__ensure_hashes_can_migrate (moves , 'chunk group' )
195- return _changed_moves (moves )
181+ def __regroup_chunked_blobs (self , session : DbSession ):
182+ # Step 1 - collect blob -> ordered chunks before destroying the binding chain
183+ chunked_blobs = session .list_blobs_by_storage_method (BlobStorageMethod .chunked )
184+ total = len (chunked_blobs )
185+ blob_chunks_map : Dict [int , Dict [int , schema .Chunk ]] = {}
186+ for blob in chunked_blobs :
187+ offset_chunks = session .get_blob_chunks (blob .id ) # sorted by absolute_offset
188+ blob_chunks_map [blob .id ] = {oc .offset : oc .chunk for oc in offset_chunks }
189+
190+ chunk_group_count = session .get_chunk_group_count ()
191+ self .logger .info ('Dropping {} chunk group and all bindings, then re-grouping {} chunked blob with the new chunk hashes' .format (chunk_group_count , total ))
192+
193+ # Step 2 - wipe all chunk group data
194+ session .delete_all_blob_chunk_group_bindings ()
195+ session .delete_all_chunk_group_chunk_bindings ()
196+ session .delete_all_chunk_groups ()
197+ session .flush ()
198+
199+ # Step 3 - re-group using ChunkGrouper (which applies the endswith('00') cut rule)
200+ if total == 0 :
201+ return
202+ chunk_grouper = ChunkGrouper (session , None )
203+ for i , blob in enumerate (chunked_blobs ):
204+ chunk_grouper .create_chunk_groups (blob , blob_chunks_map [blob .id ])
205+ if (i + 1 ) % 200 == 0 or i + 1 == total :
206+ self .logger .info ('Re-grouped chunked blobs {} / {}' .format (i + 1 , total ))
196207
197208 # ==================== DB Updates ====================
198209
@@ -233,11 +244,6 @@ def __migrate_chunk_hashes(self, session: DbSession, moves: List[_HashMove[schem
233244 move .object .hash = move .new_hash
234245 session .flush ()
235246
236- def __migrate_chunk_group_hashes (self , session : DbSession , moves : List [_HashMove [schema .ChunkGroup ]]):
237- for move in moves :
238- move .object .hash = move .new_hash
239- session .flush ()
240-
241247 def __rollback_files (self ):
242248 self .__move_journal .rollback ()
243249
@@ -260,7 +266,7 @@ def run(self) -> None:
260266
261267 self .__migrate_blob_hashes (session , self .__collect_blob_moves (session ))
262268 self .__migrate_chunk_hashes (session , self .__collect_chunk_moves (session ))
263- self .__migrate_chunk_group_hashes (session , self . __collect_chunk_group_moves ( session ) )
269+ self .__regroup_chunked_blobs (session )
264270
265271 meta = session .get_db_meta ()
266272 meta .hash_method = self .new_hash_method .name
0 commit comments