@@ -143,3 +143,93 @@ def upload_bytes(
def public_url(key: str) -> str:
    """Build the public read URL under which the object ``key`` is served.

    Leading slashes are removed from ``key`` so that joining it to
    ``DEFAULT_READ_HOST`` with a single ``/`` never yields a doubled slash
    at the seam.
    """
    relative_key = key.lstrip("/")
    return f"{DEFAULT_READ_HOST}/{relative_key}"
146+
147+
def list_keys(client, bucket: str, prefix: str) -> list[str]:
    """Collect the key of every object in ``bucket`` that sits under ``prefix``.

    The listing is driven through the client's ``list_objects_v2`` paginator,
    so results that span several pages are gathered into one flat list, in
    the order the pages yield them.
    """
    pages = client.get_paginator("list_objects_v2").paginate(
        Bucket=bucket,
        Prefix=prefix,
    )
    found: list[str] = []
    for page in pages:
        # A page may lack "Contents" (or carry a falsy value) when it holds
        # no objects; both cases are treated as an empty page.
        entries = page.get("Contents", []) or []
        found.extend(entry["Key"] for entry in entries)
    return found
156+
157+
def delete_keys(
    client,
    bucket: str,
    keys: list[str],
    dry_run: bool = False,
) -> None:
    """Delete ``keys`` from ``bucket`` in batches of up to 1000 per request.

    Args:
        client: Object exposing a boto3-style ``delete_objects`` method. It is
            not touched when ``keys`` is empty or ``dry_run`` is true.
        bucket: Name of the bucket holding the objects.
        keys: Object keys to remove. An empty list is a silent no-op.
        dry_run: When true, only print what would be deleted.

    Raises:
        RuntimeError: If any batch response lists per-key failures under
            ``"Errors"``. Every batch is still attempted first, so as many
            keys as possible are removed before the error is reported.
    """
    if not keys:
        return
    prefix = "[dry-run] " if dry_run else ""
    print(f"{prefix}deleting {len(keys)} object(s) from s3://{bucket}/")
    if dry_run:
        return

    batch_size = 1000  # per-request key limit of the multi-object delete call
    failures: list[dict] = []
    for start in range(0, len(keys), batch_size):
        chunk = keys[start : start + batch_size]
        response = client.delete_objects(
            Bucket=bucket,
            Delete={
                "Objects": [{"Key": k} for k in chunk],
                "Quiet": True,
            },
        )
        # Per-key failures come back inside a successful response rather than
        # as an exception, so they have to be collected explicitly.
        if isinstance(response, dict):
            failures.extend(response.get("Errors") or [])

    if failures:
        shown = ", ".join(
            f"{err.get('Key')} ({err.get('Code')}: {err.get('Message')})"
            for err in failures[:5]
        )
        more = "" if len(failures) <= 5 else f", and {len(failures) - 5} more"
        raise RuntimeError(
            f"failed to delete {len(failures)} object(s) "
            f"from s3://{bucket}/: {shown}{more}"
        )
180+
181+
def mirror_prefix(
    client,
    bucket: str,
    src_prefix: str,
    dst_prefix: str,
    dry_run: bool = False,
) -> dict:
    """Server-side copy every object under ``src_prefix`` to ``dst_prefix``.

    Any objects currently under ``dst_prefix`` whose relative path is not
    present in ``src_prefix`` are deleted first, so ``dst_prefix`` becomes a
    faithful mirror of ``src_prefix``.

    Args:
        client: boto3-style S3 client. ``None`` is accepted; nothing can be
            enumerated in that case, so the summary reports zeros.
        bucket: Bucket containing both prefixes.
        src_prefix: Prefix to copy from. A trailing ``/`` is appended if
            missing.
        dst_prefix: Prefix to copy to. A trailing ``/`` is appended if
            missing.
        dry_run: When true, print the planned deletions and copies without
            performing them.

    Returns:
        A dict with the resolved ``src_prefix`` and ``dst_prefix`` plus the
        ``copied`` and ``deleted`` counts. In a dry run the counts are what
        would have been copied and deleted.

    Raises:
        ValueError: If the two prefixes are identical or one is nested inside
            the other.
    """
    src_prefix = src_prefix if src_prefix.endswith("/") else src_prefix + "/"
    dst_prefix = dst_prefix if dst_prefix.endswith("/") else dst_prefix + "/"

    # Overlapping prefixes can never produce a mirror. With the source nested
    # in the destination, the source objects themselves look stale and would
    # be deleted before they are copied. With identical prefixes every object
    # would be copied onto itself. With the destination nested in the source,
    # each run would copy the previous mirror into itself again.
    if src_prefix.startswith(dst_prefix) or dst_prefix.startswith(src_prefix):
        raise ValueError(
            f"src_prefix {src_prefix!r} and dst_prefix {dst_prefix!r} "
            "must not overlap"
        )

    # Without a live client we can't enumerate either side; report zeros.
    if client is None:
        src_keys: list[str] = []
        dst_keys: list[str] = []
    else:
        src_keys = list_keys(client, bucket, src_prefix)
        dst_keys = list_keys(client, bucket, dst_prefix)

    src_relatives = {k[len(src_prefix) :] for k in src_keys}
    stale = [k for k in dst_keys if k[len(dst_prefix) :] not in src_relatives]

    # delete_keys is a no-op for an empty list, so skip the call entirely.
    if stale:
        delete_keys(client, bucket, stale, dry_run=dry_run)

    # Only wrap in a progress bar when there is something to iterate.
    iterator = (
        tqdm(src_keys, desc=f"↪ {dst_prefix}", unit="obj")
        if src_keys else src_keys
    )
    for src_key in iterator:
        rel = src_key[len(src_prefix) :]
        dst_key = f"{dst_prefix}{rel}"
        if dry_run:
            print(f"[dry-run] copy s3://{bucket}/{src_key} → s3://{bucket}/{dst_key}")
            continue
        # NOTE(review): a single copy_object call is subject to the service's
        # per-request object size limit; confirm no mirrored object exceeds it.
        client.copy_object(
            Bucket=bucket,
            Key=dst_key,
            CopySource={"Bucket": bucket, "Key": src_key},
            ACL="bucket-owner-full-control",
            MetadataDirective="COPY",
        )

    return {
        "src_prefix": src_prefix,
        "dst_prefix": dst_prefix,
        "copied": len(src_keys),
        "deleted": len(stale),
    }
0 commit comments