@@ -134,12 +134,12 @@ def dcor_move_dataset_to_circle(dataset, circle):
134134 print ("...deleted old S3 objects" )
135135
136136
137+ @click .command ()
137138@click .option ('--older-than-days' , default = 21 ,
138139 help = 'Only prune datasets that were created before a given '
139140 + 'number of days (set to -1 to prune all)' )
140141@click .option ('--dry-run' , is_flag = True ,
141142 help = 'Do not actually remove anything' )
142- @click .command ()
143143def dcor_prune_draft_datasets (older_than_days = 21 , dry_run = False ):
144144 """Remove draft datasets from the CKAN database"""
145145 # Iterate over all packages
@@ -187,14 +187,14 @@ def dcor_prune_draft_datasets(older_than_days=21, dry_run=False):
187187 click .secho ("Done!" )
188188
189189
190+ @click .command ()
190191@click .option ('--older-than-days' , default = 21 ,
191192 help = 'Only prune artifacts that were created before a given '
192193 + 'number of days (set to -1 to prune all)' )
193194@click .option ('--keep-orphan-buckets' , is_flag = True ,
194195 help = 'Keep buckets that do not represent a circle' )
195196@click .option ('--dry-run' , is_flag = True ,
196197 help = 'Do not actually remove anything' )
197- @click .command ()
198198def dcor_prune_orphaned_s3_artifacts (older_than_days = 21 ,
199199 keep_orphan_buckets = False ,
200200 dry_run = False ):
@@ -240,6 +240,61 @@ def dcor_prune_orphaned_s3_artifacts(older_than_days=21,
240240 click .secho ("Done!" )
241241
242242
@click.command()
@click.option('--modified-before-months', default=24,
              help='Only delete collections that were last modified before '
                   'a given number of months (set to -1 to delete all)')
@click.option('--dry-run', is_flag=True,
              help='Do not actually delete anything')
def dcor_purge_unused_collections_and_circles(
        modified_before_months: int = 24,
        dry_run: bool = False):
    """Purge old collections and circles that don't contain any datasets

    Groups that have child groups, groups that are younger than the
    given number of months, and groups that still hold at least one
    active dataset are left untouched.
    """
    # NOTE(review): the option is named "modified before", but the only
    # timestamp used here is the group's `created` column -- confirm that
    # this is the intended notion of age.
    if modified_before_months < 0:
        # -1 means "no age restriction"
        created_before = None
    else:
        # A month is approximated by 31 days.
        created_before = (
            datetime.datetime.now()
            - datetime.timedelta(days=31 * modified_before_months))

    # Iterate over all collections and circles
    for group in model.Group.iterall():
        if group.get_children_groups():
            print(f"Ignoring group '{group.id}' with children")
            continue

        # The age check must be done on the group itself. If it were part
        # of the dataset query below, young groups would yield an empty
        # result and be mistaken for unused groups.
        if created_before is not None:
            created = group.created
            if created is None or created >= created_before:
                # Too young (or unknown age): never purge.
                continue

        # Does this group contain any datasets?
        query = (
            model.meta.Session.query(model.package.Package)
            # table with all active datasets
            .filter(model.package.Package.state == model.core.State.ACTIVE)
            # group table of the current group
            .filter(model.group.group_table.c["id"] == group.id)
            # member table with all active members
            .filter(model.group.member_table.c["state"] == 'active')
            # intersection of the members and package tables
            .join(model.group.member_table,
                  model.group.member_table.c["table_id"]
                  == model.package.Package.id)
            # intersection of the group table and the members table
            .join(model.group.group_table,
                  model.group.group_table.c["id"]
                  == model.group.member_table.c["group_id"])
            # we only need one
            .limit(1)
        )

        if query.count():
            # The group is still in use.
            continue

        if group.is_organization:
            print(f"Delete circle {group.id}")
            # Circles are CKAN organizations; `organization_purge` is the
            # organization counterpart of `group_purge`.
            purge_method = logic.action.delete.organization_purge
        else:
            print(f"Delete collection {group.id}")
            purge_method = logic.action.delete.group_purge

        if not dry_run:
            # The `group_purge` method makes sure that all the memberships
            # (users) are deleted before removing the group.
            purge_method(admin_context(), {"id": group.id})
297+
243298@click .command ()
244299def list_circles ():
245300 """List all circles/organizations"""
@@ -296,10 +351,10 @@ def list_zombie_users(last_activity_weeks=12):
296351 click .echo (user .name )
297352
298353
354+ @click .command ()
299355@click .option ('--modified-days' , default = - 1 ,
300356 help = 'Only run for datasets modified within this number of days '
301357 + 'in the past. Set to -1 to apply to all datasets.' )
302- @click .command ()
303358def run_jobs_dcor_schemas (modified_days = - 1 ):
304359 """Set .rtdc metadata and SHA256 sums and for all resources
305360
@@ -343,6 +398,7 @@ def run_jobs_dcor_schemas(modified_days=-1):
343398 click .echo ("Done!" )
344399
345400
401+ @click .command ()
346402@click .option ('--recipient' , type = str )
347403@click .option ('--subject' , type = str , default = "DCOR Email" )
348404@click .option ('--file_body' ,
@@ -352,7 +408,6 @@ def run_jobs_dcor_schemas(modified_days=-1):
352408 path_type = pathlib .Path ),
353409 default = None ,
354410 )
355- @click .command ()
356411def send_mail (recipient , subject = None , file_body = None ):
357412 """Send email to `recipient` with `subject` with content of `file_body`
358413
@@ -375,6 +430,7 @@ def get_commands():
375430 dcor_move_dataset_to_circle ,
376431 dcor_prune_draft_datasets ,
377432 dcor_prune_orphaned_s3_artifacts ,
433+ dcor_purge_unused_collections_and_circles ,
378434 list_circles ,
379435 list_collections ,
380436 list_group_resources ,
0 commit comments