Skip to content

Commit 9db79dd

Browse files
committed
feat: implement CLI command dcor-purge-unused-collections-and-circles
1 parent eba8d44 commit 9db79dd

4 files changed

Lines changed: 178 additions & 4 deletions

File tree

CHANGELOG

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
1.1.0
2+
- feat: implement CLI command `dcor-purge-unused-collections-and-circles`
13
1.0.2
24
- fix: only group admins and editors may add or remove datasets
35
- fix: only group admins may add or remove users

README.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,11 @@ accordingly:
143143

144144
ckan dcor-prune-orphaned-s3-artifacts --older-than-days 21 --dry-run
145145

146+
- CKAN command ``dcor-purge-unused-collections-and-circles`` for removing collections
147+
and circles that are old and don't contain any datasets::
148+
149+
ckan dcor-purge-unused-collections-and-circles --modified-before-months 12 --dry-run
150+
146151
- CKAN command ``send_mail`` for sending emails using the CKAN email credentials
147152

148153
Installation

ckanext/dcor_schemas/cli.py

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,12 @@ def dcor_move_dataset_to_circle(dataset, circle):
134134
print("...deleted old S3 objects")
135135

136136

137+
@click.command()
137138
@click.option('--older-than-days', default=21,
138139
help='Only prune datasets that were created before a given '
139140
+ 'number of days (set to -1 to prune all)')
140141
@click.option('--dry-run', is_flag=True,
141142
help='Do not actually remove anything')
142-
@click.command()
143143
def dcor_prune_draft_datasets(older_than_days=21, dry_run=False):
144144
"""Remove draft datasets from the CKAN database"""
145145
# Iterate over all packages
@@ -187,14 +187,14 @@ def dcor_prune_draft_datasets(older_than_days=21, dry_run=False):
187187
click.secho("Done!")
188188

189189

190+
@click.command()
190191
@click.option('--older-than-days', default=21,
191192
help='Only prune artifacts that were created before a given '
192193
+ 'number of days (set to -1 to prune all)')
193194
@click.option('--keep-orphan-buckets', is_flag=True,
194195
help='Keep buckets that do not represent a circle')
195196
@click.option('--dry-run', is_flag=True,
196197
help='Do not actually remove anything')
197-
@click.command()
198198
def dcor_prune_orphaned_s3_artifacts(older_than_days=21,
199199
keep_orphan_buckets=False,
200200
dry_run=False):
@@ -240,6 +240,61 @@ def dcor_prune_orphaned_s3_artifacts(older_than_days=21,
240240
click.secho("Done!")
241241

242242

243+
@click.command()
@click.option('--modified-before-months', default=24,
              help='Only delete collections and circles that were created '
                   'more than the given number of months ago '
                   '(set to -1 to delete all)')
@click.option('--dry-run', is_flag=True,
              help='Do not actually delete anything')
def dcor_purge_unused_collections_and_circles(
        modified_before_months: int = 24,
        dry_run: bool = False):
    """Purge old collections and circles that don't contain any datasets

    A group (collection or circle) is purged only if all of these hold:

    - it has no children groups,
    - it was created more than `modified_before_months` months ago
      (a month is approximated with 31 days),
    - no active dataset is linked to it via an active membership.

    Parameters
    ----------
    modified_before_months: int
        Minimum age of a group in months; a negative value moves the
        threshold into the future, so that every group is old enough.
    dry_run: bool
        Only print what would be deleted, do not purge anything.
    """
    # Groups created at or after this point in time are never purged.
    created_before = (
        datetime.datetime.now()
        - datetime.timedelta(days=31 * modified_before_months))

    # Iterate over all collections and circles
    for group in model.Group.iterall():
        if group.get_children_groups():
            print(f"Ignoring group '{group.id}' with children")
            continue

        # The age check must not be part of the dataset query below:
        # there, a group that is too young yields zero rows and would
        # be mistaken for an empty group and purged.
        # NOTE(review): assumes the ORM attribute `created` mirrors the
        # `created` column of the group table - confirm. Groups without
        # a creation date are kept to be on the safe side.
        if group.created is None or group.created >= created_before:
            continue

        # Does this group contain any datasets?
        query = (
            model.meta.Session.query(model.package.Package)
            # intersection of the members and package tables
            .join(model.group.member_table,
                  model.group.member_table.c["table_id"]
                  == model.package.Package.id)
            # intersection of the group table and the members table
            .join(model.group.group_table,
                  model.group.group_table.c["id"]
                  == model.group.member_table.c["group_id"])
            # only active datasets
            .filter(model.package.Package.state == model.core.State.ACTIVE)
            # only the current group
            .filter(model.group.group_table.c["id"] == group.id)
            # only active memberships
            .filter(model.group.member_table.c["state"] == 'active')
            # we only need one
            .limit(1)
        )
        if query.count():
            # The group is in use
            continue

        if group.is_organization:
            print(f"Delete circle {group.id}")
            purge_method = logic.action.delete.organization_purge
        else:
            print(f"Delete collection {group.id}")
            purge_method = logic.action.delete.group_purge

        if not dry_run:
            # The purge methods make sure that all the memberships
            # (users) are deleted before removing the group.
            purge_method(admin_context(), {"id": group.id})
296+
297+
243298
@click.command()
244299
def list_circles():
245300
"""List all circles/organizations"""
@@ -296,10 +351,10 @@ def list_zombie_users(last_activity_weeks=12):
296351
click.echo(user.name)
297352

298353

354+
@click.command()
299355
@click.option('--modified-days', default=-1,
300356
help='Only run for datasets modified within this number of days '
301357
+ 'in the past. Set to -1 to apply to all datasets.')
302-
@click.command()
303358
def run_jobs_dcor_schemas(modified_days=-1):
304359
"""Set .rtdc metadata and SHA256 sums and for all resources
305360
@@ -343,6 +398,7 @@ def run_jobs_dcor_schemas(modified_days=-1):
343398
click.echo("Done!")
344399

345400

401+
@click.command()
346402
@click.option('--recipient', type=str)
347403
@click.option('--subject', type=str, default="DCOR Email")
348404
@click.option('--file_body',
@@ -352,7 +408,6 @@ def run_jobs_dcor_schemas(modified_days=-1):
352408
path_type=pathlib.Path),
353409
default=None,
354410
)
355-
@click.command()
356411
def send_mail(recipient, subject=None, file_body=None):
357412
"""Send email to `recipient` with `subject` with content of `file_body`
358413
@@ -375,6 +430,7 @@ def get_commands():
375430
dcor_move_dataset_to_circle,
376431
dcor_prune_draft_datasets,
377432
dcor_prune_orphaned_s3_artifacts,
433+
dcor_purge_unused_collections_and_circles,
378434
list_circles,
379435
list_collections,
380436
list_group_resources,

ckanext/dcor_schemas/tests/test_cli.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,3 +330,114 @@ def test_dcor_prune_orphaned_s3_artifacts(cli):
330330
print(rid)
331331
print(ds_dict["id"])
332332
assert not s3.object_exists(bucket_name, object_name)
333+
334+
335+
@pytest.mark.ckan_config('ckan.plugins', 'dcor_schemas')
@pytest.mark.usefixtures('clean_db', 'with_plugins', 'with_request_context')
def test_dcor_purge_unused_collections_and_circles(cli):
    """Only old, empty collections and circles are purged by the CLI"""
    # click derives the command name from the function name and replaces
    # every underscore with a dash.
    command = "dcor-purge-unused-collections-and-circles"

    user = factories.User()
    context = {'user': user['id']}

    def admin_users():
        # fresh list for every factory call
        return [{'name': user['id'], 'capacity': 'admin'}]

    def assert_exists(action, entity):
        shown = helpers.call_action(action, context, id=entity["id"])
        assert shown["id"] == entity["id"]

    circle_keep = factories.Organization(users=admin_users())
    circle_remove = factories.Organization(users=admin_users())

    group_keep = factories.Group(users=admin_users())
    group_remove = factories.Group(users=admin_users())

    # create a dataset
    ds_dict, res_dict = make_dataset_via_s3(
        create_context=context,
        owner_org=circle_keep,
        resource_path=data_path / "calibration_beads_47.rtdc",
        activate=True,
        private=False,
        authors="Peter Pan")

    # add the dataset to the group_keep
    helpers.call_action("member_create",
                        context,
                        id=group_keep["id"],
                        object=ds_dict["id"],
                        object_type="package",
                        capacity="member",
                        )

    # circle_remove and group_remove should still be there after this,
    # since they were just created.
    result = cli.invoke(ckan_cli, [command])
    # A failing command (e.g. unknown command name) must not go unnoticed
    assert result.exit_code == 0, result.output
    assert_exists("group_show", group_keep)
    assert_exists("group_show", group_remove)
    assert_exists("organization_show", circle_keep)
    assert_exists("organization_show", circle_remove)

    # The same thing happens when we use --dry-run
    result = cli.invoke(ckan_cli,
                        [command,
                         "--modified-before-months", "0",
                         "--dry-run"])
    assert result.exit_code == 0, result.output
    assert_exists("group_show", group_keep)
    assert_exists("group_show", group_remove)
    assert_exists("organization_show", circle_keep)
    assert_exists("organization_show", circle_remove)

    # But if we actually remove things, only the *_keep stuff should stay
    result = cli.invoke(ckan_cli,
                        [command,
                         "--modified-before-months", "0"])
    assert result.exit_code == 0, result.output
    assert_exists("group_show", group_keep)
    assert_exists("organization_show", circle_keep)

    with pytest.raises(logic.NotFound):
        helpers.call_action("group_show",
                            context,
                            id=group_remove["id"]
                            )

    with pytest.raises(logic.NotFound):
        helpers.call_action("organization_show",
                            context,
                            id=circle_remove["id"]
                            )

0 commit comments

Comments
 (0)