From 363329891d14936ed87295d910ce51f89c9a8429 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 12 Mar 2026 16:22:11 +0000
Subject: [PATCH 1/3] Initial plan

From 35a0a6eb14feb18be511ec1845f1891d7683ba44 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 12 Mar 2026 16:34:33 +0000
Subject: [PATCH 2/3] Add delete_article_data management command with tests

Create a Django management command that removes Article data and its
related models (SourceArticle, Contributor, Affiliation, Journal,
Program, License, Concepts). For ThematicArea objects, checks if they
are referenced by directory models before deleting. For License objects,
checks if they are referenced by ScholarlyArticles before deleting.

Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com>
---
 article/management/__init__.py                |   0
 article/management/commands/__init__.py       |   0
 .../commands/delete_article_data.py           | 130 +++++++++++
 article/tests_delete_article_data.py          | 219 ++++++++++++++++++
 4 files changed, 349 insertions(+)
 create mode 100644 article/management/__init__.py
 create mode 100644 article/management/commands/__init__.py
 create mode 100644 article/management/commands/delete_article_data.py
 create mode 100644 article/tests_delete_article_data.py

diff --git a/article/management/__init__.py b/article/management/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/article/management/commands/__init__.py b/article/management/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/article/management/commands/delete_article_data.py b/article/management/commands/delete_article_data.py
new file mode 100644
index 00000000..4b9621ca
--- /dev/null
+++ b/article/management/commands/delete_article_data.py
@@ -0,0 +1,130 @@
+from django.core.management.base import BaseCommand
+
+from article import models as article_models
+from education_directory.models import EducationDirectory
+from event_directory.models import EventDirectory
+from infrastructure_directory.models import InfrastructureDirectory
+from policy_directory.models import PolicyDirectory
+from scholarly_articles.models import ScholarlyArticles
+from usefulmodels.models import ThematicArea
+
+
+class Command(BaseCommand):
+    help = "Remove all Article data and related models data."
+
+    def handle(self, *args, **options):
+        # Delete Articles first: they point at the related rows removed below
+        article_count = article_models.Article.objects.count()
+        article_models.Article.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS(f"Deleted {article_count} Article(s)")
+        )
+
+        # Delete SourceArticles
+        source_article_count = article_models.SourceArticle.objects.count()
+        article_models.SourceArticle.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS(f"Deleted {source_article_count} SourceArticle(s)")
+        )
+
+        # Delete Contributors
+        contributor_count = article_models.Contributor.objects.count()
+        article_models.Contributor.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS(f"Deleted {contributor_count} Contributor(s)")
+        )
+
+        # Delete Programs (before Affiliations: Program has an FK to Affiliation)
+        program_count = article_models.Program.objects.count()
+        article_models.Program.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS(f"Deleted {program_count} Program(s)")
+        )
+
+        # Delete Affiliations
+        affiliation_count = article_models.Affiliation.objects.count()
+        article_models.Affiliation.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS(f"Deleted {affiliation_count} Affiliation(s)")
+        )
+
+        # Delete Journals
+        journal_count = article_models.Journal.objects.count()
+        article_models.Journal.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS(f"Deleted {journal_count} Journal(s)")
+        )
+
+        # Delete Licenses (check ScholarlyArticles first)
+        self._delete_licenses()
+
+        # Collect ThematicArea IDs from Concepts, delete Concepts,
+        # then delete ThematicAreas if not used by directory models
+        self._delete_concepts_and_thematic_areas()
+
+    def _delete_licenses(self):
+        licenses = article_models.License.objects.all()
+        deleted_count = 0
+        skipped_count = 0
+
+        for license_obj in licenses:
+            if ScholarlyArticles.objects.filter(license_id=license_obj.pk).exists():
+                self.stdout.write(
+                    self.style.WARNING(
+                        f"License '{license_obj}' is referenced by ScholarlyArticles. Skipping."
+                    )
+                )
+                skipped_count += 1
+            else:
+                license_obj.delete()
+                deleted_count += 1
+
+        self.stdout.write(
+            self.style.SUCCESS(
+                f"Deleted {deleted_count} License(s), skipped {skipped_count}"
+            )
+        )
+
+    def _delete_concepts_and_thematic_areas(self):
+        # Collect ThematicArea IDs referenced by Concepts before deleting them
+        thematic_area_ids = set(
+            article_models.Concepts.objects.filter(
+                thematic_areas__isnull=False
+            ).values_list("thematic_areas__id", flat=True)
+        )
+
+        # Delete Concepts
+        concepts_count = article_models.Concepts.objects.count()
+        article_models.Concepts.objects.all().delete()
+        self.stdout.write(
+            self.style.SUCCESS(f"Deleted {concepts_count} Concepts")
+        )
+
+        # Try to delete the ThematicAreas that were associated with Concepts
+        deleted_count = 0
+        skipped_count = 0
+
+        for ta in ThematicArea.objects.filter(id__in=thematic_area_ids):
+            in_use = (
+                EducationDirectory.objects.filter(thematic_areas=ta).exists()
+                or InfrastructureDirectory.objects.filter(thematic_areas=ta).exists()
+                or EventDirectory.objects.filter(thematic_areas=ta).exists()
+                or PolicyDirectory.objects.filter(thematic_areas=ta).exists()
+            )
+
+            if in_use:
+                self.stdout.write(
+                    self.style.WARNING(
+                        f"ThematicArea '{ta}' is referenced by directory models. Skipping."
+                    )
+                )
+                skipped_count += 1
+            else:
+                ta.delete()
+                deleted_count += 1
+
+        self.stdout.write(
+            self.style.SUCCESS(
+                f"Deleted {deleted_count} ThematicArea(s), skipped {skipped_count}"
+            )
+        )
diff --git a/article/tests_delete_article_data.py b/article/tests_delete_article_data.py
new file mode 100644
index 00000000..8a01f2be
--- /dev/null
+++ b/article/tests_delete_article_data.py
@@ -0,0 +1,219 @@
+import pytest
+from django.core.management import call_command
+from io import StringIO
+
+from article.models import (
+    Affiliation,
+    Article,
+    Concepts,
+    Contributor,
+    Journal,
+    License,
+    Program,
+    SourceArticle,
+)
+from core.users.tests.factories import UserFactory
+from education_directory.models import EducationDirectory
+from usefulmodels.models import ThematicArea
+
+
+@pytest.fixture
+def user(db):
+    return UserFactory()
+
+
+@pytest.fixture
+def thematic_area(db, user):
+    return ThematicArea.objects.create(
+        creator=user,
+        level0="Ciências Exatas e da Terra",
+        level1="Ciência da Computação",
+        level2="Teoria da Computação",
+    )
+
+
+@pytest.fixture
+def thematic_area_in_directory(db, user):
+    ta = ThematicArea.objects.create(
+        creator=user,
+        level0="Ciências Humanas",
+        level1="Educação",
+        level2="Ensino-Aprendizagem",
+    )
+    ed = EducationDirectory.objects.create(creator=user)
+    ed.thematic_areas.add(ta)
+    return ta
+
+
+@pytest.fixture
+def article_license(db):
+    return License.objects.create(name="CC-BY-4.0", url="https://creativecommons.org/licenses/by/4.0/")
+
+
+@pytest.fixture
+def journal(db):
+    return Journal.objects.create(journal_name="Test Journal", journal_issn_l="1234-5678")
+
+
+@pytest.fixture
+def affiliation(db):
+    return Affiliation.objects.create(name="Test University")
+
+
+@pytest.fixture
+def program(db, affiliation):
+    return Program.objects.create(name="Test Program", affiliation=affiliation)
+
+
+@pytest.fixture
+def contributor(db, affiliation):
+    c = Contributor.objects.create(family="Doe", given="John")
+    c.affiliations.add(affiliation)
+    return c
+
+
+@pytest.fixture
+def concept(db, thematic_area):
+    c = Concepts.objects.create(specific_id="C123", name="Machine Learning", level=1)
+    c.thematic_areas.add(thematic_area)
+    return c
+
+
+@pytest.fixture
+def source_article(db):
+    return SourceArticle.objects.create(
+        specific_id="SA001",
+        doi="10.1234/test",
+        title="Test Source Article",
+    )
+
+
+@pytest.fixture
+def article(db, article_license, journal, contributor, concept, program, user):
+    a = Article.objects.create(
+        title="Test Article",
+        doi="10.1234/article",
+        year="2023",
+        license=article_license,
+        journal=journal,
+        creator=user,
+    )
+    a.contributors.add(contributor)
+    a.concepts.add(concept)
+    a.programs.add(program)
+    return a
+
+
+@pytest.mark.django_db
+class TestDeleteArticleDataCommand:
+    def test_deletes_articles(self, article):
+        assert Article.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert Article.objects.count() == 0
+        assert "Deleted 1 Article(s)" in out.getvalue()
+
+    def test_deletes_source_articles(self, source_article):
+        assert SourceArticle.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert SourceArticle.objects.count() == 0
+        assert "Deleted 1 SourceArticle(s)" in out.getvalue()
+
+    def test_deletes_contributors(self, contributor):
+        assert Contributor.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert Contributor.objects.count() == 0
+        assert "Deleted 1 Contributor(s)" in out.getvalue()
+
+    def test_deletes_affiliations(self, affiliation):
+        assert Affiliation.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert Affiliation.objects.count() == 0
+        assert "Deleted 1 Affiliation(s)" in out.getvalue()
+
+    def test_deletes_journals(self, journal):
+        assert Journal.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert Journal.objects.count() == 0
+        assert "Deleted 1 Journal(s)" in out.getvalue()
+
+    def test_deletes_programs(self, program):
+        assert Program.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert Program.objects.count() == 0
+        assert "Deleted 1 Program(s)" in out.getvalue()
+
+    def test_deletes_licenses(self, article_license):
+        assert License.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert License.objects.count() == 0
+        assert "Deleted 1 License(s), skipped 0" in out.getvalue()
+
+    def test_deletes_concepts(self, concept):
+        assert Concepts.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert Concepts.objects.count() == 0
+        assert "Deleted 1 Concepts" in out.getvalue()
+
+    def test_deletes_thematic_area_not_in_directory(self, concept, thematic_area):
+        assert ThematicArea.objects.count() == 1
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert ThematicArea.objects.count() == 0
+        assert "Deleted 1 ThematicArea(s), skipped 0" in out.getvalue()
+
+    def test_skips_thematic_area_in_directory(
+        self, concept, thematic_area, thematic_area_in_directory
+    ):
+        # thematic_area is used only by Concepts -> should be deleted
+        # thematic_area_in_directory is used by EducationDirectory -> should be skipped
+        concept.thematic_areas.add(thematic_area_in_directory)
+        assert ThematicArea.objects.count() == 2
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        output = out.getvalue()
+        # The one used by directory should remain
+        assert ThematicArea.objects.count() == 1
+        assert ThematicArea.objects.filter(id=thematic_area_in_directory.id).exists()
+        assert "Deleted 1 ThematicArea(s), skipped 1" in output
+        assert "is referenced by directory models. Skipping." in output
+
+    def test_deletes_all_related_data(
+        self,
+        article,
+        source_article,
+        thematic_area,
+    ):
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        assert Article.objects.count() == 0
+        assert SourceArticle.objects.count() == 0
+        assert Contributor.objects.count() == 0
+        assert Affiliation.objects.count() == 0
+        assert Journal.objects.count() == 0
+        assert Program.objects.count() == 0
+        assert License.objects.count() == 0
+        assert Concepts.objects.count() == 0
+        # ThematicArea not used by directories should be deleted
+        assert ThematicArea.objects.filter(id=thematic_area.id).exists() is False
+
+    def test_empty_database(self, db):
+        out = StringIO()
+        call_command("delete_article_data", stdout=out)
+        output = out.getvalue()
+        assert "Deleted 0 Article(s)" in output
+        assert "Deleted 0 SourceArticle(s)" in output
+        assert "Deleted 0 Contributor(s)" in output
+        assert "Deleted 0 Affiliation(s)" in output
+        assert "Deleted 0 Journal(s)" in output
+        assert "Deleted 0 Program(s)" in output
+        assert "Deleted 0 License(s), skipped 0" in output
+        assert "Deleted 0 Concepts" in output
+        assert "Deleted 0 ThematicArea(s), skipped 0" in output

From 2a5c275726e774bc3b52def8d9b537a99b01ef32 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 12 Mar 2026 16:35:30 +0000
Subject: [PATCH 3/3] Optimize database queries in delete_article_data command

Pre-collect ScholarlyArticles license IDs and directory model ThematicArea
IDs upfront to avoid per-object queries in loops.

Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com>
---
 .../commands/delete_article_data.py           | 32 +++++++++++++------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/article/management/commands/delete_article_data.py b/article/management/commands/delete_article_data.py
index 4b9621ca..97769552 100644
--- a/article/management/commands/delete_article_data.py
+++ b/article/management/commands/delete_article_data.py
@@ -63,12 +63,19 @@ def handle(self, *args, **options):
         self._delete_concepts_and_thematic_areas()
 
     def _delete_licenses(self):
+        # Collect all license IDs referenced by ScholarlyArticles upfront
+        scholarly_license_ids = set(
+            ScholarlyArticles.objects.filter(
+                license__isnull=False
+            ).values_list("license_id", flat=True)
+        )
+
         licenses = article_models.License.objects.all()
         deleted_count = 0
         skipped_count = 0
 
         for license_obj in licenses:
-            if ScholarlyArticles.objects.filter(license_id=license_obj.pk).exists():
+            if license_obj.pk in scholarly_license_ids:
                 self.stdout.write(
                     self.style.WARNING(
                         f"License '{license_obj}' is referenced by ScholarlyArticles. Skipping."
@@ -100,19 +107,26 @@ def _delete_concepts_and_thematic_areas(self):
             self.style.SUCCESS(f"Deleted {concepts_count} Concepts")
         )
 
+        # Collect ThematicArea IDs referenced by directory models
+        directory_ta_ids = set()
+        for DirectoryModel in (
+            EducationDirectory,
+            InfrastructureDirectory,
+            EventDirectory,
+            PolicyDirectory,
+        ):
+            directory_ta_ids.update(
+                DirectoryModel.objects.filter(
+                    thematic_areas__id__in=thematic_area_ids
+                ).values_list("thematic_areas__id", flat=True)
+            )
+
         # Try to delete the ThematicAreas that were associated with Concepts
         deleted_count = 0
         skipped_count = 0
 
         for ta in ThematicArea.objects.filter(id__in=thematic_area_ids):
-            in_use = (
-                EducationDirectory.objects.filter(thematic_areas=ta).exists()
-                or InfrastructureDirectory.objects.filter(thematic_areas=ta).exists()
-                or EventDirectory.objects.filter(thematic_areas=ta).exists()
-                or PolicyDirectory.objects.filter(thematic_areas=ta).exists()
-            )
-
-            if in_use:
+            if ta.pk in directory_ta_ids:
                 self.stdout.write(
                     self.style.WARNING(
                         f"ThematicArea '{ta}' is referenced by directory models. Skipping."