diff --git a/Makefile b/Makefile
index e95d6c944..464073319 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,7 @@ ifneq ($(shell docker compose version 2>/dev/null),)
 	DOCKER_COMPOSE=docker compose
 else
 	DOCKER_COMPOSE=docker-compose
+	DOCKER_COMPATIBILITY=--compatibility
 endif
 
 help: ## Show this help
@@ -47,6 +48,10 @@ build_no_cache: ## Build app using $(compose) --no-cache
 up: ## Start app using $(compose)
 	$(DOCKER_COMPOSE) -f $(compose) up -d
 
+up_scale: ## Start app using $(compose) and scaling worker up to $(numworkers)
+	$(eval numworkers ?= 1)
+	$(DOCKER_COMPOSE) $(DOCKER_COMPATIBILITY) -f $(compose) up -d --scale celeryworker=$(numworkers)
+
 logs: ## See all app logs using $(compose)
 	$(DOCKER_COMPOSE) -f $(compose) logs -f
 
@@ -58,6 +63,12 @@ restart:
 ps: ## See all containers using $(compose)
 	$(DOCKER_COMPOSE) -f $(compose) ps
 
+top: ## See docker top using $(compose)
+	$(DOCKER_COMPOSE) -f $(compose) top
+
+stats: ## See docker stats using $(compose)
+	$(DOCKER_COMPOSE) -f $(compose) stats
+
 rm: ## Remove all containers using $(compose)
 	$(DOCKER_COMPOSE) -f $(compose) rm -f
 
@@ -149,7 +160,7 @@ volume_down: ## Remove all volume
 	$(DOCKER_COMPOSE) -f $(compose) down -v
 
 clean_celery_logs:
-	@sudo truncate -s 0 $$(docker inspect --format='{{.LogPath}}' upload_production_celeryworker)
+	@sudo truncate -s 0 $$(docker inspect --format='{{.LogPath}}' $$($(DOCKER_COMPOSE) -f $(compose) ps -q celeryworker))
 
 exclude_upload_production_django: ## Exclude all productions containers
 	@if [ -n "$$(docker images --format '{{.Repository}}:{{.Tag}}' | grep 'infrascielo/upload' | grep -v 'upload_production_postgres')" ]; then \
diff --git a/VERSION b/VERSION
index a42079045..cf37bb132 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v3.0.0rc20
+v2.12.1rc
diff --git a/article/migrations/0008_add_unique_pid_v3.py b/article/migrations/0008_add_unique_pid_v3.py
new file mode 100644
index 000000000..f4639808d
--- /dev/null
+++ b/article/migrations/0008_add_unique_pid_v3.py
@@ -0,0 +1,41 @@
+# Generated by Django 5.2.3 on 2026-03-16 23:00
+
+from django.db import migrations, models
+
+
+def remove_duplicate_articles(apps, schema_editor):
+    """Remove duplicate Article records with the same pid_v3, keeping the most recently updated one."""
+    Article = apps.get_model("article", "Article")
+    from django.db.models import Count
+
+    # Normalize empty strings to NULL so the unique constraint ignores them
+    Article.objects.filter(pid_v3="").update(pid_v3=None)
+
+    duplicates = (
+        Article.objects.values("pid_v3")
+        .exclude(pid_v3__isnull=True)
+        .annotate(count=Count("id"))
+        .filter(count__gt=1)
+    )
+    for dup in duplicates:
+        pid_v3 = dup["pid_v3"]
+        keep = Article.objects.filter(pid_v3=pid_v3).order_by("-updated").first()
+        if keep:
+            Article.objects.filter(pid_v3=pid_v3).exclude(pk=keep.pk).delete()
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("article", "0007_alter_article_options_article_first_pubdate_iso"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            remove_duplicate_articles,
+            migrations.RunPython.noop,
+        ),
+        migrations.RemoveIndex(
+            model_name="article",
+            name="article_art_pid_v3_2370cc_idx",
+        ),
+    ]
diff --git a/article/migrations/0009_alter_article_pid_v3_unique.py b/article/migrations/0009_alter_article_pid_v3_unique.py
new file mode 100644
index 000000000..b1346857f
--- /dev/null
+++ b/article/migrations/0009_alter_article_pid_v3_unique.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.2.3 on 2026-03-16 23:00
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("article", "0008_add_unique_pid_v3"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="article",
+            name="pid_v3",
+            field=models.CharField(
+                blank=True, max_length=23, null=True, unique=True, verbose_name="PID v3"
+            ),
+        ),
+    ]
diff --git a/article/models.py b/article/models.py
index 1305c99e1..fe6afe33a 100644
--- a/article/models.py
+++ b/article/models.py
@@ -48,7 +48,7 @@ class Article(ClusterableModel, CommonControlField):
         SPSPkg, blank=True, null=True, on_delete=models.SET_NULL
     )
     # PID v3
-    pid_v3 = models.CharField(_("PID v3"), max_length=23, blank=True, null=True)
+    pid_v3 = models.CharField(_("PID v3"), max_length=23, blank=True, null=True, unique=True)
     pid_v2 = models.CharField(_("PID v2"), max_length=23, blank=True, null=True)
 
     # Article type
@@ -124,7 +124,6 @@ class Article(ClusterableModel, CommonControlField):
     class Meta:
         indexes = [
-            models.Index(fields=["pid_v3"]),
             models.Index(fields=["status"]),
         ]
         ordering = ["position", "fpage", "-first_pubdate_iso"]
@@ -205,7 +204,15 @@ def article_langs(self):
     @classmethod
     def get(cls, pid_v3):
         if pid_v3:
-            return cls.objects.get(pid_v3=pid_v3)
+            try:
+                return cls.objects.get(pid_v3=pid_v3)
+            except cls.MultipleObjectsReturned:
+                qs = cls.objects.filter(pid_v3=pid_v3).order_by("-updated")
+                obj = qs.first()
+                if obj is None:
+                    raise cls.DoesNotExist
+                qs.exclude(pk=obj.pk).delete()
+                return obj
         raise ValueError("Article.get requires pid_v3")
 
     @classmethod
@@ -470,6 +477,152 @@ def get_repeated_items(cls, field_name, journal=None):
             .values_list(field_name, flat=True)
         )
 
+    @staticmethod
+    def has_valid_pid_v2(pid_v2, order):
+        """
+        Check whether the last 5 digits of pid_v2 match the order value
+        (from MigratedArticle.document.order / v121) padded with zeros.
+        """
+        try:
+            if not pid_v2 or not order:
+                return True
+            if len(pid_v2) < 5:
+                return True
+            expected_suffix = str(int(str(order).strip())).zfill(5)
+            actual_suffix = pid_v2[-5:]
+            return actual_suffix == expected_suffix
+        except (TypeError, IndexError, ValueError):
+            return True
+
+    @classmethod
+    def exclude_articles_with_invalid_pid_v2(cls, journal=None):
+        """
+        Find and delete migrated articles whose last 5 pid_v2 digits
+        don't match the order (v121) from MigratedArticle.document.order.
+        Uses ArticleProc.migrated_data to access the migration data.
+        Only applies to migrated articles.
+ """ + from proc.models import ArticleProc + + filters = { + "migrated_data__isnull": False, + "sps_pkg__isnull": False, + } + if journal: + filters["issue_proc__journal_proc__journal"] = journal + + article_procs = ArticleProc.objects.filter( + **filters + ).select_related("migrated_data", "sps_pkg") + + # Bulk-fetch Article records to avoid N+1 queries via ArticleProc.article + sps_pkg_id_list = [ + ap.sps_pkg_id for ap in article_procs if ap.sps_pkg_id + ] + articles_by_sps_pkg = {} + if sps_pkg_id_list: + for article in Article.objects.filter( + sps_pkg_id__in=sps_pkg_id_list + ).only("id", "pid_v2", "sps_pkg_id", "pp_xml_id"): + articles_by_sps_pkg[article.sps_pkg_id] = article + + events = [] + sps_pkg_ids = set() + pp_xml_ids = set() + article_ids = [] + + for article_proc in article_procs: + try: + article = articles_by_sps_pkg.get(article_proc.sps_pkg_id) + if not article or not article.pid_v2: + continue + + order = article_proc.migrated_data.document.order + if not order: + continue + + if not cls.has_valid_pid_v2(article.pid_v2, order): + try: + expected_suffix = str(int(str(order).strip())).zfill(5) + except (TypeError, ValueError): + expected_suffix = str(order) + events.append( + f"Invalid pid_v2: {article.pid_v2} " + f"(order={order}, " + f"expected suffix={expected_suffix}, " + f"actual suffix={article.pid_v2[-5:]})" + ) + article_ids.append(article.id) + if article.sps_pkg_id: + sps_pkg_ids.add(article.sps_pkg_id) + if article.pp_xml_id: + pp_xml_ids.add(article.pp_xml_id) + except Exception as e: + logging.exception( + f"Error checking pid_v2 for ArticleProc {article_proc}: {e}" + ) + + if not article_ids: + events.append("No migrated articles with invalid pid_v2 found") + return events + + with transaction.atomic(): + deleted_articles, _ = cls.objects.filter(id__in=article_ids).delete() + events.append(f"Articles deletados: {deleted_articles}") + + if sps_pkg_ids: + deleted_sps, _ = SPSPkg.objects.filter(id__in=sps_pkg_ids).delete() + events.append(f"SPSPkg deletados: {deleted_sps}") + + if pp_xml_ids: + deleted_pp, _ = PidProviderXML.objects.filter(id__in=pp_xml_ids).delete() + events.append(f"PidProviderXML deletados: {deleted_pp}") + + return events + + @classmethod + def exclude_inconvenient_articles(cls, journal, user, timeout=None): + """ + Remove all inconvenient article records in a unified operation: + 1. Migrated articles with invalid pid_v2 (suffix doesn't match order from v121) + 2. 
Duplicate articles (repeated pid_v2 or sps_pkg_name) + """ + results = { + "events": [], + "numbers": {}, + "exceptions": [], + } + + try: + events = cls.exclude_articles_with_invalid_pid_v2(journal) + results["events"].extend(events) + except Exception as e: + results["exceptions"].append( + { + "exclude_articles_with_invalid_pid_v2": str(e), + "traceback": traceback.format_exc(), + } + ) + + for field_name in ("pid_v2", "sps_pkg__sps_pkg_name"): + repeated_items = cls.get_repeated_items(field_name, journal) + results["numbers"][f"repeated_by_{field_name}"] = repeated_items.count() + for repeated_value in repeated_items: + try: + events = cls.exclude_repetitions( + user, field_name, repeated_value, timeout=timeout + ) + results["events"].extend(events) + except Exception as e: + results["exceptions"].append( + { + f"repeated_by_{field_name}": repeated_value, + "traceback": traceback.format_exc(), + } + ) + + return results + @classmethod def select_articles(cls, journal_id_list=None, issue_id_list=None): kwargs = {} diff --git a/article/test_models.py b/article/test_models.py new file mode 100644 index 000000000..3c0f86c34 --- /dev/null +++ b/article/test_models.py @@ -0,0 +1,270 @@ +import unittest +from unittest.mock import MagicMock, Mock, patch + +from article.models import Article + + +class ArticleHasValidPidV2TestCase(unittest.TestCase): + """Test cases for Article.has_valid_pid_v2() static method.""" + + def test_valid_pid_v2_matching_order(self): + """pid_v2 last 5 digits match order zero-padded.""" + self.assertTrue(Article.has_valid_pid_v2("S0034-77442021000600036", "00036")) + + def test_valid_pid_v2_matching_order_without_leading_zeros(self): + """pid_v2 last 5 digits match order even when order has no leading zeros.""" + self.assertTrue(Article.has_valid_pid_v2("S0034-77442021000600036", "36")) + + def test_invalid_pid_v2_not_matching_order(self): + """pid_v2 last 5 digits don't match order.""" + self.assertFalse(Article.has_valid_pid_v2("S0034-77442021060626158", "36")) + + def test_valid_when_pid_v2_is_none(self): + """Returns True when pid_v2 is None (can't validate).""" + self.assertTrue(Article.has_valid_pid_v2(None, "36")) + + def test_valid_when_order_is_none(self): + """Returns True when order is None (can't validate).""" + self.assertTrue(Article.has_valid_pid_v2("S0034-77442021000600036", None)) + + def test_valid_when_order_is_empty(self): + """Returns True when order is empty string (can't validate).""" + self.assertTrue(Article.has_valid_pid_v2("S0034-77442021000600036", "")) + + def test_valid_with_order_zero(self): + """Order 0 matches suffix 00000.""" + self.assertTrue(Article.has_valid_pid_v2("S003477442021000600000", "0")) + + def test_invalid_with_order_1(self): + """Order 1 should match suffix 00001, not 26158.""" + self.assertFalse(Article.has_valid_pid_v2("S0034-77442021060626158", "1")) + + def test_valid_when_pid_v2_too_short(self): + """Returns True when pid_v2 has fewer than 5 chars.""" + self.assertTrue(Article.has_valid_pid_v2("S034", "36")) + + def test_valid_when_pid_v2_is_empty(self): + """Returns True when pid_v2 is empty string.""" + self.assertTrue(Article.has_valid_pid_v2("", "36")) + + def test_valid_when_order_is_non_numeric(self): + """Returns True when order is non-numeric (can't validate).""" + self.assertTrue(Article.has_valid_pid_v2("S0034-77442021000600036", "abc")) + + +class ArticleExcludeArticlesWithInvalidPidV2TestCase(unittest.TestCase): + """Test cases for Article.exclude_articles_with_invalid_pid_v2().""" + + def 
+    def _make_article_proc(self, article_id, pid_v2, order, sps_pkg_id=1, pp_xml_id=1):
+        mock_article = Mock()
+        mock_article.id = article_id
+        mock_article.pid_v2 = pid_v2
+        mock_article.sps_pkg_id = sps_pkg_id
+        mock_article.pp_xml_id = pp_xml_id
+
+        mock_document = Mock()
+        mock_document.order = order
+
+        mock_migrated_data = Mock()
+        mock_migrated_data.document = mock_document
+
+        mock_article_proc = Mock()
+        mock_article_proc.sps_pkg_id = sps_pkg_id
+        mock_article_proc.migrated_data = mock_migrated_data
+        return mock_article_proc, mock_article
+
+    @patch("article.models.Article.objects")
+    def test_no_invalid_articles(self, mock_objects):
+        """No articles deleted when all have valid pid_v2."""
+        mock_article_proc, mock_article = self._make_article_proc(
+            article_id=1, pid_v2="S0034-77442021000600036", order="36"
+        )
+
+        mock_ap_qs = MagicMock()
+        mock_ap_qs.filter.return_value = mock_ap_qs
+        mock_ap_qs.select_related.return_value = [mock_article_proc]
+
+        mock_article_qs = MagicMock()
+        mock_article_qs.only.return_value = [mock_article]
+        mock_objects.filter.return_value = mock_article_qs
+
+        with patch("proc.models.ArticleProc.objects", mock_ap_qs):
+            events = Article.exclude_articles_with_invalid_pid_v2()
+
+        self.assertIn("No migrated articles with invalid pid_v2 found", events)
+
+    @patch("article.models.transaction")
+    @patch("article.models.PidProviderXML")
+    @patch("article.models.SPSPkg")
+    @patch("article.models.Article.objects")
+    def test_deletes_invalid_article(self, mock_objects, mock_sps_pkg, mock_pp_xml, mock_transaction):
+        """Deletes migrated article with invalid pid_v2."""
+        mock_article_proc, mock_article = self._make_article_proc(
+            article_id=42, pid_v2="S0034-77442021060626158", order="36",
+            sps_pkg_id=10, pp_xml_id=20
+        )
+
+        mock_ap_qs = MagicMock()
+        mock_ap_qs.filter.return_value = mock_ap_qs
+        mock_ap_qs.select_related.return_value = [mock_article_proc]
+
+        mock_article_qs = MagicMock()
+        mock_article_qs.only.return_value = [mock_article]
+
+        mock_delete_qs = MagicMock()
+        mock_delete_qs.delete.return_value = (1, {})
+
+        # First call: bulk fetch articles; subsequent calls: delete operations
+        mock_objects.filter.side_effect = [mock_article_qs, mock_delete_qs]
+
+        mock_sps_pkg_qs = MagicMock()
+        mock_sps_pkg_qs.delete.return_value = (1, {})
+        mock_sps_pkg.objects.filter.return_value = mock_sps_pkg_qs
+
+        mock_pp_xml_qs = MagicMock()
+        mock_pp_xml_qs.delete.return_value = (1, {})
+        mock_pp_xml.objects.filter.return_value = mock_pp_xml_qs
+
+        mock_transaction.atomic.return_value.__enter__ = Mock()
+        mock_transaction.atomic.return_value.__exit__ = Mock(return_value=False)
+
+        with patch("proc.models.ArticleProc.objects", mock_ap_qs):
+            events = Article.exclude_articles_with_invalid_pid_v2()
+
+        self.assertTrue(any("Invalid pid_v2" in e for e in events))
+        self.assertTrue(any("Articles deleted" in e for e in events))
+
+    def test_with_journal_filter(self):
+        """Filters ArticleProc by journal when provided."""
+        mock_journal = Mock()
+
+        mock_ap_qs = MagicMock()
+        mock_ap_qs.filter.return_value = mock_ap_qs
+        mock_ap_qs.select_related.return_value = []
+
+        with patch("proc.models.ArticleProc.objects", mock_ap_qs):
+            events = Article.exclude_articles_with_invalid_pid_v2(journal=mock_journal)
+
+        mock_ap_qs.filter.assert_called_once_with(
+            migrated_data__isnull=False,
+            sps_pkg__isnull=False,
+            issue_proc__journal_proc__journal=mock_journal,
+        )
+
+
+class ArticleExcludeInconvenientArticlesTestCase(unittest.TestCase):
+    """Test cases for Article.exclude_inconvenient_articles()."""
+
+    @patch("article.models.Article.exclude_repetitions")
+    @patch("article.models.Article.get_repeated_items")
+    @patch("article.models.Article.exclude_articles_with_invalid_pid_v2")
+    def test_calls_both_operations(self, mock_invalid, mock_repeated, mock_exclude_rep):
+        """Calls both invalid pid_v2 and repetition removal."""
+        mock_invalid.return_value = ["invalid pid_v2 event"]
+
+        mock_repeated_qs = MagicMock()
+        mock_repeated_qs.count.return_value = 0
+        mock_repeated_qs.__iter__ = Mock(return_value=iter([]))
+        mock_repeated.return_value = mock_repeated_qs
+
+        mock_journal = Mock()
+        mock_user = Mock()
+
+        results = Article.exclude_inconvenient_articles(mock_journal, mock_user)
+
+        mock_invalid.assert_called_once_with(mock_journal)
+        self.assertEqual(mock_repeated.call_count, 2)
+        self.assertIn("invalid pid_v2 event", results["events"])
+
+    @patch("article.models.Article.exclude_repetitions")
+    @patch("article.models.Article.get_repeated_items")
+    @patch("article.models.Article.exclude_articles_with_invalid_pid_v2")
+    def test_collects_repetition_events(self, mock_invalid, mock_repeated, mock_exclude_rep):
+        """Collects events from repetition removal."""
+        mock_invalid.return_value = []
+        mock_exclude_rep.return_value = ["repetition event"]
+
+        mock_repeated_qs = MagicMock()
+        mock_repeated_qs.count.return_value = 1
+        mock_repeated_qs.__iter__ = Mock(return_value=iter(["value1"]))
+        mock_repeated.return_value = mock_repeated_qs
+
+        results = Article.exclude_inconvenient_articles(Mock(), Mock())
+
+        self.assertIn("repetition event", results["events"])
+
+    @patch("article.models.Article.exclude_repetitions")
+    @patch("article.models.Article.get_repeated_items")
+    @patch("article.models.Article.exclude_articles_with_invalid_pid_v2")
+    def test_captures_exceptions(self, mock_invalid, mock_repeated, mock_exclude_rep):
+        """Captures exceptions without stopping execution."""
+        mock_invalid.side_effect = Exception("test error")
+
+        mock_repeated_qs = MagicMock()
+        mock_repeated_qs.count.return_value = 0
+        mock_repeated_qs.__iter__ = Mock(return_value=iter([]))
+        mock_repeated.return_value = mock_repeated_qs
+
+        results = Article.exclude_inconvenient_articles(Mock(), Mock())
+
+        self.assertEqual(len(results["exceptions"]), 1)
+        self.assertIn("test error", results["exceptions"][0]["exclude_articles_with_invalid_pid_v2"])
+
+
+class ArticleGetTestCase(unittest.TestCase):
+    """Test cases for Article.get() handling of duplicate records."""
+
+    def test_get_raises_value_error_without_pid_v3(self):
+        """Test that get() raises ValueError when pid_v3 is not provided."""
+        with self.assertRaises(ValueError):
+            Article.get(None)
+
+    def test_get_raises_value_error_with_empty_pid_v3(self):
+        """Test that get() raises ValueError when pid_v3 is empty string."""
+        with self.assertRaises(ValueError):
+            Article.get("")
+
+    @patch("article.models.Article.objects")
+    def test_get_returns_single_article(self, mock_objects):
+        """Test that get() returns the article when exactly one match exists."""
+        mock_article = Mock(spec=Article)
+        mock_objects.get.return_value = mock_article
+
+        result = Article.get("pid123")
+
+        self.assertEqual(result, mock_article)
+        mock_objects.get.assert_called_once_with(pid_v3="pid123")
+
+    @patch("article.models.Article.objects")
+    def test_get_raises_does_not_exist(self, mock_objects):
+        """Test that get() raises DoesNotExist when no article is found."""
+        mock_objects.get.side_effect = Article.DoesNotExist()
+
+        with self.assertRaises(Article.DoesNotExist):
+            Article.get("pid123")
+
+    @patch("article.models.Article.objects")
+    def test_get_handles_multiple_objects_returned(self, mock_objects):
+        """Test that get() handles duplicates by keeping the most recent and deleting others."""
+        mock_objects.get.side_effect = Article.MultipleObjectsReturned()
+
+        mock_recent = Mock(spec=Article)
+        mock_recent.pk = 1
+
+        mock_queryset = MagicMock()
+        mock_ordered_qs = MagicMock()
+        mock_ordered_qs.first.return_value = mock_recent
+        mock_queryset.order_by.return_value = mock_ordered_qs
+
+        mock_exclude_qs = MagicMock()
+        mock_ordered_qs.exclude.return_value = mock_exclude_qs
+
+        mock_objects.filter.return_value = mock_queryset
+
+        result = Article.get("pid123")
+
+        self.assertEqual(result, mock_recent)
+        mock_objects.filter.assert_any_call(pid_v3="pid123")
+        mock_queryset.order_by.assert_called_with("-updated")
+        mock_ordered_qs.exclude.assert_called_once_with(pk=mock_recent.pk)
+        mock_exclude_qs.delete.assert_called_once()
diff --git a/article/wagtail_hooks.py b/article/wagtail_hooks.py
index aa6d6c442..a6776d013 100644
--- a/article/wagtail_hooks.py
+++ b/article/wagtail_hooks.py
@@ -63,6 +63,13 @@ class ArticleSnippetViewSet(SnippetViewSet):
     )
     # inspect_view_fields não é usado em SnippetViewSet, use inspect_view_class customizada
 
+    def get_queryset(self, request):
+        qs = super().get_queryset(request)
+        try:
+            return qs.distinct()
+        except AttributeError:
+            return qs
+
 
 class RelatedItemSnippetViewSet(SnippetViewSet):
     model = RelatedItem
@@ -121,7 +128,10 @@ def get_queryset(self, request):
         #     if self.permission_helper.user_can_make_article_change(request.user, None):
         #         return qs
 
-        return qs
+        try:
+            return qs.distinct()
+        except AttributeError:
+            return qs
 
 
 class ArticleSnippetViewSetGroup(SnippetViewSetGroup):
diff --git a/bigbang/tasks_scheduler.py b/bigbang/tasks_scheduler.py
index c0b567477..d3200c8f4 100644
--- a/bigbang/tasks_scheduler.py
+++ b/bigbang/tasks_scheduler.py
@@ -44,6 +44,9 @@
 FETCH_AND_CREATE_JOURNAL_MINUTES = MINUTES[10]
 FETCH_AND_CREATE_JOURNAL_PRIORITY = 1
 
+PRESS_RELEASE_MINUTES = MINUTES[10]
+PRESS_RELEASE_PRIORITY = 1
+
 TITLE_DB_MIGRATION_PRIORITY = 0
 ISSUE_DB_MIGRATION_PRIORITY = 0
 ARTICLE_DB_MIGRATION_PRIORITY = 4
@@ -105,6 +108,7 @@ def schedule_publication_subtasks(username):
 def schedule_subtasks(username):
     enabled = False
     _schedule_fetch_and_create_journal(username, enabled)  # Nova tarefa adicionada
+    _schedule_try_fetch_and_register_press_release(username, enabled)
 
 
 def _schedule_check_article_availability(username, enabled=False):
@@ -299,6 +303,7 @@ def _schedule_publish_journals(username, enabled=False):
             collection_acron=None,
             journal_acron=None,
             force_update=False,
+            verify=False,
         ),
         description=_("Publica periódicos"),
         priority=TITLE_DB_MIGRATION_PRIORITY,
@@ -324,6 +329,7 @@ def _schedule_publish_issues(username, enabled=False):
             journal_acron=None,
             publication_year=None,
             force_update=False,
+            verify=False,
         ),
         description=_("Publica fascículos"),
         priority=ISSUE_DB_MIGRATION_PRIORITY,
@@ -345,6 +351,7 @@ def _schedule_publish_articles(username, enabled=False):
             journal_acron=None,
             publication_year=None,
             force_update=False,
+            verify=False,
         ),
         description=_("Publica artigos"),
         priority=ARTICLE_DB_MIGRATION_PRIORITY,
@@ -381,3 +388,27 @@ def _schedule_fetch_and_create_journal(username, enabled=False):
         hour="*",
         minute=FETCH_AND_CREATE_JOURNAL_MINUTES,
     )
+
+
+def _schedule_try_fetch_and_register_press_release(username, enabled=False):
+    """
+    Schedules the task that fetches and registers press releases.
+    The task is left disabled by default.
+    """
+    schedule_task(
+        task="core.tasks.try_fetch_and_register_press_release",
+        name="try_fetch_and_register_press_release",
+        kwargs=dict(
+            username=username,
+            journal_acronym=None,
+            pressrelease_lang=None,
+            verify=False,
+        ),
+        description=_("Busca e registra press releases"),
+        priority=PRESS_RELEASE_PRIORITY,
+        enabled=enabled,
+        run_once=False,
+        day_of_week="*",
+        hour="*",
+        minute=PRESS_RELEASE_MINUTES,
+    )
diff --git a/collection/models.py b/collection/models.py
index 6d5750f3f..cfc8f7e03 100644
--- a/collection/models.py
+++ b/collection/models.py
@@ -63,6 +63,25 @@ def get_website_config(self, purpose, content_type):
         ws = WebSiteConfiguration.get(collection=self, purpose=purpose)
         return ws.get_data(content_type=content_type)
 
+    @classmethod
+    def get_managers(cls, collection_id):
+        """Get all active managers for the given collection."""
+        from team.models import CollectionTeamMember, TeamRole
+        return CollectionTeamMember.objects.filter(
+            collection_id=collection_id,
+            role=TeamRole.MANAGER,
+            is_active_member=True
+        )
+
+    @classmethod
+    def get_members(cls, collection_id):
+        """Get all active members (including managers) for the given collection."""
+        from team.models import CollectionTeamMember
+        return CollectionTeamMember.objects.filter(
+            collection_id=collection_id,
+            is_active_member=True
+        )
+
 
 class WebSiteConfiguration(CommonControlField, ClusterableModel):
     collection = models.ForeignKey(
diff --git a/collection/tests.py b/collection/tests.py
index 7ce503c2d..ad078696b 100644
--- a/collection/tests.py
+++ b/collection/tests.py
@@ -1,3 +1,280 @@
+from django.contrib.auth import get_user_model
 from django.test import TestCase
 
-# Create your tests here.
+from collection.models import Collection, WebSiteConfiguration
+from files_storage.models import MinioConfiguration
+from migration.models import ClassicWebsiteConfiguration
+from team.models import CollectionTeamMember, TeamRole, get_user_membership_ids
+
+User = get_user_model()
+
+
+class CollectionTeamHelperFunctionsTest(TestCase):
+    """Tests for get_user_membership_ids from team.models."""
+
+    def setUp(self):
+        self.creator = User.objects.create_user(
+            username="creator", email="creator@example.com", password="pass"
+        )
+        self.active_member = User.objects.create_user(
+            username="active", email="active@example.com", password="pass"
+        )
+        self.inactive_member = User.objects.create_user(
+            username="inactive", email="inactive@example.com", password="pass"
+        )
+        self.non_member = User.objects.create_user(
+            username="nonmember", email="nonmember@example.com", password="pass"
+        )
+        self.col = Collection.objects.create(acron="X", name="Collection X", creator=self.creator)
+        CollectionTeamMember.objects.create(
+            user=self.active_member,
+            collection=self.col,
+            role=TeamRole.MEMBER,
+            is_active_member=True,
+            creator=self.creator,
+        )
+        CollectionTeamMember.objects.create(
+            user=self.inactive_member,
+            collection=self.col,
+            role=TeamRole.MEMBER,
+            is_active_member=False,
+            creator=self.creator,
+        )
+
+    def test_get_user_collection_ids_returns_active_memberships(self):
+        membership = get_user_membership_ids(self.active_member)
+        self.assertIn(self.col.id, membership["collection_list_ids"])
+
+    def test_get_user_collection_ids_excludes_inactive_memberships(self):
+        membership = get_user_membership_ids(self.inactive_member)
+        self.assertNotIn(self.col.id, membership["collection_list_ids"])
+
+    def test_get_user_collection_ids_empty_for_non_member(self):
+        membership = get_user_membership_ids(self.non_member)
+        self.assertFalse(membership.get("collection_list_ids"))
+
+    def test_is_collection_team_member_true_for_active(self):
+        membership = get_user_membership_ids(self.active_member)
+        self.assertTrue(membership.get("collection_list_ids"))
+
+    def test_is_collection_team_member_false_for_inactive(self):
+        membership = get_user_membership_ids(self.inactive_member)
+        self.assertFalse(membership.get("collection_list_ids"))
+
+    def test_is_collection_team_member_false_for_non_member(self):
+        membership = get_user_membership_ids(self.non_member)
+        self.assertFalse(membership.get("collection_list_ids"))
+
+
+class CollectionViewSetQueryFilterTest(TestCase):
+    """Tests for get_queryset filtering logic on Collection model."""
+
+    def setUp(self):
+        self.creator = User.objects.create_user(
+            username="creator", email="creator@example.com", password="pass"
+        )
+        self.superuser = User.objects.create_superuser(
+            username="admin", email="admin@example.com", password="pass"
+        )
+        self.collection_member = User.objects.create_user(
+            username="col_member", email="col@example.com", password="pass"
+        )
+        self.other_user = User.objects.create_user(
+            username="other", email="other@example.com", password="pass"
+        )
+        self.col_a = Collection.objects.create(acron="A", name="Collection A", creator=self.creator)
+        self.col_b = Collection.objects.create(acron="B", name="Collection B", creator=self.creator)
+        CollectionTeamMember.objects.create(
+            user=self.collection_member,
+            collection=self.col_a,
+            role=TeamRole.MEMBER,
+            is_active_member=True,
+            creator=self.creator,
+        )
+
+    def _filtered_qs(self, user):
+        """Simulate the CollectionViewSet.get_queryset filtering logic."""
+        qs = Collection.objects.all()
+        if user.is_superuser:
+            return qs
+        membership = get_user_membership_ids(user)
+        if membership.get("collection_list_ids"):
+            return qs.filter(id__in=membership["collection_list_ids"])
+        return qs.none()
+
+    def test_superuser_sees_all_collections(self):
+        qs = self._filtered_qs(self.superuser)
+        self.assertIn(self.col_a, qs)
+        self.assertIn(self.col_b, qs)
+
+    def test_collection_team_member_sees_only_own_collection(self):
+        qs = self._filtered_qs(self.collection_member)
+        self.assertIn(self.col_a, qs)
+        self.assertNotIn(self.col_b, qs)
+
+    def test_non_collection_team_user_sees_nothing(self):
+        qs = self._filtered_qs(self.other_user)
+        self.assertEqual(qs.count(), 0)
+
+
+class WebSiteConfigurationQueryFilterTest(TestCase):
+    """Tests for get_queryset filtering logic on WebSiteConfiguration model."""
+
+    def setUp(self):
+        self.creator = User.objects.create_user(
+            username="creator", email="creator@example.com", password="pass"
+        )
+        self.superuser = User.objects.create_superuser(
+            username="admin", email="admin@example.com", password="pass"
+        )
+        self.collection_member = User.objects.create_user(
+            username="col_member", email="col@example.com", password="pass"
+        )
+        self.other_user = User.objects.create_user(
+            username="other", email="other@example.com", password="pass"
+        )
+        self.col_a = Collection.objects.create(acron="A", name="Collection A", creator=self.creator)
+        self.col_b = Collection.objects.create(acron="B", name="Collection B", creator=self.creator)
+        CollectionTeamMember.objects.create(
+            user=self.collection_member,
+            collection=self.col_a,
+            role=TeamRole.MEMBER,
+            is_active_member=True,
+            creator=self.creator,
+        )
+        self.ws_a = WebSiteConfiguration.objects.create(
+            collection=self.col_a, url="http://a.example.com", enabled=True, creator=self.creator
+        )
+        self.ws_b = WebSiteConfiguration.objects.create(
+            collection=self.col_b, url="http://b.example.com", enabled=True, creator=self.creator
+        )
+
+    def _filtered_qs(self, user):
+        qs = WebSiteConfiguration.objects.all()
+        if user.is_superuser:
+            return qs
+        membership = get_user_membership_ids(user)
+        if membership.get("collection_list_ids"):
+            return qs.filter(collection_id__in=membership["collection_list_ids"])
+        return qs.none()
+
+    def test_superuser_sees_all_website_configs(self):
+        qs = self._filtered_qs(self.superuser)
+        self.assertIn(self.ws_a, qs)
+        self.assertIn(self.ws_b, qs)
+
+    def test_collection_team_member_sees_only_own_collection_config(self):
+        qs = self._filtered_qs(self.collection_member)
+        self.assertIn(self.ws_a, qs)
+        self.assertNotIn(self.ws_b, qs)
+
+    def test_non_collection_team_user_sees_nothing(self):
+        qs = self._filtered_qs(self.other_user)
+        self.assertEqual(qs.count(), 0)
+
+
+class MinioConfigurationQueryFilterTest(TestCase):
+    """Tests for get_queryset filtering logic on MinioConfiguration model."""
+
+    def setUp(self):
+        self.creator = User.objects.create_user(
+            username="creator", email="creator@example.com", password="pass"
+        )
+        self.superuser = User.objects.create_superuser(
+            username="admin", email="admin@example.com", password="pass"
+        )
+        self.collection_member = User.objects.create_user(
+            username="col_member", email="col@example.com", password="pass"
+        )
+        self.other_user = User.objects.create_user(
+            username="other", email="other@example.com", password="pass"
+        )
+        self.col = Collection.objects.create(acron="A", name="Collection A", creator=self.creator)
+        CollectionTeamMember.objects.create(
+            user=self.collection_member,
+            collection=self.col,
+            role=TeamRole.MEMBER,
+            is_active_member=True,
+            creator=self.creator,
+        )
+        self.minio = MinioConfiguration.objects.create(
+            name="minio1", host="minio.example.com", bucket_root="root", creator=self.creator
+        )
+
+    def _filtered_qs(self, user):
+        qs = MinioConfiguration.objects.all()
+        if user.is_superuser:
+            return qs
+        membership = get_user_membership_ids(user)
+        if membership.get("collection_list_ids"):
+            return qs
+        return qs.none()
+
+    def test_superuser_sees_all_minio_configs(self):
+        qs = self._filtered_qs(self.superuser)
+        self.assertIn(self.minio, qs)
+
+    def test_collection_team_member_sees_all_minio_configs(self):
+        qs = self._filtered_qs(self.collection_member)
+        self.assertIn(self.minio, qs)
+
+    def test_non_collection_team_user_sees_nothing(self):
+        qs = self._filtered_qs(self.other_user)
+        self.assertEqual(qs.count(), 0)
+
+
+class ClassicWebsiteConfigurationQueryFilterTest(TestCase):
+    """Tests for get_queryset filtering logic on ClassicWebsiteConfiguration model."""
+
+    def setUp(self):
+        self.creator = User.objects.create_user(
+            username="creator", email="creator@example.com", password="pass"
+        )
+        self.superuser = User.objects.create_superuser(
+            username="admin", email="admin@example.com", password="pass"
+        )
+        self.collection_member = User.objects.create_user(
+            username="col_member", email="col@example.com", password="pass"
+        )
+        self.other_user = User.objects.create_user(
+            username="other", email="other@example.com", password="pass"
+        )
+        self.col_a = Collection.objects.create(acron="A", name="Collection A", creator=self.creator)
+        self.col_b = Collection.objects.create(acron="B", name="Collection B", creator=self.creator)
+        CollectionTeamMember.objects.create(
+            user=self.collection_member,
+            collection=self.col_a,
+            role=TeamRole.MEMBER,
+            is_active_member=True,
+            creator=self.creator,
+        )
+        self.cwc_a = ClassicWebsiteConfiguration.objects.create(
+            collection=self.col_a, creator=self.creator
+        )
+        self.cwc_b = ClassicWebsiteConfiguration.objects.create(
+            collection=self.col_b, creator=self.creator
+        )
+
+    def _filtered_qs(self, user):
+        qs = ClassicWebsiteConfiguration.objects.all()
+        if user.is_superuser:
+            return qs
+        membership = get_user_membership_ids(user)
+        if membership.get("collection_list_ids"):
+            return qs.filter(collection_id__in=membership["collection_list_ids"])
+        return qs.none()
+
+    def test_superuser_sees_all_classic_configs(self):
+        qs = self._filtered_qs(self.superuser)
+        self.assertIn(self.cwc_a, qs)
+        self.assertIn(self.cwc_b, qs)
+
+    def test_collection_team_member_sees_only_own_collection_config(self):
+        qs = self._filtered_qs(self.collection_member)
+        self.assertIn(self.cwc_a, qs)
+        self.assertNotIn(self.cwc_b, qs)
+
+    def test_non_collection_team_user_sees_nothing(self):
+        qs = self._filtered_qs(self.other_user)
+        self.assertEqual(qs.count(), 0)
diff --git a/collection/wagtail_hooks.py b/collection/wagtail_hooks.py
index 700408a96..274649207 100644
--- a/collection/wagtail_hooks.py
+++ b/collection/wagtail_hooks.py
@@ -1,37 +1,23 @@
-from django.http import HttpResponseRedirect
 from django.urls import include, path
 from django.utils.translation import gettext_lazy as _
 from wagtail import hooks
-from wagtail_modeladmin.options import (
-    ModelAdmin,
-    ModelAdminGroup,
-    modeladmin_register,
-)
-from wagtail_modeladmin.views import CreateView
+from wagtail.snippets.models import register_snippet
+from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
 
 from config.menu import get_menu_order
-from files_storage.models import MinioConfiguration
-from migration.models import ClassicWebsiteConfiguration
+from files_storage.wagtail_hooks import MinioConfigurationViewSet
+from migration.wagtail_hooks import ClassicWebsiteConfigurationViewSet
+from team.models import get_user_membership_ids
 
 from .models import Collection, WebSiteConfiguration
 
 
-class CoreCreateView(CreateView):
-    def form_valid(self, form):
-        self.object = form.save_all(self.request.user)
-        return HttpResponseRedirect(self.get_success_url())
-
-
-class CollectionModelAdmin(ModelAdmin):
+class CollectionViewSet(SnippetViewSet):
     model = Collection
     menu_label = _("Collections")
     menu_icon = "doc-full"
     menu_order = 100
     add_to_settings_menu = False
-    exclude_from_explorer = False
-    inspect_view_enabled = True
-
-    create_view_class = CoreCreateView
 
     list_display = (
         "acron",
@@ -44,21 +30,23 @@
         "name",
         "acron",
     )
-    inspect_view_fields = (
-        "name",
-        "acron",
-    )
+
+    def get_queryset(self, request):
+        qs = super().get_queryset(request)
+        user = request.user
+        if user.is_superuser:
+            return qs
+        membership = get_user_membership_ids(user)
+        if membership.get("collection_list_ids"):
+            return qs.filter(id__in=membership["collection_list_ids"])
+        return qs.none()
 
 
-class WebSiteConfigurationModelAdmin(ModelAdmin):
+class WebSiteConfigurationViewSet(SnippetViewSet):
     model = WebSiteConfiguration
     menu_label = _("New WebSites Configurations")
     menu_icon = "doc-full"
     menu_order = 200
-    exclude_from_explorer = False
-    inspect_view_enabled = False
-
-    create_view_class = CoreCreateView
 
     list_display = (
         "collection",
@@ -75,69 +63,26 @@
     )
     search_fields = ("url", "collection__acron", "collection__name")
 
-
-class MinioConfigurationModelAdmin(ModelAdmin):
-    model = MinioConfiguration
-    menu_label = _("Files Storage Configuration")
-    menu_icon = "doc-full"
-    menu_order = 200
-    exclude_from_explorer = False
-    inspect_view_enabled = False
-
-    create_view_class = CoreCreateView
-
-    list_display = (
-        "host",
-        "bucket_root",
-        "created",
-        "updated",
-        "updated_by",
-    )
-    list_filter = (
-        "host",
-        "bucket_root",
-    )
-    search_fields = (
-        "host",
-        "bucket_root",
-    )
-
-
-class ClassicWebsiteConfigurationModelAdmin(ModelAdmin):
-    model = ClassicWebsiteConfiguration
-    menu_label = _("Classic Website Configuration")
-    menu_icon = "doc-full"
-    menu_order = 200
-    exclude_from_explorer = False
-    inspect_view_enabled = False
-
-    create_view_class = CoreCreateView
-
-    list_display = ("collection",)
-    search_fields = (
-        "collection__acron",
-        "collection__name",
-    )
+    def get_queryset(self, request):
+        qs = super().get_queryset(request)
+        user = request.user
+        if user.is_superuser:
+            return qs
+        membership = get_user_membership_ids(user)
+        if membership.get("collection_list_ids"):
+            return qs.filter(collection_id__in=membership["collection_list_ids"])
+        return qs.none()
 
 
-class CollectionModelAdminGroup(ModelAdminGroup):
+class CollectionViewSetGroup(SnippetViewSetGroup):
     menu_label = _("Collections")
     menu_icon = "folder-open-inverse"
     menu_order = get_menu_order("collection")
-    # menu_order = 100
-    items = (
-        CollectionModelAdmin,
-        WebSiteConfigurationModelAdmin,
-        MinioConfigurationModelAdmin,
-        ClassicWebsiteConfigurationModelAdmin,
-    )
-
-
-modeladmin_register(CollectionModelAdminGroup)
-
-
-@hooks.register("register_admin_urls")
-def register_disclosure_url():
-    return [
-        path("collection/", include("collection.urls", namespace="collection")),
+    items = [
+        CollectionViewSet,
+        WebSiteConfigurationViewSet,
+        MinioConfigurationViewSet,
+        ClassicWebsiteConfigurationViewSet,
     ]
+
+register_snippet(CollectionViewSetGroup)
diff --git a/core/models.py b/core/models.py
index 42f60ad0e..0c1d394a0 100644
--- a/core/models.py
+++ b/core/models.py
@@ -62,6 +62,21 @@ class Meta:
         abstract = True
 
 
+class VisualIdentityMixin(models.Model):
+    """
+    Mixin that provides url and logo fields for visual identity.
+
+    Fields:
+        url: Website URL
+        logo: Logo image field
+    """
+    url = models.URLField(_("URL"), blank=True, null=True)
+    logo = models.ImageField(_("Logo"), blank=True, null=True, upload_to="logos/")
+
+    class Meta:
+        abstract = True
+
+
 class BaseTextModel(models.Model):
     language = models.ForeignKey(
         "collection.Language", null=True, blank=True, on_delete=models.SET_NULL
diff --git a/core/scripts/load_pressrelease.py b/core/scripts/load_pressrelease.py
index d1df85982..98bdb6439 100644
--- a/core/scripts/load_pressrelease.py
+++ b/core/scripts/load_pressrelease.py
@@ -1,11 +1,12 @@
 from core.tasks import try_fetch_and_register_press_release
 
 
-def run(journal_acronym=None, pressrelease_lang=None, username=None):
+def run(journal_acronym=None, pressrelease_lang=None, username=None, verify=False):
     try_fetch_and_register_press_release.apply_async(
         kwargs=dict(
             journal_acronym=journal_acronym,
             pressrelease_lang=pressrelease_lang,
             username=username,
+            verify=verify,
         )
     )
diff --git a/core/tasks.py b/core/tasks.py
index cfc38e315..60720e766 100644
--- a/core/tasks.py
+++ b/core/tasks.py
@@ -29,7 +29,7 @@
 
 @celery_app.task(bind=True)
 def try_fetch_and_register_press_release(
-    self, journal_acronym=None, pressrelease_lang=None, username=None, user_id=None
+    self, journal_acronym=None, pressrelease_lang=None, username=None, user_id=None, verify=False
 ):
     query_condition = Q(journal_acron=journal_acronym) if journal_acronym else Q()
     journals_query = Journal.objects.filter(query_condition)
@@ -49,7 +49,7 @@ def try_fetch_and_register_press_release(
         )
 
         response = fetch_data(
-            press_release_url_by_lang, json=False, timeout=2, verify=False
+            press_release_url_by_lang, json=False, timeout=2, verify=verify
         )
 
         content = feedparser.parse(response)
diff --git a/core/templates/wagtailadmin/summary_items/collection_summary_item.html b/core/templates/wagtailadmin/summary_items/collection_summary_item.html
index 74af48019..f51670c96 100644
--- a/core/templates/wagtailadmin/summary_items/collection_summary_item.html
+++ b/core/templates/wagtailadmin/summary_items/collection_summary_item.html
@@ -2,7 +2,7 @@
   •
   {% icon name="site" %}
-  
+  
   {% blocktrans trimmed count counter=total_collection with total_collection|intcomma as total %}
   {{ total_collection }} Collection
 {% plural %}
diff --git a/core/utils/harvesters.py b/core/utils/harvesters.py
index 188f7bf7f..7f192c5b3 100644
--- a/core/utils/harvesters.py
+++ b/core/utils/harvesters.py
@@ -34,7 +34,10 @@ def __init__(
             limit: Número de documentos por página
             timeout: Timeout em segundos para requisições
         """
+        if not domain.startswith("http"):
+            domain = f"http://{domain}"
         self.domain = domain
+        self.collection_acron = collection_acron
         self.from_date = from_date or "2000-01-01"
         self.until_date = until_date or datetime.now(timezone.utc).isoformat()[:10]
@@ -66,7 +69,7 @@ def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:
             try:
                 # Constrói URL
                 url = (
-                    f"https://{self.domain}/api/v1/counter_dict?"
+                    f"{self.domain}/api/v1/counter_dict?"
                     f"end_date={self.until_date}&begin_date={self.from_date}"
                     f"&limit={self.limit}&page={page}"
                 )
@@ -74,7 +77,8 @@
 
                 logger.info(f"Fetching OPAC documents from: {url}")
 
                 # Faz requisição
-                response = fetch_data(url, json=True, timeout=self.timeout, verify=True)
+                response = fetch_data(url, json=True, timeout=self.timeout)
+
                 # Define total de páginas na primeira iteração
                 if total_pages is None:
@@ -95,7 +99,7 @@
 
                 # Constrói URL do XML
                 journal_acron = item["journal_acronym"]
-                xml_url = f"https://{self.domain}/j/{journal_acron}/a/{pid_v3}/?format=xml"
+                xml_url = f"{self.domain}/j/{journal_acron}/a/{pid_v3}/?format=xml"
 
                 # Extrai data de origem
                 origin_date = self._parse_gmt_date(
@@ -138,7 +142,16 @@
 
             except Exception as e:
                 logger.error(f"Error harvesting OPAC documents on page {page}: {e}")
-                break
+                if total_pages is None:
+                    # First page failed, we don't know pagination; stop
+                    break
+                # Known pagination: skip this page and try the next.
+                # Must increment here because the normal page += 1
+                # inside the try block was not reached.
+                page += 1
+                if page > total_pages:
+                    break
+                continue
 
     def _parse_gmt_date(self, date_str: Optional[str]) -> Optional[str]:
         """
diff --git a/core/utils/requester.py b/core/utils/requester.py
index 86c5664d9..5bc78ad38 100644
--- a/core/utils/requester.py
+++ b/core/utils/requester.py
@@ -161,4 +161,13 @@ def fetch_data(url, params=None, headers=None, json=False, timeout=2, verify=Tru
     else:
         raise
 
-    return response.content if not json else response.json()
+    if not json:
+        return response.content
+
+    try:
+        return response.json()
+    except ValueError as exc:
+        raise NonRetryableError(
+            "Invalid JSON response (status=%s, length=%s) from %s: %s"
+            % (response.status_code, len(response.content), url, exc)
+        ) from exc
diff --git a/core/utils/sanitize.py b/core/utils/sanitize.py
new file mode 100644
index 000000000..c23913b47
--- /dev/null
+++ b/core/utils/sanitize.py
@@ -0,0 +1,20 @@
+def sanitize_for_json(obj):
+    """Recursively sanitize data to remove Unicode surrogate characters.
+
+    Surrogate characters (U+D800-U+DFFF) are invalid in JSON and rejected by
+    PostgreSQL. They can appear in file paths read from filesystems using
+    Python's 'surrogateescape' error handler.
+    """
+    if isinstance(obj, str):
+        # Encode using surrogateescape to recover original bytes from surrogates,
+        # then decode as UTF-8 replacing any invalid sequences.
+        # Fall back to surrogatepass for surrogates outside the U+DC80-U+DCFF range.
+        try:
+            return obj.encode("utf-8", "surrogateescape").decode("utf-8", "replace")
+        except UnicodeEncodeError:
+            return obj.encode("utf-8", "surrogatepass").decode("utf-8", "replace")
+    if isinstance(obj, dict):
+        return {sanitize_for_json(k): sanitize_for_json(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [sanitize_for_json(item) for item in obj]
+    return obj
diff --git a/core/utils/test_harvesters.py b/core/utils/test_harvesters.py
new file mode 100644
index 000000000..01f7a591a
--- /dev/null
+++ b/core/utils/test_harvesters.py
@@ -0,0 +1,137 @@
+from unittest import TestCase
+from unittest.mock import patch
+
+from core.utils.harvesters import OPACHarvester
+from core.utils.requester import NonRetryableError
+
+
+class OPACHarvesterTest(TestCase):
+    """Tests for OPACHarvester.harvest_documents error handling."""
+
+    def _make_harvester(self, **kwargs):
+        defaults = dict(
+            domain="www.example.com",
+            collection_acron="scl",
+            from_date="2024-01-01",
+            until_date="2024-12-31",
+            limit=10,
+            timeout=2,
+        )
+        defaults.update(kwargs)
+        return OPACHarvester(**defaults)
+
+    @patch("core.utils.harvesters.fetch_data")
+    def test_harvest_documents_stops_when_first_page_fails(self, mock_fetch):
+        """When the first page (total_pages unknown) fails, harvesting stops."""
+        mock_fetch.side_effect = NonRetryableError("Invalid JSON response")
+
+        harvester = self._make_harvester()
+        documents = list(harvester.harvest_documents())
+
+        self.assertEqual(documents, [])
+        self.assertEqual(mock_fetch.call_count, 1)
+
+    @patch("core.utils.harvesters.fetch_data")
+    def test_harvest_documents_skips_failing_page_when_total_pages_known(
+        self, mock_fetch
+    ):
+        """When total_pages is known and a middle page fails, harvesting continues."""
+        page1_response = {
+            "pages": 3,
+            "documents": {
+                "abc123": {
+                    "journal_acronym": "jtest",
+                    "pid_v2": "S0001-00002024000100001",
+                    "publication_date": "2024-01-01",
+                },
+            },
+        }
+        page3_response = {
+            "pages": 3,
+            "documents": {
+                "def456": {
+                    "journal_acronym": "jtest",
+                    "pid_v2": "S0001-00002024000100002",
+                    "publication_date": "2024-06-01",
+                },
+            },
+        }
+        mock_fetch.side_effect = [
+            page1_response,
+            NonRetryableError("page 2 error"),
+            page3_response,
+        ]
+
+        harvester = self._make_harvester()
+        documents = list(harvester.harvest_documents())
+
+        self.assertEqual(len(documents), 2)
+        self.assertEqual(documents[0]["pid_v3"], "abc123")
+        self.assertEqual(documents[1]["pid_v3"], "def456")
+        self.assertEqual(mock_fetch.call_count, 3)
+
+    @patch("core.utils.harvesters.fetch_data")
+    def test_harvest_documents_returns_documents_on_success(self, mock_fetch):
+        """Documents are yielded correctly from a successful response."""
+        mock_fetch.return_value = {
+            "pages": 1,
+            "documents": {
+                "xyz789": {
+                    "journal_acronym": "jtest",
+                    "pid_v1": "v1",
+                    "pid_v2": "v2",
+                    "publication_date": "2024-03-15",
+                    "default_language": "en",
+                },
+            },
+        }
+
+        harvester = self._make_harvester()
+        documents = list(harvester.harvest_documents())
+
+        self.assertEqual(len(documents), 1)
+        doc = documents[0]
+        self.assertEqual(doc["pid_v3"], "xyz789")
+        self.assertEqual(doc["pid_v1"], "v1")
+        self.assertEqual(doc["pid_v2"], "v2")
+        self.assertEqual(doc["journal_acron"], "jtest")
+        self.assertEqual(doc["collection_acron"], "scl")
+        self.assertIn("format=xml", doc["url"])
+
+    @patch("core.utils.harvesters.fetch_data")
+    def test_harvest_documents_stops_on_empty_documents(self, mock_fetch):
+        """When a page returns no documents, harvesting stops."""
+        mock_fetch.return_value = {
+            "pages": 3,
+            "documents": {},
+        }
+
+        harvester = self._make_harvester()
+        documents = list(harvester.harvest_documents())
+
+        self.assertEqual(documents, [])
+        self.assertEqual(mock_fetch.call_count, 1)
+
+    @patch("core.utils.harvesters.fetch_data")
+    def test_harvest_documents_stops_after_last_page_fails(self, mock_fetch):
+        """When the error is on the last page, harvesting still stops gracefully."""
+        page1_response = {
+            "pages": 2,
+            "documents": {
+                "abc123": {
+                    "journal_acronym": "jtest",
+                    "pid_v2": "S0001-00002024000100001",
+                    "publication_date": "2024-01-01",
+                },
+            },
+        }
+        mock_fetch.side_effect = [
+            page1_response,
+            NonRetryableError("last page error"),
+        ]
+
+        harvester = self._make_harvester()
+        documents = list(harvester.harvest_documents())
+
+        self.assertEqual(len(documents), 1)
+        self.assertEqual(documents[0]["pid_v3"], "abc123")
diff --git a/core/utils/test_requester.py b/core/utils/test_requester.py
new file mode 100644
index 000000000..4ed991e3a
--- /dev/null
+++ b/core/utils/test_requester.py
@@ -0,0 +1,71 @@
+from unittest import TestCase
+from unittest.mock import Mock, patch
+
+from core.utils.requester import NonRetryableError, fetch_data
+
+
+class FetchDataJsonDecodeErrorTest(TestCase):
+    """Tests for fetch_data handling of invalid JSON responses."""
+
+    @patch("core.utils.requester.requests.get")
+    def test_fetch_data_raises_non_retryable_error_on_empty_json_response(
+        self, mock_get
+    ):
+        """When json=True and response body is empty, fetch_data should raise
+        NonRetryableError instead of letting JSONDecodeError propagate."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b""
+        mock_response.raise_for_status.return_value = None
+        mock_response.json.side_effect = ValueError(
+            "Expecting value: line 1 column 1 (char 0)"
+        )
+        mock_get.return_value = mock_response
+
+        with self.assertRaises(NonRetryableError):
+            fetch_data("https://example.com/api", json=True)
+
+    @patch("core.utils.requester.requests.get")
+    def test_fetch_data_raises_non_retryable_error_on_html_json_response(
+        self, mock_get
+    ):
+        """When json=True and response body is HTML, fetch_data should raise
+        NonRetryableError."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b"Not Found"
+        mock_response.raise_for_status.return_value = None
+        mock_response.json.side_effect = ValueError("Expecting value")
+        mock_get.return_value = mock_response
+
+        with self.assertRaises(NonRetryableError) as ctx:
+            fetch_data("https://example.com/api", json=True)
+
+        self.assertIn("Invalid JSON response", str(ctx.exception))
+
+    @patch("core.utils.requester.requests.get")
+    def test_fetch_data_returns_json_on_valid_response(self, mock_get):
+        """When json=True and response body is valid JSON, return parsed dict."""
+        expected = {"documents": {}, "pages": 1}
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.raise_for_status.return_value = None
+        mock_response.json.return_value = expected
+        mock_get.return_value = mock_response
+
+        result = fetch_data("https://example.com/api", json=True)
+
+        self.assertEqual(result, expected)
+
+    @patch("core.utils.requester.requests.get")
+    def test_fetch_data_returns_content_when_json_false(self, mock_get):
+        """When json=False, return raw response content."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.content = b"raw content"
+        mock_response.raise_for_status.return_value = None
+        mock_get.return_value = mock_response
+
+        result = fetch_data("https://example.com/api", json=False)
+
+        self.assertEqual(result, b"raw content")
diff --git a/docs/index.rst b/docs/index.rst
index 24b2a4860..3966e25e8 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -13,6 +13,13 @@ Welcome to SciELO Upload 's documentation!
    howto
    users
 
+Task Guides / Guías de Tareas / Guias de Tarefas
+----------------------------------------------------------------------
+
+- `English `_
+- `Español `_
+- `Português `_
+
 
 Indices and tables
diff --git a/docs/pid_provider/guide_task_load_records_en.md b/docs/pid_provider/guide_task_load_records_en.md
new file mode 100644
index 000000000..1c5ecadcb
--- /dev/null
+++ b/docs/pid_provider/guide_task_load_records_en.md
@@ -0,0 +1,180 @@
+# Guide: Loading Records from counter_dict
+
+## Purpose
+
+The `task_load_records_from_counter_dict` task collects XML documents from a
+specific SciELO collection via the OPAC `counter_dict` API endpoint and loads
+them into the system.
+
+The records harvested from the OPAC already contain an assigned **PID v3**
+identifier. For this reason, the data source **must** be the official SciELO
+website (e.g., `https://www.scielo.br`). Using any other source could result in
+the creation of spurious PID v3 identifiers that do not correspond to published
+articles, which would compromise data integrity.
+
+**What it does:**
+
+1. Connects to the OPAC API (e.g., `https://www.scielo.br/api/v1/counter_dict`)
+2. Retrieves document metadata from the specified collection and date range
+3. For each document found, dispatches a subtask (`task_load_record_from_xml_url`)
+   that downloads the XML and creates or updates records in **PidProviderXML** and **XMLURL**
+
+> **Important:** This task **only** creates or updates `PidProviderXML` and
+> `XMLURL` records. It does **not** create `Article` records.
+
+---
+
+## Creating a Periodic Task via Django Admin
+
+### Step 1: Access Django Admin
+
+1. Open your browser and navigate to the Django admin panel:
+   `https:///admin/`
+2. Log in with a superuser account
+
+### Step 2: Navigate to Periodic Tasks
+
+1. In the Django admin sidebar or main page, locate the section
+   **PERIODIC TASKS** (provided by `django_celery_beat`)
+2. Click on **Periodic tasks**
+3. Click the **Add periodic task** button (top right)
+
+### Step 3: Configure the Task
+
+Fill in the following fields:
+
+- **Name**: A descriptive name, e.g.,
+  `Load OPAC records - Brazil (scl)`
+- **Task (registered)**: Select or type:
+  `pid_provider.tasks.task_load_records_from_counter_dict`
+- **Enabled**: Check this box to activate the task
+- **Schedule**: Choose one of the available schedule types:
+  - **Interval**: e.g., every 24 hours
+  - **Crontab**: e.g., `0 2 * * *` (every day at 2:00 AM)
+  - **One-off task**: Check this if the task should run only once
+
+### Step 4: Configure Keyword Arguments (kwargs)
+
+In the **Keyword arguments** field (JSON format), enter the task parameters:
+
+```json
+{
+    "username": "admin",
+    "collection_acron": "scl",
+    "from_date": "2024-01-01",
+    "until_date": "2024-12-31",
+    "limit": 100,
+    "timeout": 5,
+    "force_update": false,
+    "opac_domain": "https://www.scielo.br"
+}
+```
+
+### Step 5: Save
+
+Click **Save** to create the periodic task. The Celery Beat scheduler will
+pick it up according to the configured schedule.
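+
+### Alternative: Creating the Task Programmatically
+
+If you prefer to script this setup, the same periodic task can be created from
+the Django shell using the `django_celery_beat` ORM models. This is a minimal
+sketch mirroring the crontab example above; the task name, schedule values, and
+kwargs are illustrative and should be adapted to your deployment.
+
+```python
+import json
+
+from django_celery_beat.models import CrontabSchedule, PeriodicTask
+
+# Every day at 2:00 AM, matching the crontab example above
+schedule, _ = CrontabSchedule.objects.get_or_create(
+    minute="0",
+    hour="2",
+    day_of_week="*",
+    day_of_month="*",
+    month_of_year="*",
+)
+
+# Create (or update) the periodic task with the kwargs from Step 4
+PeriodicTask.objects.update_or_create(
+    name="Load OPAC records - Brazil (scl)",
+    defaults={
+        "task": "pid_provider.tasks.task_load_records_from_counter_dict",
+        "crontab": schedule,
+        "enabled": True,
+        "kwargs": json.dumps({
+            "username": "admin",
+            "collection_acron": "scl",
+            "from_date": "2024-01-01",
+            "until_date": "2024-12-31",
+            "limit": 100,
+            "timeout": 5,
+            "force_update": False,
+            "opac_domain": "https://www.scielo.br",
+        }),
+    },
+)
+```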
+
+---
+
+## Parameter Reference
+
+| Parameter | Type | Default | Description |
+|---|---|---|---|
+| `username` | str | `None` | Username of the user running the task. |
+| `user_id` | int | `None` | User ID (alternative to `username`). |
+| `collection_acron` | str | `"scl"` | Collection acronym (e.g., `"scl"` for Brazil). |
+| `from_date` | str | `"2000-01-01"` | Start date in ISO format (`YYYY-MM-DD`). |
+| `until_date` | str | today | End date in ISO format (`YYYY-MM-DD`). |
+| `limit` | int | `100` | Number of documents per API page. |
+| `timeout` | int | `5` | HTTP request timeout in seconds. |
+| `force_update` | bool | `false` | Force update even if the record already exists. |
+| `opac_domain` | str | `"www.scielo.br"` | OPAC domain to harvest from. **Should include the protocol** (e.g., `https://www.scielo.br`): using `https://` ensures a secure connection to the official SciELO website. If the protocol is omitted, the system falls back to `http://`. |
+
+---
+
+## Verifying Results
+
+After the task completes, verify the results by checking the
+`PidProviderXML` and `XMLURL` models.
+
+### Checking PidProviderXML Records
+
+In the Django admin, navigate to **PID PROVIDER > PidProviderXML** to see the
+created or updated records.
+
+Alternatively, use the Django shell:
+
+```python
+from pid_provider.models import PidProviderXML
+
+# List recent records
+recent = PidProviderXML.objects.order_by("-updated")[:20]
+for record in recent:
+    print(f"PID v3: {record.v3} | Status: {record.proc_status} | "
+          f"Updated: {record.updated}")
+
+# Filter by collection
+from collection.models import Collection
+col = Collection.objects.get(acron="scl")
+records = PidProviderXML.objects.filter(collections=col)
+print(f"Total records for 'scl': {records.count()}")
+```
+
+### Checking XMLURL Records
+
+In the Django admin, navigate to **PID PROVIDER > XMLURL** to see the
+URL processing status.
+
+```python
+from pid_provider.models import XMLURL
+
+# List recent XMLURL records
+recent_urls = XMLURL.objects.order_by("-updated")[:20]
+for url_record in recent_urls:
+    print(f"URL: {url_record.url} | Status: {url_record.status} | "
+          f"PID: {url_record.pid}")
+
+# Check for failed URLs
+failed = XMLURL.objects.exclude(status="").exclude(exceptions="")
+print(f"URLs with errors: {failed.count()}")
+```
+
+---
+
+## Troubleshooting
+
+Issues are categorized by criticality level:
+
+- 🔴 **CRITICAL** — Prevents task execution entirely; must be resolved immediately
+- 🟡 **MODERATE** — Task runs but produces incomplete or incorrect results
+- 🟢 **LOW** — Minor issues that do not affect core functionality
+
+| Level | Problem | Solution |
+|---|---|---|
+| 🔴 | Celery worker is not running | Start the Celery worker: `celery -A config worker -l info`. Without a running worker, no tasks will be processed. |
+| 🔴 | Celery Beat scheduler is not running | Start Celery Beat: `celery -A config beat -l info`. Without Beat, periodic tasks will not be dispatched. |
+| 🔴 | Connection to OPAC API fails (network/DNS error) | Verify that the `opac_domain` is reachable and includes `https://`. Check firewall rules and DNS resolution. Review `UnexpectedEvent` records for error details. |
+| 🟡 | No records appear after task execution | Check that `from_date` and `until_date` cover a range with published documents. Verify the `collection_acron` is correct. Review Celery worker logs for warnings. |
+| 🟡 | Records exist but are not updated | Set `force_update` to `true` in the kwargs to force reprocessing of existing records. |
| +| 🟡 | XMLURL records show error status | Check the `exceptions` field in `XMLURL` for details. Common causes include invalid XML URLs or temporary server errors. The records will be retried in future runs. | +| 🟢 | Task runs slowly | Reduce the `limit` parameter to process fewer documents per page, or increase `timeout` if the OPAC API is slow. This does not affect data correctness. | +| 🟢 | Some documents are skipped with warnings | Documents without a valid `journal_acronym` are skipped. Check Celery worker logs for `WARNING` messages. These are typically incomplete records in the OPAC API. | + +### Checking Error Logs + +Errors are recorded in the `UnexpectedEvent` model: + +```python +from tracker.models import UnexpectedEvent + +errors = UnexpectedEvent.objects.filter( + detail__task="task_load_records_from_counter_dict" +).order_by("-created")[:10] + +for error in errors: + print(f"Date: {error.created}") + print(f"Exception: {error.exception}") + print(f"Details: {error.detail}") + print("---") +``` diff --git a/docs/pid_provider/guide_task_load_records_es.md b/docs/pid_provider/guide_task_load_records_es.md new file mode 100644 index 000000000..1082e392e --- /dev/null +++ b/docs/pid_provider/guide_task_load_records_es.md @@ -0,0 +1,182 @@ +# Guía: Carga de Registros desde counter_dict + +## Propósito + +La tarea `task_load_records_from_counter_dict` recopila documentos XML de una +colección específica de SciELO a través del endpoint `counter_dict` de la API +del OPAC y los carga en el sistema. + +Los registros recopilados del OPAC ya contienen un identificador **PID v3** +asignado. Por esta razón, la fuente de datos **debe** ser el sitio web oficial +de SciELO (ej., `https://www.scielo.br`). Usar cualquier otra fuente podría +resultar en la creación de identificadores PID v3 inéditos que no corresponden +a artículos publicados, lo que comprometería la integridad de los datos. + +**Qué hace:** + +1. Se conecta a la API del OPAC (ej., `https://www.scielo.br/api/v1/counter_dict`) +2. Obtiene metadatos de documentos de la colección y rango de fechas especificados +3. Para cada documento encontrado, despacha una subtarea + (`task_load_record_from_xml_url`) que descarga el XML y crea o actualiza + registros en **PidProviderXML** y **XMLURL** + +> **Importante:** Esta tarea **solo** crea o actualiza registros `PidProviderXML` +> y `XMLURL`. **No** crea registros de `Article`. + +--- + +## Creación de una Tarea Periódica vía Django Admin + +### Paso 1: Acceder a Django Admin + +1. Abra su navegador y navegue al panel de administración de Django: + `https:///admin/` +2. Inicie sesión con una cuenta de superusuario + +### Paso 2: Navegar a Tareas Periódicas + +1. En la barra lateral o página principal de Django admin, localice la + sección **PERIODIC TASKS** (provista por `django_celery_beat`) +2. Haga clic en **Periodic tasks** +3. 
Haga clic en el botón **Add periodic task** (esquina superior derecha) + +### Paso 3: Configurar la Tarea + +Complete los siguientes campos: + +- **Name**: Un nombre descriptivo, ej., + `Cargar registros OPAC - Brasil (scl)` +- **Task (registered)**: Seleccione o escriba: + `pid_provider.tasks.task_load_records_from_counter_dict` +- **Enabled**: Marque esta casilla para activar la tarea +- **Schedule**: Elija uno de los tipos de programación disponibles: + - **Interval**: ej., cada 24 horas + - **Crontab**: ej., `0 2 * * *` (todos los días a las 2:00 AM) + - **One-off task**: Marque esta opción si la tarea debe ejecutarse solo una vez + +### Paso 4: Configurar los Argumentos de Palabra Clave (kwargs) + +En el campo **Keyword arguments** (formato JSON), ingrese los parámetros de la +tarea: + +```json +{ + "username": "admin", + "collection_acron": "scl", + "from_date": "2024-01-01", + "until_date": "2024-12-31", + "limit": 100, + "timeout": 5, + "force_update": false, + "opac_domain": "https://www.scielo.br" +} +``` + +### Paso 5: Guardar + +Haga clic en **Save** para crear la tarea periódica. El programador Celery Beat +la ejecutará según la programación configurada. + +--- + +## Referencia de Parámetros + +| Parámetro | Tipo | Predeterminado | Descripción | +|---|---|---|---| +| `username` | str | `None` | Nombre de usuario que ejecuta la tarea. | +| `user_id` | int | `None` | ID de usuario (alternativa a `username`). | +| `collection_acron` | str | `"scl"` | Acrónimo de la colección (ej., `"scl"` para Brasil). | +| `from_date` | str | `"2000-01-01"` | Fecha de inicio en formato ISO (`YYYY-MM-DD`). | +| `until_date` | str | hoy | Fecha final en formato ISO (`YYYY-MM-DD`). | +| `limit` | int | `100` | Número de documentos por página de la API. | +| `timeout` | int | `5` | Tiempo de espera de la solicitud HTTP en segundos. | +| `force_update` | bool | `false` | Forzar la actualización aunque el registro ya exista. | +| `opac_domain` | str | `"www.scielo.br"` | Dominio del OPAC desde donde recopilar. **Debe incluir el protocolo** (ej., `https://www.scielo.br`). Usar `https://` garantiza una conexión segura con el sitio web oficial de SciELO. Si se omite el protocolo, el sistema usará `http://` por defecto. | + +--- + +## Verificación de Resultados + +Después de que la tarea se complete, verifique los resultados comprobando los +modelos `PidProviderXML` y `XMLURL`. + +### Verificar Registros PidProviderXML + +En Django admin, navegue a **PID PROVIDER > PidProviderXML** para ver los +registros creados o actualizados. + +Alternativamente, use el shell de Django: + +```python +from pid_provider.models import PidProviderXML + +# Listar registros recientes +recientes = PidProviderXML.objects.order_by("-updated")[:20] +for registro in recientes: + print(f"PID v3: {registro.v3} | Estado: {registro.proc_status} | " + f"Actualizado: {registro.updated}") + +# Filtrar por colección +from collection.models import Collection +col = Collection.objects.get(acron="scl") +registros = PidProviderXML.objects.filter(collections=col) +print(f"Total de registros para 'scl': {registros.count()}") +``` + +### Verificar Registros XMLURL + +En Django admin, navegue a **PID PROVIDER > XMLURL** para ver el estado de +procesamiento de URLs. 
+ +```python +from pid_provider.models import XMLURL + +# Listar registros XMLURL recientes +urls_recientes = XMLURL.objects.order_by("-updated")[:20] +for url_registro in urls_recientes: + print(f"URL: {url_registro.url} | Estado: {url_registro.status} | " + f"PID: {url_registro.pid}") + +# Verificar URLs fallidas +fallidas = XMLURL.objects.exclude(status="").exclude(exceptions="") +print(f"URLs con errores: {fallidas.count()}") +``` + +--- + +## Solución de Problemas + +Los problemas se categorizan por nivel de criticidad: + +- 🔴 **CRÍTICO** — Impide la ejecución de la tarea por completo; debe resolverse inmediatamente +- 🟡 **MODERADO** — La tarea se ejecuta pero produce resultados incompletos o incorrectos +- 🟢 **BAJO** — Problemas menores que no afectan la funcionalidad principal + +| Nivel | Problema | Solución | +|---|---|---| +| 🔴 | El worker de Celery no está en ejecución | Inicie el worker de Celery: `celery -A config worker -l info`. Sin un worker en ejecución, ninguna tarea será procesada. | +| 🔴 | El programador Celery Beat no está en ejecución | Inicie Celery Beat: `celery -A config beat -l info`. Sin Beat, las tareas periódicas no serán despachadas. | +| 🔴 | La conexión a la API del OPAC falla (error de red/DNS) | Verifique que el `opac_domain` sea accesible e incluya `https://`. Revise las reglas de firewall y la resolución DNS. Consulte los registros de `UnexpectedEvent` para obtener detalles del error. | +| 🟡 | No aparecen registros después de la ejecución de la tarea | Verifique que `from_date` y `until_date` cubran un rango con documentos publicados. Confirme que `collection_acron` sea correcto. Revise los logs del worker de Celery para advertencias. | +| 🟡 | Los registros existen pero no se actualizan | Establezca `force_update` en `true` en los kwargs para forzar el reprocesamiento de registros existentes. | +| 🟡 | Los registros XMLURL muestran estado de error | Verifique el campo `exceptions` en `XMLURL` para obtener detalles. Las causas comunes incluyen URLs de XML inválidas o errores temporales del servidor. Los registros serán reintentados en ejecuciones futuras. | +| 🟢 | La tarea se ejecuta lentamente | Reduzca el parámetro `limit` para procesar menos documentos por página, o aumente `timeout` si la API del OPAC es lenta. Esto no afecta la corrección de los datos. | +| 🟢 | Algunos documentos se omiten con advertencias | Los documentos sin un `journal_acronym` válido se omiten. Revise los logs del worker de Celery para mensajes de `WARNING`. Estos son típicamente registros incompletos en la API del OPAC. | + +### Verificar Logs de Errores + +Los errores se registran en el modelo `UnexpectedEvent`: + +```python +from tracker.models import UnexpectedEvent + +errores = UnexpectedEvent.objects.filter( + detail__task="task_load_records_from_counter_dict" +).order_by("-created")[:10] + +for error in errores: + print(f"Fecha: {error.created}") + print(f"Excepción: {error.exception}") + print(f"Detalles: {error.detail}") + print("---") +``` diff --git a/docs/pid_provider/guide_task_load_records_pt_br.md b/docs/pid_provider/guide_task_load_records_pt_br.md new file mode 100644 index 000000000..8f47b8d46 --- /dev/null +++ b/docs/pid_provider/guide_task_load_records_pt_br.md @@ -0,0 +1,181 @@ +# Guia: Carregamento de Registros a partir do counter_dict + +## Propósito + +A tarefa `task_load_records_from_counter_dict` coleta documentos XML de uma +coleção específica do SciELO por meio do endpoint `counter_dict` da API do +OPAC e os carrega no sistema. 
+ +Os registros coletados do OPAC já possuem um identificador **PID v3** atribuído. +Por essa razão, a fonte de dados **deve** ser o site oficial do SciELO +(ex., `https://www.scielo.br`). Usar qualquer outra fonte pode resultar na +criação de identificadores PID v3 inéditos que não correspondem a artigos +publicados, comprometendo a integridade dos dados. + +**O que ela faz:** + +1. Conecta-se à API do OPAC (ex., `https://www.scielo.br/api/v1/counter_dict`) +2. Obtém metadados dos documentos da coleção e intervalo de datas especificados +3. Para cada documento encontrado, despacha uma subtarefa + (`task_load_record_from_xml_url`) que baixa o XML e cria ou atualiza + registros em **PidProviderXML** e **XMLURL** + +> **Importante:** Esta tarefa **apenas** cria ou atualiza registros +> `PidProviderXML` e `XMLURL`. Ela **não** cria registros de `Article`. + +--- + +## Criação de uma Tarefa Periódica via Django Admin + +### Passo 1: Acessar o Django Admin + +1. Abra seu navegador e acesse o painel de administração do Django: + `https:///admin/` +2. Faça login com uma conta de superusuário + +### Passo 2: Navegar até Tarefas Periódicas + +1. Na barra lateral ou página principal do Django admin, localize a seção + **PERIODIC TASKS** (fornecida pelo `django_celery_beat`) +2. Clique em **Periodic tasks** +3. Clique no botão **Add periodic task** (canto superior direito) + +### Passo 3: Configurar a Tarefa + +Preencha os seguintes campos: + +- **Name**: Um nome descritivo, ex., + `Carregar registros OPAC - Brasil (scl)` +- **Task (registered)**: Selecione ou digite: + `pid_provider.tasks.task_load_records_from_counter_dict` +- **Enabled**: Marque esta caixa para ativar a tarefa +- **Schedule**: Escolha um dos tipos de agendamento disponíveis: + - **Interval**: ex., a cada 24 horas + - **Crontab**: ex., `0 2 * * *` (todos os dias às 2:00 AM) + - **One-off task**: Marque esta opção se a tarefa deve executar apenas uma vez + +### Passo 4: Configurar os Argumentos de Palavra-Chave (kwargs) + +No campo **Keyword arguments** (formato JSON), insira os parâmetros da tarefa: + +```json +{ + "username": "admin", + "collection_acron": "scl", + "from_date": "2024-01-01", + "until_date": "2024-12-31", + "limit": 100, + "timeout": 5, + "force_update": false, + "opac_domain": "https://www.scielo.br" +} +``` + +### Passo 5: Salvar + +Clique em **Save** para criar a tarefa periódica. O agendador Celery Beat irá +executá-la conforme o agendamento configurado. + +--- + +## Referência de Parâmetros + +| Parâmetro | Tipo | Padrão | Descrição | +|---|---|---|---| +| `username` | str | `None` | Nome do usuário que executa a tarefa. | +| `user_id` | int | `None` | ID do usuário (alternativa ao `username`). | +| `collection_acron` | str | `"scl"` | Acrônimo da coleção (ex., `"scl"` para Brasil). | +| `from_date` | str | `"2000-01-01"` | Data inicial no formato ISO (`YYYY-MM-DD`). | +| `until_date` | str | hoje | Data final no formato ISO (`YYYY-MM-DD`). | +| `limit` | int | `100` | Número de documentos por página da API. | +| `timeout` | int | `5` | Tempo limite da requisição HTTP em segundos. | +| `force_update` | bool | `false` | Forçar atualização mesmo se o registro já existir. | +| `opac_domain` | str | `"www.scielo.br"` | Domínio do OPAC de onde coletar. **Deve incluir o protocolo** (ex., `https://www.scielo.br`). Usar `https://` garante uma conexão segura com o site oficial do SciELO. Se o protocolo for omitido, o sistema usará `http://` por padrão. 
| + +--- + +## Verificação dos Resultados + +Após a conclusão da tarefa, verifique os resultados consultando os modelos +`PidProviderXML` e `XMLURL`. + +### Verificar Registros PidProviderXML + +No Django admin, navegue até **PID PROVIDER > PidProviderXML** para ver os +registros criados ou atualizados. + +Alternativamente, utilize o shell do Django: + +```python +from pid_provider.models import PidProviderXML + +# Listar registros recentes +recentes = PidProviderXML.objects.order_by("-updated")[:20] +for registro in recentes: + print(f"PID v3: {registro.v3} | Status: {registro.proc_status} | " + f"Atualizado: {registro.updated}") + +# Filtrar por coleção +from collection.models import Collection +col = Collection.objects.get(acron="scl") +registros = PidProviderXML.objects.filter(collections=col) +print(f"Total de registros para 'scl': {registros.count()}") +``` + +### Verificar Registros XMLURL + +No Django admin, navegue até **PID PROVIDER > XMLURL** para ver o status de +processamento das URLs. + +```python +from pid_provider.models import XMLURL + +# Listar registros XMLURL recentes +urls_recentes = XMLURL.objects.order_by("-updated")[:20] +for url_registro in urls_recentes: + print(f"URL: {url_registro.url} | Status: {url_registro.status} | " + f"PID: {url_registro.pid}") + +# Verificar URLs com falha +com_falha = XMLURL.objects.exclude(status="").exclude(exceptions="") +print(f"URLs com erros: {com_falha.count()}") +``` + +--- + +## Solução de Problemas + +Os problemas são categorizados por nível de criticidade: + +- 🔴 **CRÍTICO** — Impede a execução da tarefa completamente; deve ser resolvido imediatamente +- 🟡 **MODERADO** — A tarefa executa, mas produz resultados incompletos ou incorretos +- 🟢 **BAIXO** — Problemas menores que não afetam a funcionalidade principal + +| Nível | Problema | Solução | +|---|---|---| +| 🔴 | O worker do Celery não está em execução | Inicie o worker do Celery: `celery -A config worker -l info`. Sem um worker em execução, nenhuma tarefa será processada. | +| 🔴 | O agendador Celery Beat não está em execução | Inicie o Celery Beat: `celery -A config beat -l info`. Sem o Beat, as tarefas periódicas não serão despachadas. | +| 🔴 | Falha na conexão com a API do OPAC (erro de rede/DNS) | Verifique se o `opac_domain` está acessível e inclui `https://`. Revise as regras de firewall e a resolução DNS. Consulte os registros de `UnexpectedEvent` para detalhes do erro. | +| 🟡 | Nenhum registro aparece após a execução da tarefa | Verifique se `from_date` e `until_date` cobrem um intervalo com documentos publicados. Confirme se `collection_acron` está correto. Revise os logs do worker do Celery para avisos. | +| 🟡 | Os registros existem mas não são atualizados | Defina `force_update` como `true` nos kwargs para forçar o reprocessamento de registros existentes. | +| 🟡 | Registros XMLURL mostram status de erro | Verifique o campo `exceptions` no `XMLURL` para detalhes. Causas comuns incluem URLs de XML inválidas ou erros temporários do servidor. Os registros serão reprocessados em execuções futuras. | +| 🟢 | A tarefa executa lentamente | Reduza o parâmetro `limit` para processar menos documentos por página, ou aumente `timeout` se a API do OPAC estiver lenta. Isso não afeta a correção dos dados. | +| 🟢 | Alguns documentos são ignorados com avisos | Documentos sem um `journal_acronym` válido são ignorados. Verifique os logs do worker do Celery para mensagens de `WARNING`. Estes são tipicamente registros incompletos na API do OPAC. 
| + +### Verificar Logs de Erros + +Os erros são registrados no modelo `UnexpectedEvent`: + +```python +from tracker.models import UnexpectedEvent + +erros = UnexpectedEvent.objects.filter( + detail__task="task_load_records_from_counter_dict" +).order_by("-created")[:10] + +for erro in erros: + print(f"Data: {erro.created}") + print(f"Exceção: {erro.exception}") + print(f"Detalhes: {erro.detail}") + print("---") +``` diff --git a/docs/processing/guide_task_track_classic_website_article_pids_en.md b/docs/processing/guide_task_track_classic_website_article_pids_en.md new file mode 100644 index 000000000..bbda1623d --- /dev/null +++ b/docs/processing/guide_task_track_classic_website_article_pids_en.md @@ -0,0 +1,148 @@ +# Guide: task_track_classic_website_article_pids + +## Purpose + +The task `proc.tasks.task_track_classic_website_article_pids` allows you to +**identify the completeness of the migration from the classic SciELO website**. +It does so by comparing the complete list of article PIDs from the classic +website (read from the text file configured in +`ClassicWebsiteConfiguration.pid_list_path`) against each `ArticleProc` record +that has already been created in the system. + +After the comparison, the task updates the `pid_status` field of every +`ArticleProc` record in the collection so that you can quickly see how much of +the migration has been completed and which articles still need attention: + +| Status | Meaning | +|---|---| +| **matched** | The PID exists in the classic website list **and** the corresponding `ArticleProc` already has linked `migrated_data`. The migration of this article is complete. | +| **missing** | The PID exists in the classic website list **and** an `ArticleProc` exists, but it does **not** have linked `migrated_data`. The article has not been migrated yet. | +| **exceeding** | The `ArticleProc` exists in the system but its PID was **not** found in the classic website PID list. It may be a record that no longer exists in the classic website. | + +New PIDs found in the classic website file that do not yet have a corresponding +`ArticleProc` record are automatically created with status **missing**. + +By running this task periodically you can monitor migration progress and +identify articles that still require migration or investigation. + +### Prerequisites + +| Requirement | Details | +|---|---| +| **Classic Website Configuration** | A `ClassicWebsiteConfiguration` record must exist for the target collection, with a valid `pid_list_path` pointing to a text file containing one PID per line. | +| **Collection** | At least one `Collection` record must exist. | +| **User** | A valid Django user (by `username` or `user_id`). | + +--- + +## How to create the periodic task (django_celery_beat) + +### Step 1 – Access the administration panel + +1. Log in to the Wagtail admin panel (e.g. `https:///admin/`). +2. In the left sidebar menu, go to **Settings > Periodic tasks**. + +> If you do not see this menu item, make sure the `django_celery_beat` app is +> installed and that your user has the appropriate permissions. + +### Step 2 – Create a new Periodic Task + +1. Click **Add periodic task**. +2. Fill in the required fields: + +| Field | Value | +|---|---| +| **Name** | A descriptive name, e.g. `Track Classic Website PIDs (scl)` | +| **Task (registered)** | Select `proc.tasks.task_track_classic_website_article_pids` | +| **Enabled** | ✅ Checked | + +3. Choose a schedule. 
For example, to run once a day, create or select an + **Interval Schedule** of `1 day` or a **Crontab Schedule** such as + `0 3 * * *` (every day at 03:00). + +### Step 3 – Configure the Keyword Arguments (kwargs) + +In the **Keyword arguments (JSON)** field, enter a JSON object with the +parameters the task accepts: + +```json +{ + "username": "admin", + "collection_acron": "scl" +} +``` + +### Task arguments + +The table below lists **all** arguments accepted by the task. In +django_celery_beat they must be supplied as a JSON object in the +**Keyword arguments** field. + +| Argument | Type | Required | Default | Description | +|---|---|---|---|---| +| `username` | string | **yes** ¹ | — | Username of the Django user that will be recorded as the creator of any new `ArticleProc` records. | +| `user_id` | integer | **yes** ¹ | `None` | Numeric ID of the Django user. Can be used instead of `username`. | +| `collection_acron` | string | no | `None` | Acronym of the collection to process (e.g. `"scl"` for Brazil). When omitted, **all** configured collections are processed. | + +> ¹ At least one of `username` or `user_id` **must** be provided. If both are +> supplied, `user_id` takes precedence. + +#### Warnings + +> **⚠️ Critical:** The `username` or `user_id` must correspond to an existing user. +> If the user is not found, the task will log an error and skip processing. + +> **⚠️ Critical:** A `ClassicWebsiteConfiguration` record with a valid +> `pid_list_path` must exist for the target collection. If the configuration +> is missing or the file cannot be read, the task will fail for that +> collection. + +### Step 4 – Save + +Click **Save**. The task will be picked up by the Celery Beat scheduler +according to the schedule you configured. + +--- + +## Verifying the result + +### 1. Task Tracker (Wagtail admin) + +In the Wagtail admin sidebar, go to **Task Tracker** (under the Tracker section). +Look for entries with the name +`proc.tasks.task_track_classic_website_article_pids`. Each entry shows: + +- **Item** – The collection acronym processed (or `all`). +- **Status** – `started`, `completed`, or `failed`. +- **Detail** – A JSON object containing: + - `params`: the kwargs used. + - `events`: a list with the PID status counts per collection (e.g. + `{"collection": "scl", "matched": 1200, "missing": 50, "exceeding": 3}`). + - `exceptions`: any errors that occurred. + +### 2. ArticleProc records (Wagtail admin) + +Navigate to the **ArticleProc** listing in the admin panel and filter by +`pid_status` to verify the distribution: + +- **matched** – articles whose migration is complete. +- **missing** – articles that still need to be migrated (no `migrated_data`). +- **exceeding** – articles in the system but not present in the classic website PID list. + +### 3. Django shell / programmatic check + +```python +from proc.models import ArticleProc +from django.db.models import Count + +# Summary of pid_status for a collection +ArticleProc.objects.filter( + collection__acron="scl" +).values("pid_status").annotate(total=Count("pid")).order_by("pid_status") +``` + +### 4. Celery worker logs + +Check the Celery worker output for INFO and ERROR messages related to +`task_track_classic_website_article_pids`. Errors are also recorded in the +`UnexpectedEvent` model, accessible from the Wagtail admin panel. 
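+
+A quick way to list those error records from the Django shell (a minimal
+sketch, following the same pattern used in the other task guides; the exact
+layout of the `detail` payload depends on how each error was recorded):
+
+```python
+from tracker.models import UnexpectedEvent
+
+# Inspect the most recent unexpected events and look for entries related to
+# task_track_classic_website_article_pids in the detail payload.
+for event in UnexpectedEvent.objects.order_by("-created")[:10]:
+    print(f"Date: {event.created}")
+    print(f"Exception: {event.exception}")
+    print(f"Details: {event.detail}")
+    print("---")
+```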
diff --git a/docs/processing/guide_task_track_classic_website_article_pids_es.md b/docs/processing/guide_task_track_classic_website_article_pids_es.md new file mode 100644 index 000000000..0cd9bad9b --- /dev/null +++ b/docs/processing/guide_task_track_classic_website_article_pids_es.md @@ -0,0 +1,154 @@ +# Guía: task_track_classic_website_article_pids + +## Propósito + +La tarea `proc.tasks.task_track_classic_website_article_pids` permite +**identificar la completitud de la migración desde el sitio web clásico de +SciELO**. Para ello, compara la lista completa de PIDs de artículos del sitio +web clásico (leída desde el archivo de texto configurado en +`ClassicWebsiteConfiguration.pid_list_path`) con cada registro `ArticleProc` +que ya ha sido creado en el sistema. + +Después de la comparación, la tarea actualiza el campo `pid_status` de cada +registro `ArticleProc` de la colección para que pueda ver rápidamente cuánto +de la migración se ha completado y qué artículos aún requieren atención: + +| Estado | Significado | +|---|---| +| **matched** | El PID existe en la lista del sitio web clásico **y** el `ArticleProc` correspondiente ya tiene `migrated_data` vinculado. La migración de este artículo está completa. | +| **missing** | El PID existe en la lista del sitio web clásico **y** existe un `ArticleProc`, pero **no** tiene `migrated_data` vinculado. El artículo aún no ha sido migrado. | +| **exceeding** | El `ArticleProc` existe en el sistema pero su PID **no** fue encontrado en la lista de PIDs del sitio web clásico. Puede ser un registro que ya no existe en el sitio web clásico. | + +Los PIDs nuevos encontrados en el archivo del sitio web clásico que aún no +tienen un registro `ArticleProc` correspondiente se crean automáticamente con +estado **missing**. + +Al ejecutar esta tarea periódicamente puede monitorear el progreso de la +migración e identificar artículos que aún requieren migración o investigación. + +### Prerrequisitos + +| Requisito | Detalles | +|---|---| +| **Configuración del sitio web clásico** | Debe existir un registro `ClassicWebsiteConfiguration` para la colección de destino, con un `pid_list_path` válido que apunte a un archivo de texto que contenga un PID por línea. | +| **Colección** | Debe existir al menos un registro `Collection`. | +| **Usuario** | Un usuario Django válido (por `username` o `user_id`). | + +--- + +## Cómo crear la tarea periódica (django_celery_beat) + +### Paso 1 – Acceder al panel de administración + +1. Inicie sesión en el panel de administración de Wagtail (por ejemplo, `https:///admin/`). +2. En el menú lateral izquierdo, vaya a **Settings > Periodic tasks**. + +> Si no ve este elemento del menú, asegúrese de que la aplicación +> `django_celery_beat` esté instalada y de que su usuario tenga los +> permisos apropiados. + +### Paso 2 – Crear una nueva tarea periódica + +1. Haga clic en **Add periodic task**. +2. Complete los campos obligatorios: + +| Campo | Valor | +|---|---| +| **Name** | Un nombre descriptivo, por ejemplo `Track Classic Website PIDs (scl)` | +| **Task (registered)** | Seleccione `proc.tasks.task_track_classic_website_article_pids` | +| **Enabled** | ✅ Marcado | + +3. Elija una programación. Por ejemplo, para ejecutar una vez al día, cree o + seleccione un **Interval Schedule** de `1 day` o un **Crontab Schedule** + como `0 3 * * *` (todos los días a las 03:00). 
+ +### Paso 3 – Configurar los argumentos por palabra clave (kwargs) + +En el campo **Keyword arguments (JSON)**, ingrese un objeto JSON con los +parámetros que acepta la tarea: + +```json +{ + "username": "admin", + "collection_acron": "scl" +} +``` + +### Argumentos de la tarea + +La tabla a continuación lista **todos** los argumentos aceptados por la tarea. +En django_celery_beat deben proporcionarse como un objeto JSON en el campo +**Keyword arguments**. + +| Argumento | Tipo | Obligatorio | Valor por defecto | Descripción | +|---|---|---|---|---| +| `username` | string | **sí** ¹ | — | Nombre de usuario del usuario Django que se registrará como creador de cualquier nuevo registro `ArticleProc`. | +| `user_id` | integer | **sí** ¹ | `None` | ID numérico del usuario Django. Puede usarse en lugar de `username`. | +| `collection_acron` | string | no | `None` | Acrónimo de la colección a procesar (por ejemplo, `"scl"` para Brasil). Si se omite, se procesan **todas** las colecciones configuradas. | + +> ¹ Al menos uno de `username` o `user_id` **debe** proporcionarse. Si se +> proporcionan ambos, `user_id` tiene prioridad. + +#### Advertencias + +> **⚠️ Crítico:** El `username` o `user_id` debe corresponder a un usuario +> existente. Si el usuario no se encuentra, la tarea registrará un error y +> omitirá el procesamiento. + +> **⚠️ Crítico:** Debe existir un registro `ClassicWebsiteConfiguration` con un +> `pid_list_path` válido para la colección de destino. Si la configuración +> no existe o el archivo no se puede leer, la tarea fallará para esa +> colección. + +### Paso 4 – Guardar + +Haga clic en **Save**. La tarea será recogida por el programador Celery Beat +de acuerdo con la programación que configuró. + +--- + +## Verificación del resultado + +### 1. Task Tracker (panel de administración de Wagtail) + +En la barra lateral del panel de administración de Wagtail, vaya a +**Task Tracker** (en la sección Tracker). Busque entradas con el nombre +`proc.tasks.task_track_classic_website_article_pids`. Cada entrada muestra: + +- **Item** – El acrónimo de la colección procesada (o `all`). +- **Status** – `started`, `completed` o `failed`. +- **Detail** – Un objeto JSON que contiene: + - `params`: los kwargs utilizados. + - `events`: una lista con los conteos de estado de PID por colección (por + ejemplo, `{"collection": "scl", "matched": 1200, "missing": 50, + "exceeding": 3}`). + - `exceptions`: cualquier error que haya ocurrido. + +### 2. Registros ArticleProc (panel de administración de Wagtail) + +Navegue a la lista de **ArticleProc** en el panel de administración y filtre +por `pid_status` para verificar la distribución: + +- **matched** – artículos cuya migración está completa. +- **missing** – artículos que aún necesitan ser migrados (sin `migrated_data`). +- **exceeding** – artículos en el sistema pero no presentes en la lista de PIDs del + sitio web clásico. + +### 3. Django shell / verificación programática + +```python +from proc.models import ArticleProc +from django.db.models import Count + +# Resumen de pid_status para una colección +ArticleProc.objects.filter( + collection__acron="scl" +).values("pid_status").annotate(total=Count("pid")).order_by("pid_status") +``` + +### 4. Logs del worker de Celery + +Revise la salida del worker de Celery para mensajes INFO y ERROR relacionados +con `task_track_classic_website_article_pids`. Los errores también se registran +en el modelo `UnexpectedEvent`, accesible desde el panel de administración de +Wagtail. 
diff --git a/docs/processing/guide_task_track_classic_website_article_pids_pt.md b/docs/processing/guide_task_track_classic_website_article_pids_pt.md new file mode 100644 index 000000000..aeff94eeb --- /dev/null +++ b/docs/processing/guide_task_track_classic_website_article_pids_pt.md @@ -0,0 +1,153 @@ +# Guia: task_track_classic_website_article_pids + +## Propósito + +A tarefa `proc.tasks.task_track_classic_website_article_pids` permite +**identificar a completude da migração a partir do site clássico do SciELO**. +Para isso, compara a lista completa de PIDs de artigos do site clássico (lida +a partir do arquivo de texto configurado em +`ClassicWebsiteConfiguration.pid_list_path`) com cada registro `ArticleProc` +já criado no sistema. + +Após a comparação, a tarefa atualiza o campo `pid_status` de cada registro +`ArticleProc` da coleção para que você possa verificar rapidamente o quanto da +migração já foi concluído e quais artigos ainda precisam de atenção: + +| Status | Significado | +|---|---| +| **matched** | O PID existe na lista do site clássico **e** o `ArticleProc` correspondente já possui `migrated_data` vinculado. A migração deste artigo está completa. | +| **missing** | O PID existe na lista do site clássico **e** um `ArticleProc` existe, porém **não** possui `migrated_data` vinculado. O artigo ainda não foi migrado. | +| **exceeding** | O `ArticleProc` existe no sistema, mas o PID **não** foi encontrado na lista de PIDs do site clássico. Pode ser um registro que não existe mais no site clássico. | + +PIDs novos encontrados no arquivo do site clássico que ainda não possuem um +registro `ArticleProc` correspondente são criados automaticamente com status +**missing**. + +Ao executar esta tarefa periodicamente, é possível acompanhar o progresso da +migração e identificar artigos que ainda precisam ser migrados ou investigados. + +### Pré-requisitos + +| Requisito | Detalhes | +|---|---| +| **Configuração do site clássico** | Deve existir um registro `ClassicWebsiteConfiguration` para a coleção de destino, com um `pid_list_path` válido apontando para um arquivo de texto contendo um PID por linha. | +| **Coleção** | Deve existir pelo menos um registro `Collection`. | +| **Usuário** | Um usuário Django válido (por `username` ou `user_id`). | + +--- + +## Como criar a tarefa periódica (django_celery_beat) + +### Passo 1 – Acessar o painel de administração + +1. Faça login no painel de administração do Wagtail (por exemplo, `https:///admin/`). +2. No menu lateral esquerdo, acesse **Settings > Periodic tasks**. + +> Se você não visualizar este item de menu, verifique se o aplicativo +> `django_celery_beat` está instalado e se o seu usuário possui as +> permissões apropriadas. + +### Passo 2 – Criar uma nova tarefa periódica + +1. Clique em **Add periodic task**. +2. Preencha os campos obrigatórios: + +| Campo | Valor | +|---|---| +| **Name** | Um nome descritivo, por exemplo `Track Classic Website PIDs (scl)` | +| **Task (registered)** | Selecione `proc.tasks.task_track_classic_website_article_pids` | +| **Enabled** | ✅ Marcado | + +3. Escolha uma programação. Por exemplo, para executar uma vez ao dia, crie ou + selecione um **Interval Schedule** de `1 day` ou um **Crontab Schedule** + como `0 3 * * *` (todos os dias às 03:00). 
+ +### Passo 3 – Configurar os argumentos por palavra-chave (kwargs) + +No campo **Keyword arguments (JSON)**, insira um objeto JSON com os +parâmetros que a tarefa aceita: + +```json +{ + "username": "admin", + "collection_acron": "scl" +} +``` + +### Argumentos da tarefa + +A tabela abaixo lista **todos** os argumentos aceitos pela tarefa. No +django_celery_beat eles devem ser fornecidos como um objeto JSON no campo +**Keyword arguments**. + +| Argumento | Tipo | Obrigatório | Valor padrão | Descrição | +|---|---|---|---|---| +| `username` | string | **sim** ¹ | — | Nome de usuário do usuário Django que será registrado como criador de quaisquer novos registros `ArticleProc`. | +| `user_id` | integer | **sim** ¹ | `None` | ID numérico do usuário Django. Pode ser usado no lugar do `username`. | +| `collection_acron` | string | não | `None` | Acrônimo da coleção a ser processada (por exemplo, `"scl"` para Brasil). Se omitido, **todas** as coleções configuradas serão processadas. | + +> ¹ Pelo menos um entre `username` ou `user_id` **deve** ser fornecido. Se +> ambos forem informados, `user_id` tem prioridade. + +#### Avisos + +> **⚠️ Crítico:** O `username` ou `user_id` deve corresponder a um usuário +> existente. Se o usuário não for encontrado, a tarefa registrará um erro +> e pulará o processamento. + +> **⚠️ Crítico:** Deve existir um registro `ClassicWebsiteConfiguration` com um +> `pid_list_path` válido para a coleção de destino. Se a configuração não +> existir ou o arquivo não puder ser lido, a tarefa falhará para essa +> coleção. + +### Passo 4 – Salvar + +Clique em **Save**. A tarefa será capturada pelo agendador Celery Beat de +acordo com a programação que você configurou. + +--- + +## Verificação do resultado + +### 1. Task Tracker (painel de administração do Wagtail) + +Na barra lateral do painel de administração do Wagtail, acesse +**Task Tracker** (na seção Tracker). Procure entradas com o nome +`proc.tasks.task_track_classic_website_article_pids`. Cada entrada exibe: + +- **Item** – O acrônimo da coleção processada (ou `all`). +- **Status** – `started`, `completed` ou `failed`. +- **Detail** – Um objeto JSON contendo: + - `params`: os kwargs utilizados. + - `events`: uma lista com as contagens de status de PID por coleção (por + exemplo, `{"collection": "scl", "matched": 1200, "missing": 50, + "exceeding": 3}`). + - `exceptions`: quaisquer erros que tenham ocorrido. + +### 2. Registros ArticleProc (painel de administração do Wagtail) + +Navegue até a listagem de **ArticleProc** no painel de administração e filtre +por `pid_status` para verificar a distribuição: + +- **matched** – artigos cuja migração está completa. +- **missing** – artigos que ainda precisam ser migrados (sem `migrated_data`). +- **exceeding** – artigos no sistema, mas não presentes na lista de PIDs do site clássico. + +### 3. Django shell / verificação programática + +```python +from proc.models import ArticleProc +from django.db.models import Count + +# Resumo de pid_status para uma coleção +ArticleProc.objects.filter( + collection__acron="scl" +).values("pid_status").annotate(total=Count("pid")).order_by("pid_status") +``` + +### 4. Logs do worker do Celery + +Verifique a saída do worker do Celery para mensagens INFO e ERROR relacionadas +a `task_track_classic_website_article_pids`. Os erros também são registrados no +modelo `UnexpectedEvent`, acessível a partir do painel de administração do +Wagtail. 
diff --git a/files_storage/wagtail_hooks.py b/files_storage/wagtail_hooks.py index 988d802e6..5d9d42218 100644 --- a/files_storage/wagtail_hooks.py +++ b/files_storage/wagtail_hooks.py @@ -1,42 +1,27 @@ -from django.http import HttpResponseRedirect from django.utils.translation import gettext_lazy as _ -from wagtail_modeladmin.options import ModelAdmin, modeladmin_register -from wagtail_modeladmin.views import CreateView +from wagtail.snippets.views.snippets import SnippetViewSet from config.menu import get_menu_order from files_storage.models import MinioConfiguration -class MinioConfigurationCreateView(CreateView): - def form_valid(self, form): - self.object = form.save_all(self.request.user) - return HttpResponseRedirect(self.get_success_url()) - - -class MinioConfigurationAdmin(ModelAdmin): +class MinioConfigurationViewSet(SnippetViewSet): model = MinioConfiguration menu_label = _("Minio Configuration") - create_view_class = MinioConfigurationCreateView menu_icon = "folder" menu_order = get_menu_order("files_storage") # no menu, ficará disponível como sub-menu em "Settings" add_to_settings_menu = True - exclude_from_explorer = False - inspect_view_enabled = True list_per_page = 10 list_display = ( "name", "host", "bucket_root", - "bucket_app_subdir", ) search_fields = ( "name", "host", "bucket_root", - "bucket_app_subdir", ) - -modeladmin_register(MinioConfigurationAdmin) diff --git a/htmlxml/migrations/0005_remove_duplicate_htmlxml_and_add_unique_constraint.py b/htmlxml/migrations/0005_remove_duplicate_htmlxml_and_add_unique_constraint.py index 040c81e82..4121b96c4 100644 --- a/htmlxml/migrations/0005_remove_duplicate_htmlxml_and_add_unique_constraint.py +++ b/htmlxml/migrations/0005_remove_duplicate_htmlxml_and_add_unique_constraint.py @@ -60,6 +60,10 @@ def reverse_migration(apps, schema_editor): class Migration(migrations.Migration): + # Disable atomic transactions to allow the deletes to commit before creating the index + # This prevents PostgreSQL "pending trigger events" error when creating the unique constraint + atomic = False + dependencies = [ ("htmlxml", "0004_alter_bodyandbackfile_file_and_more"), ] diff --git a/install.sh b/install.sh new file mode 100644 index 000000000..5bb6b0ff9 --- /dev/null +++ b/install.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# +# Bash script for SciELO Upload Installation/Update +# +# How it works: +# ./install.sh +# +# Check the tag version at https://github.com/scieloorg/scms-upload/releases + +set -euo pipefail + +if [ -z "$1" ]; then + # Version is mandatory + echo "Version not informed. Please, inform the Upload version. E.g.: v2.3.4, ./install.sh v2.3.4" + exit 128 +fi + +UPLOAD_DIR=./upload +ENV_FILES_DIR=.envs/.production +IS_UPDATE=0 + +echo "Checking directory $UPLOAD_DIR/$ENV_FILES_DIR ..." + +if [ ! -d "$UPLOAD_DIR/$ENV_FILES_DIR" ]; then + echo "Installing version $1 ..." + git clone --no-checkout --filter=blob:none --depth=1 --branch "$1" --sparse https://github.com/scieloorg/scms-upload "$UPLOAD_DIR" + cd "$UPLOAD_DIR" + git sparse-checkout init --cone + git sparse-checkout set .envs/.production-template compose/production/postgres/maintenance Makefile production.yml + git checkout "$1" + mv .envs/.production-template "$ENV_FILES_DIR" +else + echo "Updating to version $1 ..." + echo " Downloading files from Git repository ..." 
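+    # Update path: only the Makefile and production.yml are refreshed from the
+    # requested release tag; the existing .envs/.production files are kept as-is.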
+ cd "$UPLOAD_DIR" + wget -O Makefile "https://raw.githubusercontent.com/scieloorg/scms-upload/refs/tags/$1/Makefile" + wget -O production.yml "https://raw.githubusercontent.com/scieloorg/scms-upload/refs/tags/$1/production.yml" + IS_UPDATE=1 +fi + +echo "$1" > VERSION + +echo "Version $(cat VERSION) installed/updated!" + +if [ "$IS_UPDATE" -eq 1 ]; then + # For updating: update Django containers and make DB migrations + make update_webapp compose=production.yml + make django_migrate_fresh_migrations compose=production.yml +else + # For installation: make Docker login and build DB + read -p "Enter the Docker login user: " DOCKER_USER + docker login -u "$DOCKER_USER" + make build compose=production.yml + make django_migrate compose=production.yml +fi + +make up compose=production.yml +make ps compose=production.yml + +cd - +exit 0 + diff --git a/institution/wagtail_hooks.py b/institution/wagtail_hooks.py index 7f305535a..0adef5f20 100644 --- a/institution/wagtail_hooks.py +++ b/institution/wagtail_hooks.py @@ -1,29 +1,19 @@ -from django.http import HttpResponseRedirect from django.utils.translation import gettext_lazy as _ -from wagtail_modeladmin.options import ModelAdmin, modeladmin_register -from wagtail_modeladmin.views import CreateView +from wagtail.snippets.models import register_snippet +from wagtail.snippets.views.snippets import SnippetViewSet from config.menu import get_menu_order from .models import Institution -class InstitutionCreateView(CreateView): - def form_valid(self, form): - self.object = form.save_all(self.request.user) - return HttpResponseRedirect(self.get_success_url()) - - -class InstitutionAdmin(ModelAdmin): +class InstitutionViewSet(SnippetViewSet): model = Institution - create_view_class = InstitutionCreateView menu_label = _("Institution") menu_icon = "folder" menu_order = get_menu_order("institution") - add_to_settings_menu = False # or True to add your model to the Settings sub-menu - exclude_from_explorer = ( - False # or True to exclude pages of this type from Wagtail's explorer view - ) + add_to_settings_menu = False + list_display = ( "name", "institution_type", @@ -54,4 +44,4 @@ class InstitutionAdmin(ModelAdmin): export_filename = "institutions" -# modeladmin_register(InstitutionAdmin) +register_snippet(InstitutionViewSet) diff --git a/issue/wagtail_hooks.py b/issue/wagtail_hooks.py index ff2669457..def92299a 100644 --- a/issue/wagtail_hooks.py +++ b/issue/wagtail_hooks.py @@ -6,6 +6,7 @@ from config.menu import get_menu_order from issue.views import IssueCreateView, TOCEditView +from team.models import get_user_membership_ids from .models import TOC, Issue @@ -57,6 +58,19 @@ class IssueSnippetViewSet(SnippetViewSet): list_export = ["csv", "xlsx"] export_filename = "issues" + def get_queryset(self, request): + qs = super().get_queryset(request) + user = request.user + + if user.is_superuser: + return qs + + membership = get_user_membership_ids(user) + if membership.get("journal_list_ids"): + return qs.filter(journal__in=membership["journal_list_ids"]).distinct() + + return qs.none() + class TOCSnippetViewSet(SnippetViewSet): model = TOC @@ -99,6 +113,19 @@ class TOCSnippetViewSet(SnippetViewSet): list_export = ["csv", "xlsx"] export_filename = "table_of_contents" + def get_queryset(self, request): + qs = super().get_queryset(request) + user = request.user + + if user.is_superuser: + return qs + + membership = get_user_membership_ids(user) + if membership.get("journal_list_ids"): + return 
qs.filter(issue__journal__in=membership["journal_list_ids"]).distinct() + + return qs.none() + # Grupo de Snippets para Issues class IssueSnippetViewSetGroup(SnippetViewSetGroup): diff --git a/journal/migrations/0005_journaltoc_alter_journal_official_journal_and_more.py b/journal/migrations/0005_journaltoc_alter_journal_official_journal_and_more.py deleted file mode 100644 index a53441627..000000000 --- a/journal/migrations/0005_journaltoc_alter_journal_official_journal_and_more.py +++ /dev/null @@ -1,44 +0,0 @@ -# Generated by Django 5.0.3 on 2024-10-09 23:04 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("journal", "0004_journal_contact_address_journal_contact_location_and_more"), - ] - - operations = [ - migrations.CreateModel( - name="JournalTOC", - fields=[], - options={ - "proxy": True, - "indexes": [], - "constraints": [], - }, - bases=("journal.journal", models.Model), - ), - migrations.AlterField( - model_name="journal", - name="official_journal", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - to="journal.officialjournal", - ), - ), - migrations.AlterField( - model_name="journalsection", - name="parent", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="j_sections", - to="journal.journaltoc", - ), - ), - ] diff --git a/journal/migrations/0007_merge_20241113_1945.py b/journal/migrations/0007_merge_20241113_1945.py deleted file mode 100644 index 2abdf6ade..000000000 --- a/journal/migrations/0007_merge_20241113_1945.py +++ /dev/null @@ -1,12 +0,0 @@ -# Generated by Django 5.0.3 on 2024-11-13 19:45 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("journal", "0005_journaltoc_alter_journal_official_journal_and_more"), - ("journal", "0006_alter_journalsection_text"), - ] - - operations = [] diff --git a/journal/migrations/0008_merge_0007_journal_wos_areas_0007_merge_20241113_1945.py b/journal/migrations/0008_merge_0007_journal_wos_areas_0007_merge_20241113_1945.py deleted file mode 100644 index 007e78f9a..000000000 --- a/journal/migrations/0008_merge_0007_journal_wos_areas_0007_merge_20241113_1945.py +++ /dev/null @@ -1,12 +0,0 @@ -# Generated by Django 5.0.3 on 2025-03-02 00:16 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("journal", "0007_journal_wos_areas"), - ("journal", "0007_merge_20241113_1945"), - ] - - operations = [] diff --git a/journal/migrations/0014_journal_core_synchronized_alter_subject_code.py b/journal/migrations/0012_journal_core_synchronized_squashed_0013_alter_subject_code.py similarity index 66% rename from journal/migrations/0014_journal_core_synchronized_alter_subject_code.py rename to journal/migrations/0012_journal_core_synchronized_squashed_0013_alter_subject_code.py index 3de922218..2f781f699 100644 --- a/journal/migrations/0014_journal_core_synchronized_alter_subject_code.py +++ b/journal/migrations/0012_journal_core_synchronized_squashed_0013_alter_subject_code.py @@ -1,11 +1,16 @@ -# Generated by Django 5.0.8 on 2025-08-26 14:12 +# Generated by Django 5.0.8 on 2025-08-25 22:48 from django.db import migrations, models class Migration(migrations.Migration): + replaces = [ + ("journal", "0012_journal_core_synchronized"), + ("journal", "0013_alter_subject_code"), + ] + dependencies = [ - ("journal", 
"0013_delete_journaltoc_alter_journalsection_parent"), + ("journal", "0011_alter_journal_contact_address_and_more"), ] operations = [ diff --git a/journal/migrations/0012_merge_20250626_1842.py b/journal/migrations/0012_merge_20250626_1842.py deleted file mode 100644 index 0fe40c74d..000000000 --- a/journal/migrations/0012_merge_20250626_1842.py +++ /dev/null @@ -1,12 +0,0 @@ -# Generated by Django 5.0.8 on 2025-06-26 18:42 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("journal", "0008_merge_0007_journal_wos_areas_0007_merge_20241113_1945"), - ("journal", "0011_alter_journal_contact_address_and_more"), - ] - - operations = [] diff --git a/journal/migrations/0013_delete_journaltoc_alter_journalsection_parent.py b/journal/migrations/0013_delete_journaltoc_alter_journalsection_parent.py deleted file mode 100644 index 4ebacf4b5..000000000 --- a/journal/migrations/0013_delete_journaltoc_alter_journalsection_parent.py +++ /dev/null @@ -1,28 +0,0 @@ -# Generated by Django 5.0.8 on 2025-06-27 03:36 - -import django.db.models.deletion -import modelcluster.fields -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("journal", "0012_merge_20250626_1842"), - ] - - operations = [ - migrations.DeleteModel( - name="JournalTOC", - ), - migrations.AlterField( - model_name="journalsection", - name="parent", - field=modelcluster.fields.ParentalKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="j_sections", - to="journal.journal", - ), - ), - ] diff --git a/location/wagtail_hooks.py b/location/wagtail_hooks.py index 022c40666..9d2cb9d95 100644 --- a/location/wagtail_hooks.py +++ b/location/wagtail_hooks.py @@ -1,29 +1,19 @@ -from django.http import HttpResponseRedirect from django.utils.translation import gettext_lazy as _ -from wagtail_modeladmin.options import ModelAdmin, modeladmin_register -from wagtail_modeladmin.views import CreateView +from wagtail.snippets.models import register_snippet +from wagtail.snippets.views.snippets import SnippetViewSet from config.menu import get_menu_order from .models import Location -class LocationCreateView(CreateView): - def form_valid(self, form): - self.object = form.save_all(self.request.user) - return HttpResponseRedirect(self.get_success_url()) - - -class LocationAdmin(ModelAdmin): +class LocationViewSet(SnippetViewSet): model = Location - create_view_class = LocationCreateView menu_label = _("Location") menu_icon = "folder" menu_order = get_menu_order("location") - add_to_settings_menu = False # or True to add your model to the Settings sub-menu - exclude_from_explorer = ( - False # or True to exclude pages of this type from Wagtail's explorer view - ) + add_to_settings_menu = False + list_display = ( "country", "state", @@ -45,4 +35,4 @@ class LocationAdmin(ModelAdmin): export_filename = "locations" -# modeladmin_register(LocationAdmin) +register_snippet(LocationViewSet) diff --git a/migration/choices.py b/migration/choices.py index 8f1e4bd32..d1fc0a08e 100644 --- a/migration/choices.py +++ b/migration/choices.py @@ -16,3 +16,16 @@ (MS_IMPORTED, _("Imported")), (MS_PUBLISHED, _("Published")), ) + + +PID_STATUS_MISSING = "missing" +PID_STATUS_MATCHED = "matched" +PID_STATUS_EXCEEDING = "exceeding" +PID_STATUS_UNKNOWN = "unknown" + +PID_STATUS = ( + (PID_STATUS_MISSING, _("Missing")), + (PID_STATUS_MATCHED, _("Matched")), + (PID_STATUS_EXCEEDING, _("Exceeding")), + (PID_STATUS_UNKNOWN, _("Unknown")), +) \ No newline at 
end of file diff --git a/migration/controller.py b/migration/controller.py index 533a9eb19..5c84cf142 100644 --- a/migration/controller.py +++ b/migration/controller.py @@ -625,6 +625,13 @@ def _build_sps_package_add_assets(self, zf, issue_proc): # obtém o nome do arquivo no padrão sps sps_filename = xml_graphic.name_canonical(self.sps_pkg_name).lower() + # se o nome canônico não tem extensão, obtém a extensão do arquivo original + _, sps_ext = os.path.splitext(sps_filename) + if not sps_ext: + _, asset_ext = os.path.splitext(asset.file.path) + if asset_ext: + sps_filename += asset_ext.lower() + # indica a troca de href original para o padrão SPS self.replacements[xml_graphic.xlink_href] = sps_filename diff --git a/migration/models.py b/migration/models.py index c3cf604a0..472fdda30 100644 --- a/migration/models.py +++ b/migration/models.py @@ -176,6 +176,30 @@ def get_or_create( base_form_class = CoreAdminModelForm + def get_pid_list(self): + """ + Reads and returns a set of article PIDs from the file + indicated by pid_list_path. + """ + if not self.pid_list_path: + return set() + try: + with open(self.pid_list_path, "r") as fp: + return set(fp.read().split()) + except FileNotFoundError: + logging.warning( + "pid_list_path file not found: %s", self.pid_list_path + ) + except Exception as e: + logging.exception( + "Error reading pid_list_path %s: %s", self.pid_list_path, e + ) + return set() + + @property + def pid_list(self): + return self.get_pid_list() + class MigratedData(CommonControlField): collection = models.ForeignKey( @@ -240,8 +264,9 @@ def register_classic_website_data( force_update=False, ): try: - if data: - classic_ws_obj = cls.get_data_from_classic_website(data) + if not data: + raise ValueError(f"register_classic_website_data {collection} {pid}: no data") + classic_ws_obj = cls.get_data_from_classic_website(data) except Exception as e: classic_ws_obj = None @@ -288,6 +313,13 @@ def create_or_update_migrated_data( if obj.is_up_to_date(isis_updated_date, data) and not force_update: return obj obj.updated_by = user + except cls.MultipleObjectsReturned: + qs = cls.objects.filter(collection=collection, pid=pid).order_by("-updated") + obj = qs.first() + if obj is None: + raise cls.DoesNotExist + qs.exclude(pk=obj.pk).delete() + obj.updated_by = user except cls.DoesNotExist: obj = cls() obj.collection = collection @@ -569,6 +601,11 @@ def is_up_to_date(self, file_datetime_iso): self.file_datetime_iso and self.file_datetime_iso == file_datetime_iso ) + def get_lines(self): + """Read file content and return whitespace-split tokens.""" + with open(self.file.path, mode="r", encoding="utf-8") as fp: + return fp.read().split() + @property def text(self): try: @@ -637,29 +674,43 @@ class MigratedArticle(MigratedData): ] def __str__(self): - document = self.document - return f"{document.journal.acronym} {document.issue.issue_label} {document.filename_without_extension}" + return self.path @classmethod def get_data_from_classic_website(cls, data): + if not data: + return None return classic_ws.Document(data) @property def document(self): + if not self.data: + return return classic_ws.Document(self.data) @property def n_paragraphs(self): + if not self.document: + return None return len(self.document.p_records or []) @property def pkg_name(self): - return self.document.filename_without_extension + try: + return self.document.filename_without_extension + except AttributeError: + return None @property def path(self): document = self.document - return 
f"{document.journal.acronym}/{document.issue.issue_label}/{document.filename_without_extension}" + try: + return f"{document.journal.acronym}/{document.issue.issue_label}/{document.filename_without_extension}" + except AttributeError: + journal = self.pid[1:10] + issue = self.pid[10:18] + article = self.pid[-5:] + return f"{journal}/{issue}/{article}" class JournalAcronIdFile(CommonControlField, ClusterableModel): diff --git a/migration/test_models.py b/migration/test_models.py new file mode 100644 index 000000000..7581385c5 --- /dev/null +++ b/migration/test_models.py @@ -0,0 +1,79 @@ +import unittest +from unittest.mock import MagicMock, Mock, patch + +from migration.models import MigratedData + + +class MigratedDataCreateOrUpdateTestCase(unittest.TestCase): + """Test cases for MigratedData.create_or_update_migrated_data() handling of duplicates.""" + + @patch("migration.models.now") + @patch.object(MigratedData.objects, "get") + def test_create_or_update_returns_existing_when_up_to_date(self, mock_get, mock_now): + """Test that an existing up-to-date record is returned without changes.""" + mock_now.return_value = "2024-01-01" + mock_obj = Mock(spec=MigratedData) + mock_obj.is_up_to_date.return_value = True + mock_get.return_value = mock_obj + + result = MigratedData.create_or_update_migrated_data( + user=Mock(), + collection=Mock(), + pid="pid123", + data={"key": "value"}, + isis_updated_date="20240101", + ) + + self.assertEqual(result, mock_obj) + + @patch("migration.models.now") + @patch.object(MigratedData.objects, "filter") + @patch.object(MigratedData.objects, "get") + def test_create_or_update_handles_multiple_objects_returned( + self, mock_get, mock_filter, mock_now + ): + """Test that duplicates are resolved by keeping the most recent and deleting others.""" + mock_now.return_value = "2024-01-01" + mock_collection = Mock() + mock_user = Mock() + + mock_get.side_effect = MigratedData.MultipleObjectsReturned() + + mock_recent = MagicMock(spec=MigratedData) + mock_recent.pk = 1 + mock_recent.content_type = "article" + mock_recent.collection = mock_collection + mock_recent.pid = "pid123" + mock_recent.migration_status = "TODO" + mock_recent.data = None + mock_recent.isis_created_date = "20240101" + mock_recent.isis_updated_date = None + + mock_queryset = MagicMock() + mock_ordered_qs = MagicMock() + mock_ordered_qs.first.return_value = mock_recent + mock_queryset.order_by.return_value = mock_ordered_qs + mock_filter.return_value = mock_queryset + + mock_exclude_qs = MagicMock() + mock_ordered_qs.exclude.return_value = mock_exclude_qs + + result = MigratedData.create_or_update_migrated_data( + user=mock_user, + collection=mock_collection, + pid="pid123", + data={"key": "value"}, + migration_status="TODO", + content_type="article", + isis_created_date="20240101", + ) + + # Verify the returned object is the most recent one + self.assertEqual(result, mock_recent) + # Verify duplicates were deleted via filter().exclude().delete() + mock_filter.assert_any_call(collection=mock_collection, pid="pid123") + mock_queryset.order_by.assert_called_with("-updated") + mock_ordered_qs.exclude.assert_called_once_with(pk=mock_recent.pk) + mock_exclude_qs.delete.assert_called_once() + # Verify the most recent was kept and saved + mock_recent.save.assert_called_once() diff --git a/migration/wagtail_hooks.py b/migration/wagtail_hooks.py index d89949985..14519449c 100644 --- a/migration/wagtail_hooks.py +++ b/migration/wagtail_hooks.py @@ -236,7 +236,6 @@ class MigrationViewSetGroup(SnippetViewSetGroup): 
menu_icon = "folder-open-inverse" menu_order = get_menu_order("migration") items = ( - ClassicWebsiteConfigurationViewSet, MigratedDataViewSet, MigratedJournalViewSet, MigratedIssueViewSet, diff --git a/package/models.py index ea62c3967..4d2fbdf89 100644 --- a/package/models.py +++ b/package/models.py @@ -701,9 +701,10 @@ def upload_to_the_cloud( response = {} if content: + mimetype = mimetypes.types_map.get(ext.lower()) or "application/octet-stream" response = minio_push_file_content( content=content, - mimetype=mimetypes.types_map[ext], + mimetype=mimetype, object_name=f"{self.subdir}/{filename}", ) uri = response["uri"] @@ -768,6 +769,12 @@ def upload_components_to_the_cloud(self, user, original_pkg_components, zip_file else: component = original_pkg_components.get(item) or {} + if not ext: + legacy_uri = component.get("legacy_uri") + if legacy_uri: + _, ext = os.path.splitext(legacy_uri) + if ext: + item = item + ext result = self.upload_to_the_cloud( user=user, filename=item, @@ -781,10 +788,12 @@ return {"xml_with_pre": xml_with_pre, "items": items} def upload_xml_to_the_cloud(self, user, xml_with_pre): - replacements = { - item.basename: item.uri - for item in self.components.filter(uri__isnull=False).iterator() - } + replacements = {} + for item in self.components.filter(uri__isnull=False).iterator(): + replacements[item.basename] = item.uri + name_without_ext, file_ext = os.path.splitext(item.basename) + if file_ext: + replacements[name_without_ext] = item.uri if replacements: xml_assets = ArticleAssets(xml_with_pre.xmltree) xml_assets.replace_names(replacements) diff --git a/pid_provider/client.py index ddceb920d..abdc43849 100644 --- a/pid_provider/client.py +++ b/pid_provider/client.py @@ -36,6 +36,7 @@ def __init__( timeout=None, api_username=None, api_password=None, + verify=False, ): self.token = None self.pid_provider_api_post_xml = pid_provider_api_post_xml @@ -43,6 +44,7 @@ self.api_username = api_username self.api_password = api_password self.timeout = timeout or 120 + self.verify = verify @property def enabled(self): @@ -205,7 +207,7 @@ def _post_xml(self, zip_xml_file_path, token, timeout): files=files, headers=header, timeout=timeout, - verify=False, + verify=self.verify, json=True, ) except Exception as e: @@ -299,7 +301,7 @@ def _post_fix_pid_v2(self, pid_v3, correct_pid_v2, token, timeout): data={"pid_v3": pid_v3, "correct_pid_v2": correct_pid_v2}, headers=header, timeout=timeout, - verify=False, + verify=self.verify, json=True, ) except Exception as e: diff --git a/pid_provider/models.py index fe92362f7..28384fcc5 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -1272,14 +1272,16 @@ def is_registered( @classmethod def get_by_pid_v3(cls, pid_v3, partial_pid_v2=None, pid_v2=None): params = {} + if pid_v3: + params["v3"] = pid_v3 if pid_v2: params["v2"] = pid_v2 if partial_pid_v2: params["v2__contains"] = partial_pid_v2 try: - return cls.objects.get(v3=pid_v3, **params) + return cls.objects.get(**params) except cls.MultipleObjectsReturned as e: - return cls.objects.filter(v3=pid_v3, **params).order_by("-updated").first() + return cls.objects.filter(**params).order_by("-updated").first() @classmethod @profile_classmethod
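A note on the get_by_pid_v3 change above: building the lookup params dynamically, instead of always passing v3=pid_v3, means a falsy pid_v3 no longer forces a v3 IS NULL match, so a record can now be found by pid_v2 alone. A minimal sketch of the difference (Django ORM; the PID values are hypothetical):

    params = {}
    pid_v3, pid_v2 = None, "S0034-89102023000100001"  # hypothetical PIDs
    if pid_v3:
        params["v3"] = pid_v3
    if pid_v2:
        params["v2"] = pid_v2
    # before: PidProviderXML.objects.get(v3=None, v2=pid_v2)  matched only rows with NULL v3
    # after:  PidProviderXML.objects.get(**params)            leaves v3 unconstrained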
diff --git a/pid_provider/query_params.py index ef3017b24..bfb6901bd 100644 --- a/pid_provider/query_params.py +++ b/pid_provider/query_params.py @@ -63,9 +63,19 @@ def aop_pid(self): @cached_property def pkg_name(self): - """Nome do pacote do documento.""" + """Nome do pacote do documento, parâmetro usado ao instanciar XMLAdapter.""" return self.xml_adapter.pkg_name - + + @cached_property + def sps_pkg_name(self): + """Nome do pacote do documento (deprecated).""" + return self.xml_adapter.sps_pkg_name + + @cached_property + def deprecated_sps_pkg_name(self): + """Nome do pacote do documento (deprecated).""" + return self.xml_adapter.sps_pkg_name + @cached_property def main_doi(self): """DOI principal do documento.""" @@ -176,8 +186,15 @@ def identifier_queries(self): q |= Q(v2=self.aop_pid) | Q(aop_pid=self.aop_pid) # Package name + pkg_names = set() if self.pkg_name: - q |= Q(pkg_name=self.pkg_name) + pkg_names.add(self.pkg_name) + if self.sps_pkg_name: + pkg_names.add(self.sps_pkg_name) + if self.deprecated_sps_pkg_name: + pkg_names.add(self.deprecated_sps_pkg_name) + if pkg_names: + q |= Q(pkg_name__in=pkg_names) # # DOI principal # if self.main_doi: diff --git a/pid_provider/requester.py index 4373260e1..e4f2ae3ed 100644 --- a/pid_provider/requester.py +++ b/pid_provider/requester.py @@ -246,13 +246,19 @@ def local_registration( resp["synchronized"] = registered.get("registered_in_core") and bool( resp.get("v3") ) - registered.update(resp) + try: + resp.pop("xml_with_pre") + except KeyError: + pass detail = resp + registered.update(resp) op.finish(user, completed=True, detail=detail) return resp except Exception as e: + logging.exception(e) + logging.error(detail) exc_type, exc_value, exc_traceback = sys.exc_info() op.finish(user, completed=False, exception=e, exc_traceback=exc_traceback) return {"error_msg": str(e), "error_type": str(type(e))} diff --git a/pid_provider/tasks.py index 50a47ccb6..3810bbacf 100644 --- a/pid_provider/tasks.py +++ b/pid_provider/tasks.py @@ -80,6 +80,7 @@ def task_load_records_from_counter_dict( timeout=None, force_update=None, opac_domain=None, + stop=None, ): """ Coleta documentos de uma coleção específica via endpoint counter_dict do OPAC. @@ -100,6 +101,8 @@ def task_load_records_from_counter_dict( timeout (int, optional): Timeout em segundos para requisições HTTP force_update (bool, optional): Força atualização mesmo se já existe opac_domain (str, optional): Domínio do OPAC (default: "www.scielo.br") + stop (int, optional): Quantidade máxima de itens a coletar. + Se None, coleta todos os itens disponíveis.
Returns: None @@ -134,6 +137,7 @@ def task_load_records_from_counter_dict( ) # Itera sobre documentos e dispara tarefas individuais + count = 0 for document in harvester.harvest_documents(): origin_date = document.get("origin_date") task_load_record_from_xml_url.delay( @@ -145,6 +149,9 @@ def task_load_records_from_counter_dict( origin_date=origin_date, force_update=force_update, ) + count += 1 + if stop and count >= stop: + break except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() @@ -245,3 +252,4 @@ def task_load_record_from_xml_url( }, ) + diff --git a/pid_provider/wagtail_hooks.py b/pid_provider/wagtail_hooks.py index 228742585..59a56fb88 100644 --- a/pid_provider/wagtail_hooks.py +++ b/pid_provider/wagtail_hooks.py @@ -6,7 +6,7 @@ from config.menu import get_menu_order from core.views import CommonControlFieldViewSet -from pid_provider.models import XMLVersion, FixPidV2, OtherPid, PidProviderConfig, PidProviderXML +from pid_provider.models import XMLURL, XMLVersion, FixPidV2, OtherPid, PidProviderConfig, PidProviderXML class PidProviderXMLViewSet(CommonControlFieldViewSet): @@ -178,6 +178,26 @@ class XMLVersionViewSet(CommonControlFieldViewSet): "available_since", ) +class XMLURLViewSet(CommonControlFieldViewSet): + model = XMLURL + menu_label = _("XML URLs") + menu_icon = "folder" + menu_order = 300 + add_to_settings_menu = False + list_per_page = 10 + + # Configuração de listagem + list_display = [ + "url", + "status", + "pid", + ] + search_fields = ( + "url", + "status", + "pid", + ) + # Grupo de ViewSets class PidProviderViewSetGroup(SnippetViewSetGroup): @@ -190,6 +210,7 @@ class PidProviderViewSetGroup(SnippetViewSetGroup): FixPidV2ViewSet, PidProviderConfigViewSet, XMLVersionViewSet, + XMLURLViewSet, ) diff --git a/proc/article_controller.py b/proc/article_controller.py index d42d7d7dc..1409c6724 100644 --- a/proc/article_controller.py +++ b/proc/article_controller.py @@ -1,6 +1,11 @@ import logging import sys -from datetime import datetime + +from django.db.models import Q, Count +from django.utils.translation import gettext_lazy as _ + +from migration import choices as migration_choices +from proc.models import ArticleProc def log_event(execution_log, level, event_type, message, extra_data=None): @@ -182,3 +187,83 @@ def publish_collection_articles( statistics["qa_api_data_error"] = bool(qa_api_data.get("error")) statistics["public_api_data_error"] = bool(public_api_data.get("error")) return statistics, execution_log + + +class ClassicWebsiteArticlePidTracker: + TASK_NAME = "proc.source_classic_website.track_classic_website_article_pids" + + def __init__(self, user, collection): + self.user = user + self.collection = collection + + def create_article_proc_for_pids(self, pids): + if not pids: + return [] + registered_pids = set( + ArticleProc.objects.filter( + collection=self.collection, pid__in=pids + ).values_list("pid", flat=True) + ) + todo = set(pids) - registered_pids + for pid in todo: + yield ArticleProc( + pid=pid, + collection=self.collection, + creator=self.user, + pid_status=migration_choices.PID_STATUS_MISSING, + ) + + def update_pid_status(self, classic_website_pids): + + pids = set(classic_website_pids) + + qs = ArticleProc.objects.filter( + collection=self.collection + ).exclude( + pid_status__in=[ + migration_choices.PID_STATUS_EXCEEDING, + ] + ) + + qs.filter( + migrated_data__isnull=False, + ).exclude( + pid_status__in=[migration_choices.PID_STATUS_MATCHED] + ).update(pid_status=migration_choices.PID_STATUS_MATCHED) + pids = pids - 
set(qs.filter(pid_status=migration_choices.PID_STATUS_MATCHED).values_list("pid", flat=True)) + + qs.filter( + migrated_data__isnull=True, + ).exclude( + pid_status__in=[migration_choices.PID_STATUS_MISSING], + ).update(pid_status=migration_choices.PID_STATUS_MISSING) + pids = pids - set(qs.filter(pid_status=migration_choices.PID_STATUS_MISSING).values_list("pid", flat=True)) + + qs.exclude( + pid_status__in=[migration_choices.PID_STATUS_MATCHED, + migration_choices.PID_STATUS_MISSING] + ).update(pid_status=migration_choices.PID_STATUS_EXCEEDING) + pids = pids - set(qs.filter(pid_status=migration_choices.PID_STATUS_EXCEEDING).values_list("pid", flat=True)) + + return pids + + def bulk_create(self, new_pids): + if new_pids: + ArticleProc.objects.bulk_create(self.create_article_proc_for_pids(new_pids), batch_size=500) + + +def track_classic_website_article_pids( + user, collection, classic_website_config, +): + tracker = ClassicWebsiteArticlePidTracker(user, collection) + classic_website_pids = set(classic_website_config.get_pid_list()) + + new_pids = set(tracker.update_pid_status(classic_website_pids)) + tracker.bulk_create(new_pids) + + data = {"collection": collection.acron} + for item in ArticleProc.objects.filter(collection=collection).values("pid_status").annotate( + total=Count("pid") + ): + data[item["pid_status"]] = item["total"] + return data \ No newline at end of file diff --git a/proc/controller.py b/proc/controller.py index b118e80a2..f09779bcd 100644 --- a/proc/controller.py +++ b/proc/controller.py @@ -8,7 +8,6 @@ ProcBaseException, UnableToCreateIssueProcsError, ) -from proc.models import IssueProc, JournalProc # Imports de Publication from proc.publisher import ( @@ -31,9 +30,12 @@ # Imports da API Core from proc.source_core_api import ( + BaseDataChecker, FetchIssueDataException, FetchJournalDataException, FetchMultipleJournalsError, + IssueDataChecker, + JournalDataChecker, UnableToGetJournalDataFromCoreError, create_or_update_issue, create_or_update_journal, @@ -48,6 +50,10 @@ "create_or_update_issue", "fetch_and_create_journal", "fetch_and_create_issues", + # Core API classes + "BaseDataChecker", + "JournalDataChecker", + "IssueDataChecker", # Classic Website functions "create_or_update_migrated_journal", "create_or_update_migrated_issue", @@ -72,76 +78,10 @@ def ensure_journal_proc_exists(user, journal): - """ - Verifica e garante a existência de JournalProc para o journal - - Args: - user: O usuário que executa a operação - journal: O journal que deve ter um JournalProc - - Returns: - JournalProc: O objeto JournalProc existente ou recém-criado - - Raises: - JournalProc.DoesNotExist: Se não foi possível criar JournalProc - """ - # Verificar se já existe - if ( - journal.missing_fields - or not JournalProc.objects.filter(journal=journal, acron__isnull=False).exists() - ): - # Não existe, criar um novo - create_or_update_journal( - journal_title=journal.title, - issn_electronic=journal.official_journal.issn_electronic, - issn_print=journal.official_journal.issn_print, - user=user, - force_update=True, - ) - - journal_proc = JournalProc.objects.filter( - journal=journal, acron__isnull=False - ).first() - if journal_proc: - if not journal.journal_acron: - journal.journal_acron = journal_proc.acron - journal.save() - return True - - raise JournalProc.DoesNotExist(f"JournalProc does not exist: {journal}") + """Delega para JournalDataChecker.ensure_proc_exists.""" + return JournalDataChecker.ensure_proc_exists(user, journal) def ensure_issue_proc_exists(user, issue): - """ - 
Verifica e garante a existência de IssueProc para o issue - - Args: - user: O usuário que executa a operação - issue: O issue que deve ter um IssueProc - - Returns: - IssueProc: O objeto IssueProc existente ou recém-criado - - Raises: - IssuePrerequisiteError: Se não foi possível criar IssueProc - """ - # Verificar se o IssueProc já existe - if IssueProc.objects.filter(issue=issue).exists(): - return True - - # Não existe, criar um novo - create_or_update_issue( - journal=issue.journal, - pub_year=issue.publication_year, - volume=issue.volume, - suppl=issue.supplement, - number=issue.number, - user=user, - force_update=True, - ) - - # Verificar se foi criado - if IssueProc.objects.filter(issue=issue).exists(): - return True - - raise IssueProc.DoesNotExist(f"IssueProc does not exist: {issue}") + """Delega para IssueDataChecker.ensure_proc_exists.""" + return IssueDataChecker.ensure_proc_exists(user, issue) diff --git a/proc/migrations/0013_articleproc_pid_status_and_more.py b/proc/migrations/0013_articleproc_pid_status_and_more.py new file mode 100644 index 000000000..2c1d7b80c --- /dev/null +++ b/proc/migrations/0013_articleproc_pid_status_and_more.py @@ -0,0 +1,40 @@ +# Generated by Django 5.2.3 on 2026-03-18 19:54 + +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("collection", "0004_alter_collection_acron_alter_collection_name_and_more"), + ("migration", "0011_alter_journalacronidfile_file_and_more"), + ("package", "0007_delete_previewarticlepage"), + ("proc", "0012_alter_articleproc_processed_xml_and_more"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AddField( + model_name="articleproc", + name="pid_status", + field=models.CharField( + blank=True, + choices=[ + ("missing", "Missing"), + ("matched", "Matched"), + ("exceeding", "Exceeding"), + ("unknown", "Unknown"), + ], + default="unknown", + max_length=10, + null=True, + verbose_name="PID Status", + ), + ), + migrations.AddIndex( + model_name="articleproc", + index=models.Index( + fields=["pid_status"], name="proc_articl_pid_sta_604d39_idx" + ), + ), + ] diff --git a/proc/models.py b/proc/models.py index 40d83317c..cfe56bcba 100644 --- a/proc/models.py +++ b/proc/models.py @@ -28,10 +28,12 @@ from collection.models import Collection from core.models import CommonControlField from core.utils.file_utils import delete_files +from core.utils.sanitize import sanitize_for_json from htmlxml.models import HTMLXML from issue.models import Issue from journal.choices import JOURNAL_AVAILABILTY_STATUS from journal.models import Journal +from migration import choices as migration_choices from migration.controller import ( PkgZipBuilder, XMLVersionXmlWithPreError, @@ -58,6 +60,7 @@ class NoDocumentRecordsToMigrateError(Exception): ... 
+ class Operation(CommonControlField): name = models.CharField( @@ -169,7 +172,7 @@ def finish( try: json.dumps(detail) except Exception as exc_detail: - detail = str(detail) + detail = sanitize_for_json(detail) self.detail = detail self.completed = completed @@ -1538,6 +1541,14 @@ class ArticleProc(BaseProc, ClusterableModel): blank=True, null=True, ) + pid_status = models.CharField( + _("PID Status"), + max_length=10, + choices=migration_choices.PID_STATUS, + default=migration_choices.PID_STATUS_UNKNOWN, + blank=True, + null=True, + ) processed_xml = models.FileField( _("Processed XML"), upload_to=proc_directory_path, @@ -1556,6 +1567,7 @@ class ArticleProc(BaseProc, ClusterableModel): AutocompletePanel("sps_pkg", read_only=True), ] panel_status = [ + FieldPanel("pid_status"), FieldPanel("xml_status"), FieldPanel("sps_pkg_status"), FieldPanel("migration_status"), @@ -1582,6 +1594,7 @@ class Meta: ordering = ["-updated"] indexes = [ models.Index(fields=["pkg_name"]), + models.Index(fields=["pid_status"]), models.Index(fields=["xml_status"]), models.Index(fields=["sps_pkg_status"]), ] diff --git a/proc/source_core_api.py b/proc/source_core_api.py index b1b40b457..ecbfdcb8d 100644 --- a/proc/source_core_api.py +++ b/proc/source_core_api.py @@ -4,6 +4,7 @@ import logging import sys +from abc import ABC, abstractmethod from django.conf import settings from django.db.models import Q @@ -64,6 +65,141 @@ class FetchIssueDataException(ProcBaseException): pass +class BaseDataChecker(ABC): + """Classe base abstrata com lógica comum de consulta local-first com fallback ao core.""" + + model = None # Journal ou Issue — definido nas subclasses + key = None # "journal" ou "issue" — definido nas subclasses + + def __init__(self, user): + self._user = user + self.core_communication_error = False + + @abstractmethod + def get_local(self): + """Consulta dados locais. Deve ser implementado pelas subclasses.""" + + @abstractmethod + def fetch_from_core(self, **kwargs): + """Consulta dados remotos e atualiza os dados locais. Deve ser implementado pelas subclasses.""" + + def get_or_fetch(self): + """Consulta dados locais; se inexistentes, consulta o core e tenta novamente.""" + # 1. consulta dados locais + try: + return self.get_local() + except self.model.DoesNotExist: + pass + + # 2. dados locais inexistentes, consulta dados remotos + # e atualiza os dados locais com os dados remotos + self.fetch_from_core() + + # 3. 
consulta dados locais novamente após a tentativa de busca remota + try: + return self.get_local() + except self.model.DoesNotExist: + return None + + def refresh(self, response): + """Consulta dados remotos e atualiza response.""" + self.fetch_from_core() + if self.core_communication_error: + response["core_communication_error"] = True + return + + # consulta dados locais após a atualização remota + try: + response[self.key] = self.get_local() + except self.model.DoesNotExist: + pass + + +class JournalDataChecker(BaseDataChecker): + """Consulta e valida dados de journal usando dados locais e API do core.""" + + model = Journal + key = "journal" + + def __init__(self, journal_title, issn_electronic, issn_print, user): + super().__init__(user) + self.journal_title = journal_title + self.issn_electronic = issn_electronic + self.issn_print = issn_print + + @classmethod + def from_xmltree(cls, xmltree, user): + """Cria instância a partir de xmltree.""" + from packtools.sps.models.journal_meta import ISSN, Title + + xml = Title(xmltree) + journal_title = xml.journal_title + xml = ISSN(xmltree) + issn_electronic = xml.epub + issn_print = xml.ppub + return cls(journal_title, issn_electronic, issn_print, user) + + def get_local(self): + """Consulta dados locais de journal.""" + return Journal.get_registered( + self.journal_title, self.issn_electronic, self.issn_print + ) + + def fetch_from_core(self, force_update=True): + """Consulta dados remotos de journal e atualiza os dados locais.""" + self.core_communication_error = False + try: + fetch_and_create_journal( + self._user, + issn_electronic=self.issn_electronic, + issn_print=self.issn_print, + force_update=force_update, + ) + except FetchJournalDataException as e: + self.core_communication_error = True + logging.warning(f"Core API communication failure for journal: {e}") + + @staticmethod + def ensure_proc_exists(user, journal): + """ + Verifica e garante a existência de JournalProc para o journal. 
+ + Args: + user: O usuário que executa a operação + journal: O journal que deve ter um JournalProc + + Returns: + True se JournalProc existe + + Raises: + JournalProc.DoesNotExist: Se não foi possível criar JournalProc + """ + if ( + journal.missing_fields + or not JournalProc.objects.filter( + journal=journal, acron__isnull=False + ).exists() + ): + create_or_update_journal( + journal_title=journal.title, + issn_electronic=journal.official_journal.issn_electronic, + issn_print=journal.official_journal.issn_print, + user=user, + force_update=True, + ) + + journal_proc = JournalProc.objects.filter( + journal=journal, acron__isnull=False + ).first() + if journal_proc: + if not journal.journal_acron: + journal.journal_acron = journal_proc.acron + journal.save() + return True + + raise JournalProc.DoesNotExist(f"JournalProc does not exist: {journal}") + + def create_or_update_journal( journal_title, issn_electronic, issn_print, user, force_update=None ): @@ -80,9 +216,12 @@ def create_or_update_journal( | Q(journal__official_journal__issn_print=issn_print) ).exists() ) + + checker = JournalDataChecker(journal_title, issn_electronic, issn_print, user) + if not force_update: try: - return Journal.get_registered(journal_title, issn_electronic, issn_print) + return checker.get_local() except Journal.DoesNotExist: pass @@ -99,7 +238,7 @@ def create_or_update_journal( pass try: - return Journal.get_registered(journal_title, issn_electronic, issn_print) + return checker.get_local() except Journal.DoesNotExist as exc: return None @@ -135,7 +274,7 @@ def fetch_and_create_journal( issn_electronic=issn_electronic, issn_print=issn_print, ) - + for result in results: try: process_journal_result( @@ -227,7 +366,7 @@ def process_journal_result( ) official_journal.add_related_journal( result.get("previous_journal_title"), - (result.get("next_journal_title") or {}).get("next_journal_title"), + result.get("next_journal_title"), ) # Cria/atualiza o journal @@ -239,15 +378,14 @@ def process_journal_result( ) journal.core_synchronized = False journal.contact_address = result.get("contact_address") - journal.contat_name = result.get("contact_name") + journal.contact_name = result.get("contact_name") # Atualiza campos adicionais do journal journal.license_code = result.get("journal_use_license") journal.nlm_title = result.get("nlm_title") journal.doi_prefix = result.get("doi_prefix") - journal.wos_areas = result["wos_areas"] - journal.logo_url = result["url_logo"] + journal.wos_areas = result.get("wos_areas", []) + journal.logo_url = result.get("url_logo") journal.submission_online_url = result.get("submission_online_url") - journal.doi_prefix = result.get("doi_prefix") journal.save() journal.journal_email.all().delete() @@ -296,6 +434,10 @@ def process_journal_result( ) journal.sponsor.add(Sponsor.create_or_update(user, journal, institution)) + # Processa subject descriptors (novo campo da API) + for item in result.get("subject_descriptor") or []: + journal.subject.add(Subject.create_or_update(user, item["value"])) + no_lang = [] for item in result.get("mission"): if not item["language"]: @@ -320,7 +462,7 @@ def process_journal_result( journal=journal, acron=item["journal_acron"], title=journal.title, - availability_status=item.get("availability_status") or "C", + availability_status=item.get("status") or "C", migration_status=tracker_choices.PROGRESS_STATUS_DONE, force_update=force_update, ) @@ -344,9 +486,111 @@ def process_journal_result( ) journal.core_synchronized = True journal.save() + + # TODO: Campos da 
API não processados ainda: + # - copyright (array) + # - table_of_contents (array) + # - location (object with city_name, state_name, country_name, etc.) + # - text_language (array) + # - title_in_database (array) + # - crossmark_policy (array) + # - acronym (root level field) + # - other_titles + return journal +class IssueDataChecker(BaseDataChecker): + """Consulta e valida dados de issue usando dados locais e API do core.""" + + model = Issue + key = "issue" + + def __init__(self, journal, publication_year, volume, suppl, number, user): + super().__init__(user) + self._journal = journal + self.publication_year = publication_year + self.volume = volume + self.suppl = suppl + self.number = number + + @classmethod + def from_xmltree(cls, xmltree, user, journal): + """Cria instância a partir de xmltree.""" + from packtools.sps.models.dates import ArticleDates + from packtools.sps.models.front_articlemeta_issue import ArticleMetaIssue + + xml = ArticleDates(xmltree) + try: + publication_year = xml.collection_date["year"] + except (TypeError, KeyError, ValueError): + try: + publication_year = xml.article_date["year"] + except (TypeError, KeyError, ValueError): + publication_year = None + + xml = ArticleMetaIssue(xmltree) + return cls(journal, publication_year, xml.volume, xml.suppl, xml.number, user) + + def get_local(self): + """Consulta dados locais de issue.""" + return Issue.get( + journal=self._journal, + volume=self.volume, + supplement=self.suppl, + number=self.number, + ) + + def fetch_from_core(self): + """Consulta dados remotos de issue e atualiza os dados locais.""" + self.core_communication_error = False + try: + fetch_and_create_issues( + self._journal, + self.publication_year, + self.volume, + self.suppl, + self.number, + self._user, + ) + except FetchIssueDataException as e: + self.core_communication_error = True + logging.warning(f"Core API communication failure for issue: {e}") + + @staticmethod + def ensure_proc_exists(user, issue): + """ + Verifica e garante a existência de IssueProc para o issue. 
+ + Args: + user: O usuário que executa a operação + issue: O issue que deve ter um IssueProc + + Returns: + True se IssueProc existe + + Raises: + IssueProc.DoesNotExist: Se não foi possível criar IssueProc + """ + if IssueProc.objects.filter(issue=issue).exists(): + return True + + create_or_update_issue( + journal=issue.journal, + pub_year=issue.publication_year, + volume=issue.volume, + suppl=issue.supplement, + number=issue.number, + user=user, + force_update=True, + ) + + if IssueProc.objects.filter(issue=issue).exists(): + return True + + raise IssueProc.DoesNotExist(f"IssueProc does not exist: {issue}") + + def create_or_update_issue( journal, pub_year, volume, suppl, number, user, force_update=None ): @@ -367,29 +611,22 @@ def create_or_update_issue( ).exists() ) + checker = IssueDataChecker(journal, pub_year, volume, suppl, number, user) + if not force_update: try: - return Issue.get( - journal=journal, - volume=volume, - supplement=suppl, - number=number, - ) + return checker.get_local() except Issue.DoesNotExist: pass try: fetch_and_create_issues(journal, pub_year, volume, suppl, number, user) except FetchIssueDataException as exc: + logging.warning(f"Erro ao buscar dados de issue: {exc}") pass try: - return Issue.get( - journal=journal, - volume=volume, - supplement=suppl, - number=number, - ) + return checker.get_local() except Issue.DoesNotExist as exc: return None @@ -397,51 +634,144 @@ def create_or_update_issue( def fetch_and_create_issues(journal, pub_year, volume, suppl, number, user): """ Busca dados de issues na API Core e cria/atualiza as entidades correspondentes. + Agora com suporte a paginação para processar todos os resultados. """ if not settings.ISSUE_API_URL: return None if journal: issn_print = journal.official_journal.issn_print issn_electronic = journal.official_journal.issn_electronic + + # Processa issues com paginação + for result in fetch_issue_data_with_pagination( + issn_print=issn_print, + issn_electronic=issn_electronic, + volume=volume, + ): + try: + process_issue_result(user, journal, result) + except Exception as e: + logging.exception(e) + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + e=e, + exc_traceback=exc_traceback, + detail={ + "task": "proc.source_core_api.fetch_and_create_issues", + "username": user.username, + "issn_print": issn_print, + "issn_electronic": issn_electronic, + "volume": volume, + "data": result, + }, + ) + + +def fetch_issue_data_with_pagination( + issn_print=None, + issn_electronic=None, + volume=None, +): + """ + Busca dados de issues na API Core com suporte a paginação. + Retorna um gerador que yield cada resultado individualmente. 
+ """ + # Parâmetros iniciais + params = { + "issn_print": issn_print, + "issn_electronic": issn_electronic, + "volume": volume, + } + params = {k: v for k, v in params.items() if v} + + url = settings.ISSUE_API_URL + if not url: + return [] + + while url: try: - params = { - "issn_print": issn_print, - "issn_electronic": issn_electronic, - "volume": volume, - } - params = {k: v for k, v in params.items() if v} response = fetch_data( - url=settings.ISSUE_API_URL, - params=params, + url=url, + params=params, # Params só na primeira requisição json=True, timeout=CORE_TIMEOUT, ) - except Exception as e: raise FetchIssueDataException( - f"fetch_and_create_issue: {settings.ISSUE_API_URL} {params} {e}" + f"fetch_issue_data_with_pagination: {url} {params} {e}" ) + else: + # Próxima URL (se existir) + url = response.get("next") + params = {} + yield from response.get("results") or [] - issue = None - for result in response.get("results") or []: - logging.info(f"fetch_and_create_issues {params}: {result}") - issue = Issue.get_or_create( - journal=journal, - volume=result["volume"], - supplement=result["supplement"], - number=result["number"], - publication_year=result["year"], - user=user, + +def process_issue_result(user, journal, result): + """ + Processa um único resultado de issue da API e cria/atualiza as entidades correspondentes. + """ + logging.info( + f"process_issue_result: {journal} {result.get('volume')} {result.get('number')} {result.get('supplement')}" + ) + + # Cria/atualiza o issue com todos os campos da API + issue = Issue.get_or_create( + journal=journal, + volume=result.get("volume"), + supplement=result.get("supplement"), + number=result.get("number"), + publication_year=result.get("year"), + user=user, + order=result.get("order"), + issue_pid_suffix=result.get("issue_pid_suffix"), + ) + + # Atualiza campos adicionais do issue se disponíveis na API + if hasattr(issue, "season") and result.get("season"): + issue.season = result.get("season") + if hasattr(issue, "month") and result.get("month"): + issue.month = result.get("month") + + issue.save() + + for journal_proc in JournalProc.objects.filter(journal=journal): + try: + issue_proc = IssueProc.objects.get( + collection=journal_proc.collection, issue=issue + ) + except IssueProc.DoesNotExist: + issue_proc = IssueProc.create_from_journal_proc_and_issue( + user, + journal_proc, + issue, ) - for journal_proc in JournalProc.objects.filter(journal=journal): - try: - issue_proc = IssueProc.objects.get( - collection=journal_proc.collection, issue=issue - ) - except IssueProc.DoesNotExist: - issue_proc = IssueProc.create_from_journal_proc_and_issue( - user, - journal_proc, - issue, - ) + # TODO: Processar campos adicionais da API de issues: + # - legacy_issue (array) - PIDs legados + # - sections (array) - seções do issue + # - issue_titles (array) - títulos específicos + # - bibliographic_strips (array) - strips bibliográficas + # - license (array) - licenças específicas do issue + + +# TODO FUTURO - Campos da API de Issues não processados ainda: +# Os seguintes campos estão disponíveis na API de issues mas ainda não são processados: +# +# 1. legacy_issue: Array com PIDs legados para compatibilidade +# Exemplo: [{"pid": "0044-596720230001", "collection": "scl"}] +# +# 2. sections: Array de seções do issue com códigos e textos multilíngue +# Exemplo: [{"text": "Original Articles", "code": "AA670", "language": "en", ...}] +# +# 3. issue_titles: Array de títulos específicos do issue (se houver) +# +# 4. 
bibliographic_strips: Array de strips bibliográficas em diferentes idiomas +# Exemplo: [{"text": "Acta Amaz., vol.53, no.1, Manaus, Jan./Mar., 2023", "language": "en"}] +# +# 5. license: Array de licenças específicas do issue (se diferentes do journal) +# +# Para implementar esses campos no futuro, será necessário: +# - Criar modelos para IssueSection, IssueBibliographicStrip, etc. ou +# - Adicionar campos JSON no modelo Issue para armazenar esses dados +# - Implementar processamento desses campos na função process_issue_result diff --git a/proc/tasks.py b/proc/tasks.py index 6f94b4e20..92a71ae7b 100644 --- a/proc/tasks.py +++ b/proc/tasks.py @@ -19,8 +19,8 @@ create_or_update_migrated_journal, fetch_and_create_journal, migrate_issue, - migrate_journal, ) +from proc.article_controller import track_classic_website_article_pids from proc.models import ArticleProc, IssueProc, JournalProc from publication.api.document import publish_article from publication.api.issue import publish_issue, sync_issue @@ -356,6 +356,7 @@ def task_publish_journals( collection_acron=None, journal_acron=None, force_update=False, + verify=False, ): task_params = { "task": "proc.tasks.task_publish_journals", @@ -378,6 +379,7 @@ def task_publish_journals( api_data = get_api_data(collection, "journal", website_kind) if not api_data or api_data.get("error"): continue + api_data["verify"] = verify task_exec = TaskExecution( name="proc.tasks.task_publish_journals", item=f"{collection_acron}-{journal_acron} {website_kind}", @@ -651,6 +653,7 @@ def task_publish_issues( issue_folder=None, publication_year=None, force_update=False, + verify=False, ): task_params = { "collection_acron": collection_acron, @@ -676,6 +679,7 @@ def task_publish_issues( api_data = get_api_data(collection, "issue", website_kind) if not api_data or api_data.get("error"): continue + api_data["verify"] = verify task_exec = TaskExecution( name="proc.tasks.task_publish_issues", item=f"{collection_acron}-{journal_acron}-{issue_folder}-{publication_year} {website_kind}", @@ -944,6 +948,14 @@ def task_migrate_and_publish_articles_by_journal( task_exec.add_event(f"Select journal issues which docs_status or files_status in {status} and/or has articles to process") events = [] + article_issue_proc_list = ArticleProc.objects.filter(issue_proc__journal_proc=journal_proc).filter( + Q(migration_status__in=status) + | Q(xml_status__in=status) + | Q(sps_pkg_status__in=status) + | Q(qa_ws_status__in=status) + | Q(public_ws_status__in=status) + ).values_list("issue_proc__id", "issue_proc__pid").distinct() + issue_proc_list = IssueProc.get_id_and_pid_list_to_process( journal_proc, issue_folder, @@ -952,7 +964,9 @@ def task_migrate_and_publish_articles_by_journal( status, events, ) - total_issues_to_process = issue_proc_list.count() + + issue_proc_list = set(issue_proc_list) | set(article_issue_proc_list) + total_issues_to_process = len(issue_proc_list) task_exec.add_event(events) task_exec.add_number("total_issues_to_process", total_issues_to_process) task_exec.total_to_process = total_issues_to_process @@ -1207,6 +1221,7 @@ def task_publish_articles( issue_folder=None, publication_year=None, force_update=False, + verify=False, ): task_params = { "user_id": user_id, @@ -1234,6 +1249,7 @@ def task_publish_articles( api_data = get_api_data(collection, "article", website_kind) if not api_data or api_data.get("error"): continue + api_data["verify"] = verify task_exec = TaskExecution( name="proc.tasks.task_publish_articles", @@ -1480,22 +1496,14 @@ def 
task_exclude_article_repetition(self, journal_proc_id, qa_api_data=None, pub response = Article.fix_sps_pkg_names(journal_articles_to_fix_sps_pkg_names_qs) task_exec.add_event(f"fixed sps_pkg_names: {response}") - issues = set() - for field_name in ("pid_v2", "sps_pkg__sps_pkg_name"): - repeated_items = Article.get_repeated_items(field_name, journal) - task_exec.add_number(f"repeated_by_{field_name}", repeated_items.count()) - for repeated_value in repeated_items: - try: - events = Article.exclude_repetitions(user, field_name, repeated_value, timeout=timeout) - task_exec.add_event(events) - except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - task_exec.add_exception( - { - f"repeated_by_{field_name}": repeated_value, - "traceback": traceback.format_exc(), - } - ) + results = Article.exclude_inconvenient_articles(journal, user, timeout) + for event in results["events"]: + task_exec.add_event(event) + for key, value in results["numbers"].items(): + task_exec.add_number(key, value) + for exc in results["exceptions"]: + task_exec.add_exception(exc) + task_exec.finish() except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() @@ -1584,3 +1592,33 @@ def task_remove_duplicate_issues( except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() task_exec.finish(exception=e, exc_traceback=exc_traceback) + + +@celery_app.task(bind=True) +def task_track_classic_website_article_pids( + self, username, user_id=None, collection_acron=None, +): + task_params = { + "username": username, + "collection_acron": collection_acron, + } + task_exec = TaskExecution( + name="proc.tasks.task_track_classic_website_article_pids", + item=f"{collection_acron or 'all'}", + params=task_params, + ) + try: + user = _get_user(user_id=user_id, username=username) + for collection in _get_collections(collection_acron): + classic_website_config = controller.get_classic_website_config( + collection.acron + ) + result = track_classic_website_article_pids( + user, collection, classic_website_config, + ) + if result: + task_exec.add_event(result) + task_exec.finish() + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + task_exec.finish(exception=e, exc_traceback=exc_traceback) diff --git a/proc/tests.py b/proc/tests.py new file mode 100644 index 000000000..d0af74de6 --- /dev/null +++ b/proc/tests.py @@ -0,0 +1,102 @@ +import json +import unittest + +from core.utils.sanitize import sanitize_for_json + + +class SanitizeForJsonTest(unittest.TestCase): + """Tests for sanitize_for_json function that removes surrogate characters.""" + + def test_plain_string_unchanged(self): + self.assertEqual(sanitize_for_json("hello world"), "hello world") + + def test_string_with_surrogate_is_replaced(self): + # \udce1 is a surrogate escape for byte 0xe1 (Latin-1 'á') + text = "v4n2 Sum\udce1rio" + result = sanitize_for_json(text) + self.assertNotIn("\udce1", result) + # The surrogate should be replaced with the Unicode replacement character + self.assertIn("\ufffd", result) + + def test_result_is_valid_json(self): + detail = { + "failures": [ + {"file": "/scielo_www/revenf/bases/pdf/ccs/v4n2/v4n2 Sum\udce1rio.pdf"} + ], + "migrated": 5, + } + result = sanitize_for_json(detail) + # Must not raise + json_str = json.dumps(result) + self.assertIn("Sum", json_str) + + def test_dict_with_surrogates_in_keys_and_values(self): + data = {"\udce1key": "val\udce1ue"} + result = sanitize_for_json(data) + json_str = json.dumps(result) + self.assertNotIn("\\udce1", json_str) + + 
def test_nested_list_with_surrogates(self): + data = [["Sum\udce1rio", "normal"], "ok\udce9"] + result = sanitize_for_json(data) + json_str = json.dumps(result) + self.assertNotIn("\\udce1", json_str) + self.assertNotIn("\\udce9", json_str) + + def test_non_string_values_preserved(self): + data = {"count": 42, "flag": True, "empty": None} + result = sanitize_for_json(data) + self.assertEqual(result, data) + + def test_empty_dict(self): + self.assertEqual(sanitize_for_json({}), {}) + + def test_empty_string(self): + self.assertEqual(sanitize_for_json(""), "") + + def test_normal_unicode_preserved(self): + # Normal accented characters should be preserved + text = "Sumário" + self.assertEqual(sanitize_for_json(text), "Sumário") + + def test_multiple_surrogates_in_same_string(self): + text = "\udce1\udce9\udcf3" + result = sanitize_for_json(text) + # All surrogates should be handled + json.dumps(result) # Must not raise + + def test_tuple_converted_to_list(self): + data = ("a\udce1", "b") + result = sanitize_for_json(data) + self.assertIsInstance(result, list) + json.dumps(result) # Must not raise + + def test_complex_detail_dict_from_real_error(self): + """Simulate the actual error scenario from the issue.""" + detail = { + "failures": [ + { + "file": "/scielo_www/revenf/bases/pdf/ccs/v4n2/v4n2 Sum\udce1rio.pdf", + "error": "some error", + "type": "", + } + ], + "migrated": 3, + } + result = sanitize_for_json(detail) + json_str = json.dumps(result) + parsed = json.loads(json_str) + self.assertEqual(parsed["migrated"], 3) + self.assertEqual(len(parsed["failures"]), 1) + self.assertIn("Sum", parsed["failures"][0]["file"]) + + def test_high_surrogate_handled(self): + """High surrogates (not from surrogateescape) are also handled.""" + text = "test\ud800value" + result = sanitize_for_json(text) + json.dumps(result) # Must not raise + self.assertNotIn("\ud800", result) + + +if __name__ == "__main__": + unittest.main() diff --git a/proc/wagtail_hooks.py b/proc/wagtail_hooks.py index 5dd89466c..6fa6dfe63 100644 --- a/proc/wagtail_hooks.py +++ b/proc/wagtail_hooks.py @@ -217,6 +217,7 @@ class ArticleProcViewSet(CommonControlFieldViewSet): list_display = [ "__str__", + "pid_status", "migration_status", "xml_status", "sps_pkg_status", @@ -226,6 +227,7 @@ class ArticleProcViewSet(CommonControlFieldViewSet): ] list_filter = [ "collection", + "pid_status", "migration_status", "xml_status", "sps_pkg_status", diff --git a/production-v3.0.0rc4.yml b/production-v3.0.0rc4.yml index 19182bd20..f281a3958 100644 --- a/production-v3.0.0rc4.yml +++ b/production-v3.0.0rc4.yml @@ -1,5 +1,5 @@ x-django-function: &django-function - image: infrascielo/upload:${SCMS_WEBAPP_VERSION:-v2.10.4} + image: infrascielo/upload:${SCMS_WEBAPP_VERSION} restart: unless-stopped platform: linux/x86_64 depends_on: @@ -48,9 +48,12 @@ services: celeryworker: <<: *django-function - container_name: upload_production_celeryworker command: /start-celeryworker ports: [] + deploy: + resources: + limits: + memory: 2.5G celerybeat: <<: *django-function diff --git a/publication/api/journal.py b/publication/api/journal.py index 9c15e0bfa..2c81e9415 100644 --- a/publication/api/journal.py +++ b/publication/api/journal.py @@ -1,4 +1,5 @@ import logging +import re from django.utils.translation import gettext_lazy as _ @@ -200,6 +201,20 @@ def add_sponsor(self, sponsor): # Sponsors self.data["sponsors"].append({"name": sponsor}) + @staticmethod + def _clean_br_tags(text): + """ + Replace
<br>, <br/>, <br /> HTML tags with ", " and clean up + resulting duplicate commas and extra whitespace. + """ + if not text: + return text + # Replace <br> variants (case insensitive) with ", " + text = re.sub(r"<br\s*/?>", ", ", text, flags=re.IGNORECASE) + # Collapse sequences of commas and whitespace (e.g. ", , " -> ", ") + text = re.sub(r"(\s*,\s*)+", ", ", text) + return text.strip(", ") + def add_contact(self, name, email, address, city, state, country): # email to contact # self.data["editor_email"] = email @@ -210,7 +225,7 @@ def add_contact(self, name, email, address, city, state, country): # self.data["publisher_country"] = country self.data["contact"] = { "email": email, - "address": address, + "address": self._clean_br_tags(address), } def add_logo_url(self, logo_url): diff --git a/publication/api/publication.py index 974db5262..9e3196528 100644 --- a/publication/api/publication.py +++ b/publication/api/publication.py @@ -42,7 +42,8 @@ def __init__( password=None, timeout=None, token=None, - enabled=None + enabled=None, + verify=False, ): self.timeout = timeout or 15 self.post_data_url = post_data_url @@ -51,6 +52,7 @@ self.password = password self.token = token self.enabled = enabled + self.verify = verify if not token and enabled: self.get_token() @@ -63,6 +65,7 @@ def data(self): password=self.password, token=self.token, enabled=self.enabled, + verify=self.verify, ) def post_data(self, payload, kwargs=None): @@ -147,7 +150,7 @@ def _post_data(self, payload, token, kwargs=None): data=json.dumps(payload), headers=header, timeout=self.timeout, - verify=False, + verify=self.verify, json=True, ) diff --git a/publication/api/test_journal.py new file mode 100644 index 000000000..2d9f800bd --- /dev/null +++ b/publication/api/test_journal.py @@ -0,0 +1,119 @@ +from unittest import TestCase + +from publication.api.journal import JournalPayload + + +class JournalPayloadCleanBrTagsTest(TestCase): + def test_clean_br_tags_removes_br(self): + result = JournalPayload._clean_br_tags( + "PO Box 339,<br>Bloemfontein, Free State" + ) + self.assertEqual(result, "PO Box 339, Bloemfontein, Free State") + + def test_clean_br_tags_removes_br_self_closing(self): + result = JournalPayload._clean_br_tags( + "PO Box 339,<br/>Bloemfontein, Free State" + ) + self.assertEqual(result, "PO Box 339, Bloemfontein, Free State") + + def test_clean_br_tags_removes_br_self_closing_with_space(self): + result = JournalPayload._clean_br_tags( + "PO Box 339,<br />Bloemfontein, Free State" + ) + self.assertEqual(result, "PO Box 339, Bloemfontein, Free State") + + def test_clean_br_tags_case_insensitive(self): + result = JournalPayload._clean_br_tags( + "PO Box 339,<BR>Bloemfontein,<Br/>Free State" + ) + self.assertEqual(result, "PO Box 339, Bloemfontein, Free State") + + def test_clean_br_tags_without_surrounding_comma(self): + result = JournalPayload._clean_br_tags( + "Address Line 1<br>Address Line 2" + ) + self.assertEqual(result, "Address Line 1, Address Line 2") + + def test_clean_br_tags_multiple(self): + result = JournalPayload._clean_br_tags( + "Avenida Dr. Arnaldo, 715<br>01246-904 São Paulo SP Brazil<br>Tel./Fax: +55 11 3061-7985" + ) + self.assertEqual( + result, + "Avenida Dr. Arnaldo, 715, 01246-904 São Paulo SP Brazil, Tel./Fax: +55 11 3061-7985", + ) + + def test_clean_br_tags_no_tags(self): + result = JournalPayload._clean_br_tags( + "Rua Leopoldo Bulhões, 1480, Rio de Janeiro" + ) + self.assertEqual(result, "Rua Leopoldo Bulhões, 1480, Rio de Janeiro") + + def test_clean_br_tags_none(self): + result = JournalPayload._clean_br_tags(None) + self.assertIsNone(result) + + def test_clean_br_tags_empty(self): + result = JournalPayload._clean_br_tags("") + self.assertEqual(result, "") + + def test_clean_br_tags_real_world_example(self): + """Test with the exact pattern from the issue screenshot.""" + result = JournalPayload._clean_br_tags( + "Centre for Gender and Africa Studies, University of the Free State, " + "PO Box 339,<br/>Bloemfontein, Free State, ZA, 9300, " + "<br/>Tel: +27 (0)82 384 7027 - E-mail: henning.melber@nai.uu.se" + ) + self.assertEqual( + result, + "Centre for Gender and Africa Studies, University of the Free State, " + "PO Box 339, Bloemfontein, Free State, ZA, 9300, " + "Tel: +27 (0)82 384 7027 - E-mail: henning.melber@nai.uu.se", + ) + + class JournalPayloadAddContactTest(TestCase): + def test_add_contact_strips_br_from_address(self): + payload = {} + builder = JournalPayload(payload) + builder.add_contact( + name="Test Publisher", + email="test@example.com", + address="Street 1,<br>City,<br/>Country", + city="City", + state="State", + country="Country", + ) + self.assertEqual( + payload["contact"]["address"], + "Street 1, City, Country", + ) + + def test_add_contact_without_br(self): + payload = {} + builder = JournalPayload(payload) + builder.add_contact( + name="Test Publisher", + email="test@example.com", + address="Street 1, City, Country", + city="City", + state="State", + country="Country", + ) + self.assertEqual( + payload["contact"]["address"], + "Street 1, City, Country", + ) + + def test_add_contact_none_address(self): + payload = {} + builder = JournalPayload(payload) + builder.add_contact( + name="Test Publisher", + email="test@example.com", + address=None, + city="City", + state="State", + country="Country", + ) + self.assertIsNone(payload["contact"]["address"]) diff --git a/publication/migrations/0004_articleavailability_publication_rule_and_more.py b/publication/migrations/0004_alter_articleavailability_options_and_more.py similarity index 52% rename from publication/migrations/0004_articleavailability_publication_rule_and_more.py rename to publication/migrations/0004_alter_articleavailability_options_and_more.py index 837472d78..a0d0fb730 100644 --- a/publication/migrations/0004_articleavailability_publication_rule_and_more.py +++ b/publication/migrations/0004_alter_articleavailability_options_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.3 on 2025-04-24 15:12 +# Generated by Django 5.0.3 on 2025-05-14 15:12 from django.db import migrations, models @@ -9,6 +9,13 @@ class Migration(migrations.Migration): ] operations = [ + migrations.AlterModelOptions( + name="articleavailability", + options={ + "verbose_name": "Article availability", + "verbose_name_plural": "Article availability", + }, + ), migrations.AddField( model_name="articleavailability", name="publication_rule", @@ -23,4 +30,15 @@ class Migration(migrations.Migration): blank=True, max_length=30, null=True, verbose_name="published by" ), ), + migrations.AddField( + model_name="articleavailability", + name="published_percentage", + field=models.DecimalField( + decimal_places=2, + default=0.0, + help_text="0-100", + max_digits=5, + verbose_name="Published percentual", + ), + ), ] diff --git a/publication/migrations/0005_alter_articleavailability_options_and_more.py b/publication/migrations/0005_alter_articleavailability_options_and_more.py index 275861f79..d4376dd6f 100644 --- a/publication/migrations/0005_alter_articleavailability_options_and_more.py +++ b/publication/migrations/0005_alter_articleavailability_options_and_more.py @@ -5,10 +5,18 @@ class Migration(migrations.Migration): dependencies = [ - ("publication", "0004_articleavailability_publication_rule_and_more"), + ("publication", "0004_alter_articleavailability_options_and_more"), ] operations = [ + migrations.AlterModelOptions( + name="articleavailability", + options={}, + ), + migrations.RemoveField( + model_name="articleavailability", + name="published_percentage", + ), migrations.AddField( model_name="articleavailability", name="completed", diff --git a/publication/utils/document.py index 46c83563a..8331747ee 100644 --- a/publication/utils/document.py +++ b/publication/utils/document.py @@ -116,7 +116,11 @@ def get_contribs(self): try: affs = item.get("affs") or [] affiliation = ", ".join( - [a.get("original") or a.get("orgname") for a in affs] + [ + a.get("original") or a.get("orgname") + for a in affs + if a.get("original") or a.get("orgname") + ] ) except KeyError: affiliation = None
diff --git a/publication/utils/test_document.py b/publication/utils/test_document.py new file mode 100644 index 000000000..3509c8d7b --- /dev/null +++ b/publication/utils/test_document.py @@ -0,0 +1,112 @@ +from unittest import TestCase +from unittest.mock import MagicMock + +from lxml import etree + +from publication.utils.document import XMLArticle + + +def _create_xml_article(xml_string): + xmltree = etree.fromstring(xml_string) + xml_with_pre = MagicMock() + xml_with_pre.xmltree = xmltree + return XMLArticle(xml_with_pre) + + +class XMLArticleGetContribsTest(TestCase): + def test_get_contribs_with_affiliation_missing_original_and_orgname(self): + """Regression test: affiliations lacking both 'original' and 'orgname' + should not raise TypeError in str.join().""" + xml_string = """
+ <article> + <front> + <article-meta> + <contrib-group> + <contrib contrib-type="author"> + <name> + <surname>Silva</surname> + <given-names>Rafaela</given-names> + </name> + <xref ref-type="aff" rid="aff1"> + </xref> + </contrib> + </contrib-group> + <aff id="aff1"> + </aff> + </article-meta> + </front> + </article>
    """ + + article_xml = _create_xml_article(xml_string) + result = article_xml.get_contribs() + + self.assertEqual(len(result["names"]), 1) + self.assertEqual(result["names"][0]["surname"], "Silva") + self.assertEqual(result["names"][0]["given_names"], "Rafaela") + self.assertEqual(result["names"][0]["affiliation"], "") + + def test_get_contribs_with_valid_affiliation(self): + xml_string = """
+ <article> + <front> + <article-meta> + <contrib-group> + <contrib contrib-type="author"> + <name> + <surname>Costa</surname> + <given-names>Ana</given-names> + </name> + <xref ref-type="aff" rid="aff1"> + </xref> + </contrib> + </contrib-group> + <aff id="aff1"> + <institution content-type="original">Universidade de São Paulo, SP, Brasil</institution> + </aff> + </article-meta> + </front> + </article>
    """ + + article_xml = _create_xml_article(xml_string) + result = article_xml.get_contribs() + + self.assertEqual(len(result["names"]), 1) + self.assertIn("Universidade de São Paulo", result["names"][0]["affiliation"]) + + def test_get_contribs_with_no_affs(self): + xml_string = """
+ <article> + <front> + <article-meta> + <contrib-group> + <contrib contrib-type="author"> + <name> + <surname>Pereira</surname> + <given-names>João</given-names> + </name> + </contrib> + </contrib-group> + </article-meta> + </front> + </article>
    """ + + article_xml = _create_xml_article(xml_string) + result = article_xml.get_contribs() + + self.assertEqual(len(result["names"]), 1) + self.assertEqual(result["names"][0]["surname"], "Pereira") + self.assertEqual(result["names"][0]["affiliation"], "") + + def test_get_contribs_with_no_contribs(self): + xml_string = """
+ <article> + <front> + <article-meta/> + </front> + </article>
    """ + + article_xml = _create_xml_article(xml_string) + result = article_xml.get_contribs() + + self.assertEqual(result["names"], []) + self.assertEqual(result["collabs"], []) diff --git a/requirements/base.txt b/requirements/base.txt index dffe11f4e..8ca775e17 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -90,10 +90,10 @@ iso639-lang==2.6.3 # Mantendo versão maior # SciELO Specific Packages # ======================================== # Using specific versions for stability --e git+https://github.com/scieloorg/packtools.git@4.14.4#egg=packtools +-e git+https://github.com/scieloorg/packtools.git@4.16.1#egg=packtools -e git+https://github.com/scieloorg/scielo_scholarly_data#egg=scielo_scholarly_data -e git+https://github.com/scieloorg/opac_schema.git@v2.66#egg=opac_schema --e git+https://github.com/scieloorg/scielo_migration.git@1.10.6#egg=scielo_classic_website +-e git+https://github.com/scieloorg/scielo_migration.git@1.10.7#egg=scielo_classic_website # ======================================== # Development & Testing diff --git a/researcher/wagtail_hooks.py b/researcher/wagtail_hooks.py index 2bddeb4a4..cfb796a1a 100644 --- a/researcher/wagtail_hooks.py +++ b/researcher/wagtail_hooks.py @@ -1,27 +1,18 @@ -from django.http import HttpResponseRedirect from django.utils.translation import gettext_lazy as _ -from wagtail_modeladmin.options import ModelAdmin, modeladmin_register -from wagtail_modeladmin.views import CreateView +from wagtail.snippets.models import register_snippet +from wagtail.snippets.views.snippets import SnippetViewSet from config.menu import get_menu_order from .models import Researcher -class ResearcherCreateView(CreateView): - def form_valid(self, form): - self.object = form.save_all(self.request.user) - return HttpResponseRedirect(self.get_success_url()) - - -class ResearcherAdmin(ModelAdmin): +class ResearcherViewSet(SnippetViewSet): model = Researcher - create_view_class = ResearcherCreateView menu_label = _("Researcher") menu_icon = "folder" menu_order = get_menu_order("researcher") add_to_settings_menu = False - exclude_from_explorer = False -# modeladmin_register(ResearcherAdmin) +register_snippet(ResearcherViewSet) diff --git a/team/admin.py b/team/admin.py index 8c38f3f3d..326c01178 100644 --- a/team/admin.py +++ b/team/admin.py @@ -1,3 +1,2 @@ -from django.contrib import admin +# Team models are registered as Wagtail snippets in wagtail_hooks.py -# Register your models here. 
diff --git a/team/forms.py b/team/forms.py deleted file mode 100644 index 42826dfe8..000000000 --- a/team/forms.py +++ /dev/null @@ -1,15 +0,0 @@ -from wagtail.admin.forms import WagtailAdminModelForm - - -class CollectionTeamMemberModelForm(WagtailAdminModelForm): - def save_all(self, user): - member = super().save(commit=False) - - if self.instance.pk is not None: - member.updated_by = user - else: - member.creator = user - - self.save() - - return member diff --git a/team/migrations/0002_company_companyteammember_journalcompanycontract_and_more.py b/team/migrations/0002_company_companyteammember_journalcompanycontract_and_more.py new file mode 100644 index 000000000..3e461b38e --- /dev/null +++ b/team/migrations/0002_company_companyteammember_journalcompanycontract_and_more.py @@ -0,0 +1,406 @@ +# Generated by Django 5.2.3 on 2026-02-18 23:03 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("journal", "0014_alter_journal_title_alter_officialjournal_title"), + ("team", "0001_initial"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="Company", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "name", + models.CharField( + max_length=255, unique=True, verbose_name="Company Name" + ), + ), + ( + "description", + models.TextField(blank=True, null=True, verbose_name="Description"), + ), + ( + "contact_email", + models.EmailField( + blank=True, + max_length=254, + null=True, + verbose_name="Contact Email", + ), + ), + ( + "contact_phone", + models.CharField( + blank=True, + max_length=50, + null=True, + verbose_name="Contact Phone", + ), + ), + ("is_active", models.BooleanField(default=True, verbose_name="Active")), + ( + "creator", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "verbose_name": "Company", + "verbose_name_plural": "Companies", + }, + ), + migrations.CreateModel( + name="CompanyTeamMember", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "is_active_member", + models.BooleanField(blank=True, default=True, null=True), + ), + ( + "role", + models.CharField( + choices=[("manager", "Manager"), ("member", "Member")], + default="member", + max_length=20, + verbose_name="Role", + ), + ), + ( + "company", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="team_members", + to="team.company", + verbose_name="Company", + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + 
on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ( + "user", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "verbose_name": "Company Team Member", + "verbose_name_plural": "Company Team Members", + }, + ), + migrations.CreateModel( + name="JournalCompanyContract", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ("is_active", models.BooleanField(default=True, verbose_name="Active")), + ( + "start_date", + models.DateField(blank=True, null=True, verbose_name="Start Date"), + ), + ( + "end_date", + models.DateField(blank=True, null=True, verbose_name="End Date"), + ), + ( + "notes", + models.TextField(blank=True, null=True, verbose_name="Notes"), + ), + ( + "company", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="journal_contracts", + to="team.company", + verbose_name="Company", + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "journal", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="company_contracts", + to="journal.journal", + verbose_name="Journal", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "verbose_name": "Journal-Company Contract", + "verbose_name_plural": "Journal-Company Contracts", + }, + ), + migrations.CreateModel( + name="JournalTeamMember", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "is_active_member", + models.BooleanField(blank=True, default=True, null=True), + ), + ( + "role", + models.CharField( + choices=[("manager", "Manager"), ("member", "Member")], + default="member", + max_length=20, + verbose_name="Role", + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "journal", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="team_members", + to="journal.journal", + verbose_name="Journal", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + 
to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ( + "user", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "verbose_name": "Journal Team Member", + "verbose_name_plural": "Journal Team Members", + }, + ), + migrations.AddIndex( + model_name="company", + index=models.Index(fields=["name"], name="team_compan_name_fce1e5_idx"), + ), + migrations.AddIndex( + model_name="company", + index=models.Index( + fields=["is_active"], name="team_compan_is_acti_66037b_idx" + ), + ), + migrations.AddIndex( + model_name="companyteammember", + index=models.Index( + fields=["company", "role"], name="team_compan_company_9915d9_idx" + ), + ), + migrations.AddIndex( + model_name="companyteammember", + index=models.Index( + fields=["user", "is_active_member"], + name="team_compan_user_id_cecff1_idx", + ), + ), + migrations.AlterUniqueTogether( + name="companyteammember", + unique_together={("user", "company")}, + ), + migrations.AddIndex( + model_name="journalcompanycontract", + index=models.Index( + fields=["journal", "is_active"], name="team_journa_journal_1acfa7_idx" + ), + ), + migrations.AddIndex( + model_name="journalcompanycontract", + index=models.Index( + fields=["company", "is_active"], name="team_journa_company_d18904_idx" + ), + ), + migrations.AlterUniqueTogether( + name="journalcompanycontract", + unique_together={("journal", "company")}, + ), + migrations.AddIndex( + model_name="journalteammember", + index=models.Index( + fields=["journal", "role"], name="team_journa_journal_d9a41d_idx" + ), + ), + migrations.AddIndex( + model_name="journalteammember", + index=models.Index( + fields=["user", "is_active_member"], + name="team_journa_user_id_dfe846_idx", + ), + ), + migrations.AlterUniqueTogether( + name="journalteammember", + unique_together={("user", "journal")}, + ), + ] diff --git a/team/migrations/0003_add_role_to_collectionteammember.py b/team/migrations/0003_add_role_to_collectionteammember.py new file mode 100644 index 000000000..81a3c898b --- /dev/null +++ b/team/migrations/0003_add_role_to_collectionteammember.py @@ -0,0 +1,36 @@ +# Generated by Django 5.2.3 on 2026-02-19 01:42 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("team", "0002_company_companyteammember_journalcompanycontract_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="collectionteammember", + name="role", + field=models.CharField( + choices=[("manager", "Manager"), ("member", "Member")], + default="member", + max_length=20, + verbose_name="Role", + ), + ), + migrations.AddIndex( + model_name="collectionteammember", + index=models.Index( + fields=["collection", "role"], + name="team_collec_collect_idx", + ), + ), + migrations.AddIndex( + model_name="collectionteammember", + index=models.Index( + fields=["user", "is_active_member"], + name="team_collec_user_id_idx", + ), + ), + ] diff --git a/team/migrations/0004_rename_team_collec_collect_idx_team_collec_collect_0ee96e_idx_and_more.py b/team/migrations/0004_rename_team_collec_collect_idx_team_collec_collect_0ee96e_idx_and_more.py new file mode 100644 index 000000000..1f47e1ce9 --- /dev/null +++ b/team/migrations/0004_rename_team_collec_collect_idx_team_collec_collect_0ee96e_idx_and_more.py @@ -0,0 +1,48 @@ +# Generated by Django 5.2.3 on 2026-02-19 18:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + 
("team", "0003_add_role_to_collectionteammember"), + ] + + operations = [ + migrations.RenameIndex( + model_name="collectionteammember", + new_name="team_collec_collect_0ee96e_idx", + old_name="team_collec_collect_idx", + ), + migrations.RenameIndex( + model_name="collectionteammember", + new_name="team_collec_user_id_6382ef_idx", + old_name="team_collec_user_id_idx", + ), + migrations.AddField( + model_name="company", + name="certified_since", + field=models.DateField( + blank=True, null=True, verbose_name="Certified Since" + ), + ), + migrations.AddField( + model_name="company", + name="logo", + field=models.ImageField( + blank=True, null=True, upload_to="logos/", verbose_name="Logo" + ), + ), + migrations.AddField( + model_name="company", + name="personal_contact", + field=models.CharField( + blank=True, max_length=30, null=True, verbose_name="Personal Contact" + ), + ), + migrations.AddField( + model_name="company", + name="url", + field=models.URLField(blank=True, null=True, verbose_name="URL"), + ), + ] diff --git a/team/models.py b/team/models.py index 562ce9e2a..52d0fe7a4 100644 --- a/team/models.py +++ b/team/models.py @@ -4,30 +4,82 @@ from django.db import models from django.db.models import Q from django.utils.translation import gettext_lazy as _ -from modelcluster.fields import ParentalKey -from modelcluster.models import ClusterableModel -from wagtail.admin.panels import FieldPanel, InlinePanel, ObjectList, TabbedInterface +from wagtail.admin.panels import FieldPanel from wagtailautocomplete.edit_handlers import AutocompletePanel from collection.models import Collection +from core.models import CommonControlField, VisualIdentityMixin from core.forms import CoreAdminModelForm -from core.models import CommonControlField -from team.forms import CollectionTeamMemberModelForm - User = get_user_model() ALLOWED_COLLECTIONS = ["dom", "spa", "scl", "pan"] +class TeamRole(models.TextChoices): + """Role types for team members.""" + MANAGER = "manager", _("Manager") + MEMBER = "member", _("Member") + + +def get_user_membership_ids(user): + """ + Returns a dict with the list IDs of collections, journals or companies + that the user is actively associated with, depending on team membership type. + Priority order: collection > journal > company. + + For collection team members, journal_list_ids is also populated with the journals + that belong to the user's collections. + For company team members, journal_list_ids is also populated with the journals + that have active contracts with the user's companies. 
+ """ + from journal.models import JournalCollection + + result = {"collection_list_ids": [], "journal_list_ids": [], "company_list_ids": []} + + collection_ids = list( + CollectionTeamMember.objects.filter(user=user, is_active_member=True) + .values_list("collection", flat=True) + ) + if collection_ids: + result["collection_list_ids"] = collection_ids + result["journal_list_ids"] = list( + JournalCollection.objects.filter( + collection__in=collection_ids + ).values_list("journal", flat=True) + ) + return result + + journal_ids = list( + JournalTeamMember.objects.filter(user=user, is_active_member=True) + .values_list("journal", flat=True) + ) + if journal_ids: + result["journal_list_ids"] = journal_ids + return result + + company_ids = list( + CompanyTeamMember.objects.filter(user=user, is_active_member=True) + .values_list("company", flat=True) + ) + if company_ids: + result["company_list_ids"] = company_ids + result["journal_list_ids"] = list( + JournalCompanyContract.objects.filter( + company__in=company_ids, is_active=True + ).values_list("journal", flat=True) + ) + return result + + def has_permission(user=None): try: if not user: - logging.info(f"has_permission: collection") + logging.info("has_permission: collection") return Collection.objects.filter(acron__in=ALLOWED_COLLECTIONS).exists() - logging.info(f"has_permission: user") + logging.info("has_permission: user") return CollectionTeamMember.has_upload_permission(user) - except Exception as e: + except Exception: return False @@ -35,6 +87,7 @@ class TeamMember(CommonControlField): user = models.ForeignKey(User, null=True, blank=True, on_delete=models.SET_NULL) is_active_member = models.BooleanField(null=True, blank=True, default=True) + base_form_class = CoreAdminModelForm panels = [ FieldPanel("user"), FieldPanel("is_active_member"), @@ -68,11 +121,17 @@ class CollectionTeamMember(TeamMember): collection = models.ForeignKey( Collection, null=True, blank=True, on_delete=models.SET_NULL ) + role = models.CharField( + _("Role"), + max_length=20, + choices=TeamRole.choices, + default=TeamRole.MEMBER + ) - base_form_class = CollectionTeamMemberModelForm panels = [ AutocompletePanel("collection"), AutocompletePanel("user"), + FieldPanel("role"), FieldPanel("is_active_member"), ] @@ -80,6 +139,10 @@ class Meta: verbose_name = _("Team member") verbose_name_plural = _("Team members") unique_together = ("user", "collection") + indexes = [ + models.Index(fields=["collection", "role"]), + models.Index(fields=["user", "is_active_member"]), + ] @staticmethod def autocomplete_custom_queryset_filter(text): @@ -90,10 +153,34 @@ def autocomplete_custom_queryset_filter(text): ) def autocomplete_label(self): - return str(self.user) + return f"{self.user} - {self.collection} ({self.get_role_display()})" def __str__(self): - return f"{self.user} ({self.collection})" + return f"{self.user} - {self.collection} ({self.get_role_display()})" + + def is_manager(self): + """Check if this member is a manager.""" + return self.role == TeamRole.MANAGER + + @classmethod + def user_is_manager(cls, user, collection): + """Check if a user is a manager for a specific collection.""" + return cls.objects.filter( + user=user, + collection=collection, + role=TeamRole.MANAGER, + is_active_member=True + ).exists() + + @classmethod + def get_user_collections(cls, user, role=None, is_active=True): + """Get all collections a user is associated with.""" + filters = {"user": user} + if role: + filters["role"] = role + if is_active is not None: + filters["is_active_member"] = 
is_active + return cls.objects.filter(**filters).select_related("collection") @staticmethod def collections(user, is_active_member=None): @@ -106,7 +193,7 @@ def collections(user, is_active_member=None): else: for member in CollectionTeamMember.objects.filter(user=user): yield member.collection - except Exception as e: + except Exception: return Collection.objects.all() @staticmethod @@ -117,3 +204,276 @@ def members(user, is_active_member=None): @classmethod def has_upload_permission(cls, user): return cls.objects.filter(user=user, collection__acron__in=ALLOWED_COLLECTIONS).exists() + + +class Company(VisualIdentityMixin, CommonControlField): + """ + Company represents an editorial services provider that can be contracted + by journals to produce XML files. + """ + name = models.CharField(_("Company Name"), max_length=255, unique=True) + description = models.TextField(_("Description"), blank=True, null=True) + personal_contact = models.CharField(_("Personal Contact"), max_length=30, blank=True, null=True) + contact_email = models.EmailField(_("Contact Email"), blank=True, null=True) + contact_phone = models.CharField(_("Contact Phone"), max_length=50, blank=True, null=True) + certified_since = models.DateField(_("Certified Since"), blank=True, null=True) + is_active = models.BooleanField(_("Active"), default=True) + + class Meta: + verbose_name = _("Company") + verbose_name_plural = _("Companies") + indexes = [ + models.Index(fields=["name"]), + models.Index(fields=["is_active"]), + ] + base_form_class = CoreAdminModelForm + panels = [ + FieldPanel("name"), + FieldPanel("description"), + FieldPanel("url"), + FieldPanel("logo"), + FieldPanel("personal_contact"), + FieldPanel("contact_email"), + FieldPanel("contact_phone"), + FieldPanel("certified_since"), + FieldPanel("is_active"), + ] + + def __str__(self): + return self.name + + autocomplete_search_field = "name" + + def autocomplete_label(self): + return self.name + + @classmethod + def get_managers(cls, company_id): + """Get all managers for this company.""" + return CompanyTeamMember.objects.filter( + company_id=company_id, + role=TeamRole.MANAGER, + is_active_member=True + ) + + @classmethod + def get_members(cls, company_id): + """Get all active members (including managers) for this company.""" + return CompanyTeamMember.objects.filter( + company_id=company_id, + is_active_member=True + ) + + +class JournalTeamMember(TeamMember): + """ + Editorial team members associated with a specific journal. + Can be either managers or regular members. 
+ """ + journal = models.ForeignKey( + "journal.Journal", + on_delete=models.CASCADE, + related_name="team_members", + verbose_name=_("Journal") + ) + role = models.CharField( + _("Role"), + max_length=20, + choices=TeamRole.choices, + default=TeamRole.MEMBER + ) + + class Meta: + verbose_name = _("Journal Team Member") + verbose_name_plural = _("Journal Team Members") + unique_together = ("user", "journal") + indexes = [ + models.Index(fields=["journal", "role"]), + models.Index(fields=["user", "is_active_member"]), + ] + + panels = [ + AutocompletePanel("journal"), + AutocompletePanel("user"), + FieldPanel("role"), + FieldPanel("is_active_member"), + ] + + def __str__(self): + return f"{self.user} - {self.journal} ({self.get_role_display()})" + + @staticmethod + def autocomplete_custom_queryset_filter(text): + return JournalTeamMember.objects.filter( + Q(user__username__icontains=text) + | Q(user__email__icontains=text) + | Q(user__name__icontains=text) + | Q(journal__title__icontains=text) + ) + + def autocomplete_label(self): + return f"{self.user} - {self.journal} ({self.get_role_display()})" + + def is_manager(self): + """Check if this member is a manager.""" + return self.role == TeamRole.MANAGER + + @classmethod + def user_is_manager(cls, user, journal): + """Check if a user is a manager for a specific journal.""" + return cls.objects.filter( + user=user, + journal=journal, + role=TeamRole.MANAGER, + is_active_member=True + ).exists() + + @classmethod + def get_user_journals(cls, user, role=None, is_active=True): + """Get all journals a user is associated with.""" + filters = {"user": user} + if role: + filters["role"] = role + if is_active is not None: + filters["is_active_member"] = is_active + return cls.objects.filter(**filters).select_related("journal") + + +class CompanyTeamMember(TeamMember): + """ + Company team members (XML providers) associated with an editorial services company. + Can be either managers or regular members. 
+ """ + company = models.ForeignKey( + Company, + on_delete=models.CASCADE, + related_name="team_members", + verbose_name=_("Company") + ) + role = models.CharField( + _("Role"), + max_length=20, + choices=TeamRole.choices, + default=TeamRole.MEMBER + ) + + class Meta: + verbose_name = _("Company Team Member") + verbose_name_plural = _("Company Team Members") + unique_together = ("user", "company") + indexes = [ + models.Index(fields=["company", "role"]), + models.Index(fields=["user", "is_active_member"]), + ] + + panels = [ + AutocompletePanel("company"), + AutocompletePanel("user"), + FieldPanel("role"), + FieldPanel("is_active_member"), + ] + + def __str__(self): + return f"{self.user} - {self.company} ({self.get_role_display()})" + + @staticmethod + def autocomplete_custom_queryset_filter(text): + return CompanyTeamMember.objects.filter( + Q(user__username__icontains=text) + | Q(user__email__icontains=text) + | Q(user__name__icontains=text) + | Q(company__name__icontains=text) + ) + + def autocomplete_label(self): + return f"{self.user} - {self.company} ({self.get_role_display()})" + + def is_manager(self): + """Check if this member is a manager.""" + return self.role == TeamRole.MANAGER + + @classmethod + def user_is_manager(cls, user, company): + """Check if a user is a manager for a specific company.""" + return cls.objects.filter( + user=user, + company=company, + role=TeamRole.MANAGER, + is_active_member=True + ).exists() + + @classmethod + def get_user_companies(cls, user, role=None, is_active=True): + """Get all companies a user is associated with.""" + filters = {"user": user} + if role: + filters["role"] = role + if is_active is not None: + filters["is_active_member"] = is_active + return cls.objects.filter(**filters).select_related("company") + + +class JournalCompanyContract(CommonControlField): + """ + Represents a contract between a journal and a company for XML production services. + Journal managers can manage these contracts. 
+ """ + journal = models.ForeignKey( + "journal.Journal", + on_delete=models.CASCADE, + related_name="company_contracts", + verbose_name=_("Journal") + ) + company = models.ForeignKey( + Company, + on_delete=models.CASCADE, + related_name="journal_contracts", + verbose_name=_("Company") + ) + is_active = models.BooleanField(_("Active"), default=True) + start_date = models.DateField(_("Start Date"), null=True, blank=True) + end_date = models.DateField(_("End Date"), null=True, blank=True) + notes = models.TextField(_("Notes"), blank=True, null=True) + + class Meta: + verbose_name = _("Journal-Company Contract") + verbose_name_plural = _("Journal-Company Contracts") + unique_together = ("journal", "company") + indexes = [ + models.Index(fields=["journal", "is_active"]), + models.Index(fields=["company", "is_active"]), + ] + base_form_class = CoreAdminModelForm + panels = [ + AutocompletePanel("journal"), + AutocompletePanel("company"), + FieldPanel("is_active"), + FieldPanel("start_date"), + FieldPanel("end_date"), + FieldPanel("notes"), + ] + + def __str__(self): + status = "Active" if self.is_active else "Inactive" + return f"{self.journal} - {self.company} ({status})" + + @classmethod + def get_journal_companies(cls, journal, is_active=True): + """Get all companies contracted by a journal.""" + filters = {"journal": journal} + if is_active is not None: + filters["is_active"] = is_active + return cls.objects.filter(**filters).select_related("company") + + @classmethod + def get_company_journals(cls, company, is_active=True): + """Get all journals that contracted a company.""" + filters = {"company": company} + if is_active is not None: + filters["is_active"] = is_active + return cls.objects.filter(**filters).select_related("journal") + + @classmethod + def can_manage_contract(cls, user, journal): + """Check if a user can manage contracts for a journal (must be a journal manager).""" + return JournalTeamMember.user_is_manager(user, journal) diff --git a/team/tests.py b/team/tests.py index 7ce503c2d..0d46b39ef 100644 --- a/team/tests.py +++ b/team/tests.py @@ -1,3 +1,542 @@ +from django.contrib.auth import get_user_model +from django.db import IntegrityError from django.test import TestCase -# Create your tests here. 
+from collection.models import Collection +from journal.models import Journal +from team.models import ( + CollectionTeamMember, + Company, + CompanyTeamMember, + JournalCompanyContract, + JournalTeamMember, + TeamRole, +) + +User = get_user_model() + + +class CollectionTeamMemberModelTest(TestCase): + """Test cases for the CollectionTeamMember model.""" + + def setUp(self): + self.user = User.objects.create_user( + username="testuser", email="test@example.com", password="testpass123" + ) + self.manager_user = User.objects.create_user( + username="manager", email="manager@example.com", password="testpass123" + ) + self.collection = Collection.objects.create( + acron="TST", + name="Test Collection", + creator=self.user, + ) + + def test_create_collection_team_member(self): + """Test creating a collection team member.""" + member = CollectionTeamMember.objects.create( + user=self.user, + collection=self.collection, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + self.assertEqual(member.user, self.user) + self.assertEqual(member.collection, self.collection) + self.assertEqual(member.role, TeamRole.MEMBER) + self.assertFalse(member.is_manager()) + + def test_create_collection_team_manager(self): + """Test creating a collection team manager.""" + manager = CollectionTeamMember.objects.create( + user=self.manager_user, + collection=self.collection, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + self.assertEqual(manager.role, TeamRole.MANAGER) + self.assertTrue(manager.is_manager()) + + def test_collection_team_member_unique_together(self): + """Test that a user can only be added once to a collection.""" + CollectionTeamMember.objects.create( + user=self.user, + collection=self.collection, + role=TeamRole.MEMBER, + creator=self.user, + ) + with self.assertRaises(IntegrityError): + CollectionTeamMember.objects.create( + user=self.user, + collection=self.collection, + role=TeamRole.MANAGER, + creator=self.user, + ) + + def test_user_is_manager(self): + """Test checking if a user is a collection manager.""" + CollectionTeamMember.objects.create( + user=self.manager_user, + collection=self.collection, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + self.assertTrue( + CollectionTeamMember.user_is_manager(self.manager_user, self.collection) + ) + self.assertFalse(CollectionTeamMember.user_is_manager(self.user, self.collection)) + + def test_get_user_collections(self): + """Test getting collections for a user.""" + CollectionTeamMember.objects.create( + user=self.user, + collection=self.collection, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + collections = CollectionTeamMember.get_user_collections(self.user) + self.assertEqual(collections.count(), 1) + self.assertEqual(collections.first().collection, self.collection) + + def test_collection_get_managers(self): + """Test getting managers for a collection.""" + CollectionTeamMember.objects.create( + user=self.manager_user, + collection=self.collection, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + CollectionTeamMember.objects.create( + user=self.user, + collection=self.collection, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + managers = Collection.get_managers(self.collection.id) + self.assertEqual(managers.count(), 1) + self.assertEqual(managers.first().user, self.manager_user) + + def test_collection_get_members(self): + """Test getting all members (including managers) for a 
collection.""" + CollectionTeamMember.objects.create( + user=self.manager_user, + collection=self.collection, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + CollectionTeamMember.objects.create( + user=self.user, + collection=self.collection, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + members = Collection.get_members(self.collection.id) + self.assertEqual(members.count(), 2) + + def test_default_role_is_member(self): + """Test that the default role is MEMBER.""" + member = CollectionTeamMember.objects.create( + user=self.user, + collection=self.collection, + is_active_member=True, + creator=self.user, + ) + self.assertEqual(member.role, TeamRole.MEMBER) + + def test_autocomplete_label_includes_role(self): + """Test that autocomplete label includes role.""" + member = CollectionTeamMember.objects.create( + user=self.user, + collection=self.collection, + role=TeamRole.MEMBER, + creator=self.user, + ) + label = member.autocomplete_label() + self.assertIn("Member", label) + self.assertIn(str(self.user), label) + self.assertIn(str(self.collection), label) + + def test_str_includes_role(self): + """Test that string representation includes role.""" + manager = CollectionTeamMember.objects.create( + user=self.manager_user, + collection=self.collection, + role=TeamRole.MANAGER, + creator=self.user, + ) + str_repr = str(manager) + self.assertIn("Manager", str_repr) + self.assertIn(str(self.manager_user), str_repr) + + +class CompanyModelTest(TestCase): + """Test cases for the Company model.""" + + def setUp(self): + self.user = User.objects.create_user( + username="testuser", email="test@example.com", password="testpass123" + ) + + def test_create_company(self): + """Test creating a company.""" + company = Company.objects.create( + name="Test Company", + description="A test company", + contact_email="contact@testcompany.com", + contact_phone="+55 11 1234-5678", + is_active=True, + creator=self.user, + ) + self.assertEqual(company.name, "Test Company") + self.assertTrue(company.is_active) + self.assertEqual(str(company), "Test Company") + + def test_company_unique_name(self): + """Test that company names must be unique.""" + Company.objects.create( + name="Unique Company", + creator=self.user, + ) + with self.assertRaises(IntegrityError): + Company.objects.create( + name="Unique Company", + creator=self.user, + ) + + def test_company_autocomplete_label(self): + """Test company autocomplete label.""" + company = Company.objects.create( + name="Test Company", + creator=self.user, + ) + self.assertEqual(company.autocomplete_label(), "Test Company") + + def test_company_with_visual_identity(self): + """Test creating a company with url, logo, certified_since, and personal_contact.""" + from datetime import date + company = Company.objects.create( + name="Certified Company", + url="https://example.com", + personal_contact="John Doe", + certified_since=date(2020, 1, 1), + creator=self.user, + ) + self.assertEqual(company.url, "https://example.com") + self.assertEqual(company.personal_contact, "John Doe") + self.assertEqual(company.certified_since, date(2020, 1, 1)) + + +class JournalTeamMemberModelTest(TestCase): + """Test cases for the JournalTeamMember model.""" + + def setUp(self): + self.user = User.objects.create_user( + username="testuser", email="test@example.com", password="testpass123" + ) + self.manager_user = User.objects.create_user( + username="manager", email="manager@example.com", password="testpass123" + ) + # Create a minimal 
journal for testing + self.journal = Journal.objects.create( + title="Test Journal", + creator=self.user, + ) + + def test_create_journal_team_member(self): + """Test creating a journal team member.""" + member = JournalTeamMember.objects.create( + user=self.user, + journal=self.journal, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + self.assertEqual(member.user, self.user) + self.assertEqual(member.journal, self.journal) + self.assertEqual(member.role, TeamRole.MEMBER) + self.assertFalse(member.is_manager()) + + def test_create_journal_team_manager(self): + """Test creating a journal team manager.""" + manager = JournalTeamMember.objects.create( + user=self.manager_user, + journal=self.journal, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + self.assertEqual(manager.role, TeamRole.MANAGER) + self.assertTrue(manager.is_manager()) + + def test_journal_team_member_unique_together(self): + """Test that a user can only be added once to a journal.""" + JournalTeamMember.objects.create( + user=self.user, + journal=self.journal, + role=TeamRole.MEMBER, + creator=self.user, + ) + with self.assertRaises(IntegrityError): + JournalTeamMember.objects.create( + user=self.user, + journal=self.journal, + role=TeamRole.MANAGER, + creator=self.user, + ) + + def test_user_is_manager(self): + """Test checking if a user is a journal manager.""" + JournalTeamMember.objects.create( + user=self.manager_user, + journal=self.journal, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + self.assertTrue( + JournalTeamMember.user_is_manager(self.manager_user, self.journal) + ) + self.assertFalse(JournalTeamMember.user_is_manager(self.user, self.journal)) + + def test_get_user_journals(self): + """Test getting journals for a user.""" + JournalTeamMember.objects.create( + user=self.user, + journal=self.journal, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + journals = JournalTeamMember.get_user_journals(self.user) + self.assertEqual(journals.count(), 1) + self.assertEqual(journals.first().journal, self.journal) + + +class CompanyTeamMemberModelTest(TestCase): + """Test cases for the CompanyTeamMember model.""" + + def setUp(self): + self.user = User.objects.create_user( + username="testuser", email="test@example.com", password="testpass123" + ) + self.manager_user = User.objects.create_user( + username="manager", email="manager@example.com", password="testpass123" + ) + self.company = Company.objects.create( + name="Test Company", + creator=self.user, + ) + + def test_create_company_team_member(self): + """Test creating a company team member.""" + member = CompanyTeamMember.objects.create( + user=self.user, + company=self.company, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + self.assertEqual(member.user, self.user) + self.assertEqual(member.company, self.company) + self.assertEqual(member.role, TeamRole.MEMBER) + self.assertFalse(member.is_manager()) + + def test_create_company_team_manager(self): + """Test creating a company team manager.""" + manager = CompanyTeamMember.objects.create( + user=self.manager_user, + company=self.company, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + self.assertEqual(manager.role, TeamRole.MANAGER) + self.assertTrue(manager.is_manager()) + + def test_company_team_member_unique_together(self): + """Test that a user can only be added once to a company.""" + CompanyTeamMember.objects.create( + user=self.user, + 
company=self.company, + role=TeamRole.MEMBER, + creator=self.user, + ) + with self.assertRaises(IntegrityError): + CompanyTeamMember.objects.create( + user=self.user, + company=self.company, + role=TeamRole.MANAGER, + creator=self.user, + ) + + def test_user_is_manager(self): + """Test checking if a user is a company manager.""" + CompanyTeamMember.objects.create( + user=self.manager_user, + company=self.company, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + self.assertTrue( + CompanyTeamMember.user_is_manager(self.manager_user, self.company) + ) + self.assertFalse(CompanyTeamMember.user_is_manager(self.user, self.company)) + + def test_get_user_companies(self): + """Test getting companies for a user.""" + CompanyTeamMember.objects.create( + user=self.user, + company=self.company, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + companies = CompanyTeamMember.get_user_companies(self.user) + self.assertEqual(companies.count(), 1) + self.assertEqual(companies.first().company, self.company) + + def test_company_get_managers(self): + """Test getting managers for a company.""" + CompanyTeamMember.objects.create( + user=self.manager_user, + company=self.company, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + CompanyTeamMember.objects.create( + user=self.user, + company=self.company, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + managers = Company.get_managers(self.company.id) + self.assertEqual(managers.count(), 1) + self.assertEqual(managers.first().user, self.manager_user) + + def test_company_get_members(self): + """Test getting all members (including managers) for a company.""" + CompanyTeamMember.objects.create( + user=self.manager_user, + company=self.company, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + CompanyTeamMember.objects.create( + user=self.user, + company=self.company, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + members = Company.get_members(self.company.id) + self.assertEqual(members.count(), 2) + + +class JournalCompanyContractModelTest(TestCase): + """Test cases for the JournalCompanyContract model.""" + + def setUp(self): + self.user = User.objects.create_user( + username="testuser", email="test@example.com", password="testpass123" + ) + self.manager_user = User.objects.create_user( + username="manager", email="manager@example.com", password="testpass123" + ) + self.journal = Journal.objects.create( + title="Test Journal", + creator=self.user, + ) + self.company = Company.objects.create( + name="Test Company", + creator=self.user, + ) + + def test_create_contract(self): + """Test creating a journal-company contract.""" + contract = JournalCompanyContract.objects.create( + journal=self.journal, + company=self.company, + is_active=True, + notes="Test contract", + creator=self.user, + ) + self.assertEqual(contract.journal, self.journal) + self.assertEqual(contract.company, self.company) + self.assertTrue(contract.is_active) + self.assertIn(str(self.journal), str(contract)) + self.assertIn(str(self.company), str(contract)) + + def test_contract_unique_together(self): + """Test that a journal-company pair must be unique.""" + JournalCompanyContract.objects.create( + journal=self.journal, + company=self.company, + creator=self.user, + ) + with self.assertRaises(IntegrityError): + JournalCompanyContract.objects.create( + journal=self.journal, + company=self.company, + creator=self.user, + ) + + def 
test_get_journal_companies(self): + """Test getting companies contracted by a journal.""" + JournalCompanyContract.objects.create( + journal=self.journal, + company=self.company, + is_active=True, + creator=self.user, + ) + contracts = JournalCompanyContract.get_journal_companies(self.journal) + self.assertEqual(contracts.count(), 1) + self.assertEqual(contracts.first().company, self.company) + + def test_get_company_journals(self): + """Test getting journals that contracted a company.""" + JournalCompanyContract.objects.create( + journal=self.journal, + company=self.company, + is_active=True, + creator=self.user, + ) + contracts = JournalCompanyContract.get_company_journals(self.company) + self.assertEqual(contracts.count(), 1) + self.assertEqual(contracts.first().journal, self.journal) + + def test_can_manage_contract_as_manager(self): + """Test that journal managers can manage contracts.""" + JournalTeamMember.objects.create( + user=self.manager_user, + journal=self.journal, + role=TeamRole.MANAGER, + is_active_member=True, + creator=self.user, + ) + self.assertTrue( + JournalCompanyContract.can_manage_contract(self.manager_user, self.journal) + ) + + def test_can_manage_contract_as_non_manager(self): + """Test that non-managers cannot manage contracts.""" + JournalTeamMember.objects.create( + user=self.user, + journal=self.journal, + role=TeamRole.MEMBER, + is_active_member=True, + creator=self.user, + ) + self.assertFalse( + JournalCompanyContract.can_manage_contract(self.user, self.journal) + ) diff --git a/team/views.py b/team/views.py deleted file mode 100644 index b3bb3a06d..000000000 --- a/team/views.py +++ /dev/null @@ -1,16 +0,0 @@ -import logging - -from django.contrib import messages -from django.http import HttpResponseRedirect -from django.utils.translation import gettext_lazy as _ -from wagtail_modeladmin.views import CreateView, InspectView - - -class CollectionTeamMemberCreateView(CreateView): - def form_valid(self, form): - form.save_all(self.request.user) - messages.success( - self.request, - _("Member has been successfully created"), - ) - return HttpResponseRedirect(self.get_success_url()) diff --git a/team/wagtail_hooks.py b/team/wagtail_hooks.py index 2cdf55c1a..cfea8688b 100644 --- a/team/wagtail_hooks.py +++ b/team/wagtail_hooks.py @@ -1,63 +1,165 @@ -import json - -from django.contrib import messages -from django.http import HttpResponseRedirect -from django.shortcuts import get_object_or_404, redirect, render -from django.urls import include, path from django.utils.translation import gettext_lazy as _ -from wagtail import hooks -from wagtail_modeladmin.options import ( - ModelAdmin, - ModelAdminGroup, - modeladmin_register, -) +from wagtail.snippets.models import register_snippet +from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup from config.menu import get_menu_order -from team.views import CollectionTeamMemberCreateView - -from .models import CollectionTeamMember +from core.views import CommonControlFieldCreateView +from core.forms import CoreAdminModelForm +from .models import ( + CollectionTeamMember, + Company, + CompanyTeamMember, + JournalCompanyContract, + JournalTeamMember, +) -class CollectionTeamMemberModelAdmin(ModelAdmin): +class CollectionTeamMemberViewSet(SnippetViewSet): model = CollectionTeamMember menu_label = _("Collection Team Members") - menu_icon = "folder" - menu_order = 200 + menu_icon = "group" add_to_settings_menu = False exclude_from_explorer = False - create_view_class = CollectionTeamMemberCreateView + 
base_form_class = CoreAdminModelForm + add_view_class = CommonControlFieldCreateView list_display = ( "user", - "is_active_member", "collection", + "role", + "is_active_member", "updated", ) - list_filter = ("is_active_member", "collection") + list_filter = ("role", "is_active_member", "collection") search_fields = ( "collection__name", "collection__acron", "user__name", + "user__username", + "user__email", ) def get_queryset(self, request): + qs = super().get_queryset(request) if request.user.is_superuser: - return super().get_queryset(request) + return qs return CollectionTeamMember.members(request.user) -class TeamModelAdminGroup(ModelAdminGroup): - menu_icon = "folder" +class CompanyViewSet(SnippetViewSet): + model = Company + menu_label = _("Companies") + menu_icon = "group" + add_to_settings_menu = False + exclude_from_explorer = False + add_view_class = CommonControlFieldCreateView + base_form_class = CoreAdminModelForm + + list_display = ( + "name", + "personal_contact", + "contact_email", + "certified_since", + "is_active", + "updated", + ) + list_filter = ("is_active", "certified_since") + search_fields = ( + "name", + "personal_contact", + "contact_email", + "url", + ) + + +class JournalTeamMemberViewSet(SnippetViewSet): + model = JournalTeamMember + menu_label = _("Journal Team Members") + menu_icon = "user" + add_to_settings_menu = False + exclude_from_explorer = False + add_view_class = CommonControlFieldCreateView + base_form_class = CoreAdminModelForm + + list_display = ( + "user", + "journal", + "role", + "is_active_member", + "created", + ) + list_filter = ("role", "is_active_member", "created") + search_fields = ( + "user__username", + "user__email", + "user__name", + "journal__title", + ) + + +class CompanyTeamMemberViewSet(SnippetViewSet): + model = CompanyTeamMember + menu_label = _("Company Team Members") + menu_icon = "user" + add_to_settings_menu = False + exclude_from_explorer = False + add_view_class = CommonControlFieldCreateView + base_form_class = CoreAdminModelForm + + list_display = ( + "user", + "company", + "role", + "is_active_member", + "created", + ) + list_filter = ("role", "is_active_member", "created") + search_fields = ( + "user__username", + "user__email", + "user__name", + "company__name", + ) + + +class JournalCompanyContractViewSet(SnippetViewSet): + model = JournalCompanyContract + menu_label = _("Journal-Company Contracts") + menu_icon = "doc-full" + add_to_settings_menu = False + exclude_from_explorer = False + add_view_class = CommonControlFieldCreateView + base_form_class = CoreAdminModelForm + + list_display = ( + "journal", + "company", + "is_active", + "start_date", + "end_date", + ) + list_filter = ("is_active", "start_date", "end_date") + search_fields = ( + "journal__title", + "company__name", + ) + + +class TeamViewSetGroup(SnippetViewSetGroup): + """ + Group of ViewSets for Team Management + """ + items = [ + CollectionTeamMemberViewSet, + CompanyViewSet, + JournalTeamMemberViewSet, + CompanyTeamMemberViewSet, + JournalCompanyContractViewSet, + ] + menu_icon = "group" menu_label = _("Teams") - items = (CollectionTeamMemberModelAdmin,) menu_order = get_menu_order("team") -modeladmin_register(TeamModelAdminGroup) - +register_snippet(TeamViewSetGroup) -# @hooks.register("register_admin_urls") -# def register_disclosure_url(): -# return [ -# path("team/", include("team.urls", namespace="team")), -# ] diff --git a/tracker/models.py b/tracker/models.py index 079eeefdb..cc6b80030 100644 --- a/tracker/models.py +++ b/tracker/models.py @@ 
-15,6 +15,7 @@ from core.forms import CoreAdminModelForm from core.models import CommonControlField +from core.utils.sanitize import sanitize_for_json from tracker import choices @@ -162,7 +163,7 @@ def create( json.dumps(detail) obj.detail = detail except Exception as e: - obj.detail = str(detail) + obj.detail = sanitize_for_json(detail) if exc_traceback: obj.traceback = traceback.format_tb(exc_traceback) @@ -249,7 +250,7 @@ def finish( try: json.dumps(detail) except Exception as exc_detail: - detail = str(detail) + detail = sanitize_for_json(detail) if detail: self.detail = detail diff --git a/upload/controller.py b/upload/controller.py index fb000dc9e..ce4d7acc2 100644 --- a/upload/controller.py +++ b/upload/controller.py @@ -1,13 +1,8 @@ import os import logging -import sys -import traceback from django.db.models import Q from django.utils.translation import gettext_lazy as _ -from packtools.sps.models.dates import ArticleDates -from packtools.sps.models.front_articlemeta_issue import ArticleMetaIssue -from packtools.sps.models.journal_meta import ISSN, Title from packtools.sps.pid_provider.xml_sps_lib import GetXMLItemsError, XMLWithPre from article import choices as article_choices @@ -16,9 +11,11 @@ from journal.models import Journal from package.models import update_zip_file from pid_provider.requester import PidRequester -from proc.controller import create_or_update_journal, create_or_update_issue +from proc.controller import ( + JournalDataChecker, + IssueDataChecker, +) -from tracker.models import UnexpectedEvent from upload.models import ( Package, ValidationReport, @@ -35,6 +32,136 @@ class UnexpectedPackageError(Exception): ... class PackageDataError(Exception): ... +class UploadJournalDataChecker(JournalDataChecker): + """Extensão de JournalDataChecker com funcionalidades específicas do fluxo de upload.""" + + def _build_similar_journals_msg(self): + """Monta mensagem com journals similares para diagnóstico.""" + similar_journals = [] + for j in Journal.get_similar_items( + self.journal_title, self.issn_electronic, self.issn_print + ): + similar_journals.append( + { + "journal_title": j.title, + "issn_electronic": j.official_journal.issn_electronic, + "issn_print": j.official_journal.issn_print, + } + ) + if similar_journals: + return _("Registered journals: {}. ").format(similar_journals) + return _("Found no registered journal. ") + + def raise_error(self): + """Monta e lança erro com informações detalhadas.""" + data = { + "journal_title": self.journal_title, + "issn_electronic": self.issn_electronic, + "issn_print": self.issn_print, + } + similar_journals = self._build_similar_journals_msg() + + if self.core_communication_error: + raise PackageDataError( + _( + "CORE COMMUNICATION FAILURE: Could not verify journal data. " + "The core API is unreachable. " + "Journal in XML: {}. {}" + ).format(data, similar_journals) + ) + raise PackageDataError( + _( + "Journal in XML must be a registered journal. " + "Journal in XML: {}. {}. 
" + "Register the journal on core.scielo.org" + ).format(data, similar_journals) + ) + + def check(self, response): + """Executa a verificação completa de journal e atualiza response.""" + journal = self.get_or_fetch() + if journal: + response["journal"] = journal + return + + response["journal"] = None + if self.core_communication_error: + response["core_communication_error"] = True + self.raise_error() + + +class UploadIssueDataChecker(IssueDataChecker): + """Extensão de IssueDataChecker com funcionalidades específicas do fluxo de upload.""" + + def _build_similar_issues_msg(self): + """Monta mensagem com issues similares para diagnóstico.""" + items = None + if self.publication_year and self.volume: + items = Issue.objects.filter( + Q(publication_year=self.publication_year) | Q(volume=self.volume), + journal=self._journal, + ) + elif self.publication_year: + items = Issue.objects.filter( + Q(publication_year=self.publication_year), journal=self._journal + ) + if items is None or not items.exists(): + items = Issue.objects.filter(journal=self._journal) + + issues = [] + for item in items.order_by("-publication_year"): + issues.append( + { + "publication_year": item.publication_year, + "volume": item.volume, + "number": item.number, + "supplement": item.supplement, + } + ) + if issues: + return _("Registered issues: {}. ").format(issues) + return _("{} has no registered issues").format(self._journal) + + def raise_error(self): + """Monta e lança erro com informações detalhadas.""" + data = { + "journal": self._journal, + "volume": self.volume, + "number": self.number, + "suppl": self.suppl, + "publication_year": self.publication_year, + } + similar_issues = self._build_similar_issues_msg() + + if self.core_communication_error: + raise PackageDataError( + _( + "CORE COMMUNICATION FAILURE: Could not verify issue data. " + "The core API is unreachable. " + "Issue in XML: {}. {}" + ).format(data, similar_issues) + ) + raise PackageDataError( + _( + "Issue in XML must be a registered issue. " + "Issue in XML {}. {}. 
" + "Register the issue on core.scielo.org" + ).format(data, similar_issues) + ) + + def check(self, response): + """Executa a verificação completa de issue e atualiza response.""" + issue = self.get_or_fetch() + if issue: + response["issue"] = issue + return + + response["issue"] = None + if self.core_communication_error: + response["core_communication_error"] = True + self.raise_error() + + def get_last_package(article_id, **kwargs): try: return ( @@ -165,15 +292,22 @@ def _check_article_and_journal(package, xml_with_pre, user): # verifica se journal e issue estão registrados xmltree = xml_with_pre.xmltree - _check_journal(response, xmltree, user) - logging.info(f"_check_journal: {response}") - _check_issue(response, xmltree, user) - logging.info(f"_check_issue: {response}") + journal_checker = UploadJournalDataChecker.from_xmltree(xmltree, user) + journal_checker.check(response) + logging.info(f"UploadJournalDataChecker.check: {response}") + + issue_checker = UploadIssueDataChecker.from_xmltree( + xmltree, user, response["journal"] + ) + issue_checker.check(response) + logging.info(f"UploadIssueDataChecker.check: {response}") # verifica a consistência dos dados de journal e issue # no XML e na base de dados - _check_xml_and_registered_data_compability(response) - logging.info(f"_check_xml_and_registered_data_compability: {response}") + _check_xml_and_registered_data_compatibility( + response, journal_checker, issue_checker + ) + logging.info(f"_check_xml_and_registered_data_compatibility: {response}") response["package_status"] = choices.PS_ENQUEUED_FOR_VALIDATION @@ -250,118 +384,44 @@ def _archive_pending_correction_package(response, name): ) -def _check_journal(response, xmltree, user): - xml = Title(xmltree) - journal_title = xml.journal_title - - xml = ISSN(xmltree) - issn_electronic = xml.epub - issn_print = xml.ppub - - response["journal"] = create_or_update_journal( - journal_title, issn_electronic, issn_print, user - ) - - if not response["journal"]: - data = { - "journal_title": journal_title, - "issn_electronic": issn_electronic, - "issn_print": issn_print, - } - similar_journals = [] - for j in Journal.get_similar_items(journal_title, issn_electronic, issn_print): - similar_journals.append( - { - "journal_title": j.title, - "issn_electronic": j.official_journal.issn_electronic, - "issn_print": j.official_journal.issn_print, - } - ) - if similar_journals: - similar_journals = _("Registered journals: {}. ").format(similar_journals) - else: - similar_journals = _("Found no registered journal. ") - raise PackageDataError( - _( - "Journal in XML must be a registered journal. Journal in XML: {}. {}. 
Register the journal on core.scielo.org" - ).format(data, similar_journals) - ) - - -def _check_issue(response, xmltree, user): - xml = ArticleDates(xmltree) - try: - publication_year = xml.collection_date["year"] - except (TypeError, KeyError, ValueError): - try: - publication_year = xml.article_date["year"] - except (TypeError, KeyError, ValueError): - publication_year = None - - xml = ArticleMetaIssue(xmltree) - response["issue"] = create_or_update_issue( - response["journal"], publication_year, xml.volume, xml.suppl, xml.number, user - ) - logging.info(f"issue: {response['issue']}") - if not response["issue"]: - data = { - "journal": response["journal"], - "volume": xml.volume, - "number": xml.number, - "suppl": xml.suppl, - "publication_year": publication_year, - } - logging.info(f"_check_issue {data}") - items = None - if publication_year and xml.volume: - items = Issue.objects.filter( - Q(publication_year=publication_year) | Q(volume=xml.volume), - journal=response["journal"], - ) - elif publication_year: - items = Issue.objects.filter( - Q(publication_year=publication_year), journal=response["journal"] - ) - if not items or not items.count(): - items = Issue.objects.filter(journal=response["journal"]) - - issues = [] - for item in items.order_by("-publication_year"): - issues.append( - { - "publication_year": publication_year, - "volume": item.volume, - "number": item.number, - "supplement": item.supplement, - } - ) - if issues: - issues = _("Registered issues: {}. ").format(issues) - else: - issues = _("{} has no registered issues").format(response["journal"]) - raise PackageDataError( - _( - "Issue in XML must be a registered issue. Issue in XML {}. {}. Register the issue on core.scielo.org" - ).format(data, issues) - ) - - -def _check_xml_and_registered_data_compability(response): +def _check_xml_and_registered_data_compatibility( + response, journal_checker, issue_checker +): article = response["article"] if article: journal = response["journal"] if not journal == article.journal: - raise PackageDataError( - _("{} (registered, {}) differs from {} (XML, {})").format( + # divergência detectada - consulta dados remotos de journal + journal_checker.refresh(response) + journal = response["journal"] + + # re-verifica após a tentativa de atualização + if not journal == article.journal: + error_msg = _("{} (registered, {}) differs from {} (XML, {})").format( article.journal, article.journal.id, journal, journal.id ) - ) + if response.get("core_communication_error"): + error_msg = _( + "CORE COMMUNICATION FAILURE: {}. " + "Could not refresh data from core API" + ).format(error_msg) + raise PackageDataError(error_msg) issue = response["issue"] if not issue == article.issue: - raise PackageDataError( - _("{} (registered, {}) differs from {} (XML, {})").format( + # divergência detectada - consulta dados remotos de issue + issue_checker.refresh(response) + issue = response["issue"] + + # re-verifica após a tentativa de atualização + if not issue == article.issue: + error_msg = _("{} (registered, {}) differs from {} (XML, {})").format( article.issue, article.issue.id, issue, issue.id ) - ) + if response.get("core_communication_error"): + error_msg = _( + "CORE COMMUNICATION FAILURE: {}. 
" + "Could not refresh data from core API" + ).format(error_msg) + raise PackageDataError(error_msg) diff --git a/upload/migrations/0001_initial.py b/upload/migrations/0001_initial.py index f819f923b..25e53659f 100644 --- a/upload/migrations/0001_initial.py +++ b/upload/migrations/0001_initial.py @@ -14,7 +14,7 @@ class Migration(migrations.Migration): ("article", "0002_remove_articleauthor_author_and_more"), ("collection", "0003_websiteconfigurationendpoint"), ("issue", "0004_issue_issue_pid_suffix_issue_order_toc_tocsection"), - ("journal", "0005_journaltoc_alter_journal_official_journal_and_more"), + ("journal", "0005_officialjournal_next_journal_title_and_more"), ("package", "0003_remove_spspkg_components_remove_spspkg_scheduled_and_more"), ("proc", "0006_issueproc_resumption_date"), ("team", "0001_initial"), diff --git a/upload/migrations/0002_package_main_doi_and_more.py b/upload/migrations/0002_package_main_doi_and_more.py index 3434f2692..aafef5a85 100644 --- a/upload/migrations/0002_package_main_doi_and_more.py +++ b/upload/migrations/0002_package_main_doi_and_more.py @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ ("article", "0002_remove_articleauthor_author_and_more"), ("issue", "0004_issue_issue_pid_suffix_issue_order_toc_tocsection"), - ("journal", "0005_journaltoc_alter_journal_official_journal_and_more"), + ("journal", "0005_officialjournal_next_journal_title_and_more"), ("package", "0003_remove_spspkg_components_remove_spspkg_scheduled_and_more"), ("team", "0001_initial"), ("upload", "0001_initial"), diff --git a/upload/models.py b/upload/models.py index 8d241eae9..daf5c39c1 100644 --- a/upload/models.py +++ b/upload/models.py @@ -1717,8 +1717,13 @@ def get_numbers(cls, package, report=None): .values("status", "reaction") .annotate(total=Count("id")) ): - items["total_" + item["status"].lower()] += item["total"] - items["total_" + item["reaction"]] += item["total"] + status_key = "total_" + (item.get("status") or "").lower() + if status_key not in items: + continue + items[status_key] += item["total"] + reaction_key = "total_" + (item.get("reaction") or "") + if reaction_key in items: + items[reaction_key] += item["total"] total += item["total"] items["total"] = total diff --git a/upload/templates/modeladmin/upload/package/inspect.html b/upload/templates/modeladmin/upload/package/inspect.html index fca5a9f96..228134ada 100644 --- a/upload/templates/modeladmin/upload/package/inspect.html +++ b/upload/templates/modeladmin/upload/package/inspect.html @@ -51,6 +51,17 @@

    {% trans "Details" %}

     {% block content %} {{ block.super }}
+    {% if blocking_errors %}
+        {% for error in blocking_errors %}
+            {{ error }}
+        {% endfor %}
+    {% endif %}
+
     {% if category != 'generated-by-the-system' %}
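Editor's note (not part of the diff): the new UploadJournalDataChecker / UploadIssueDataChecker classes and _check_xml_and_registered_data_compatibility() from upload/controller.py above are exercised by the tests that follow. The minimal sketch below shows how they fit together; the wrapper function name is a hypothetical helper, while the checker API (from_xmltree, check, refresh) and the response keys are taken from the controller diff and the tests.

    # Editor's sketch, not part of the diff. The wrapper function is hypothetical;
    # the checker API matches upload/controller.py and upload/test_controller.py.
    from upload.controller import (
        UploadIssueDataChecker,
        UploadJournalDataChecker,
        _check_xml_and_registered_data_compatibility,
    )


    def validate_package_metadata(xmltree, user, article=None):  # hypothetical helper
        response = {"article": article}

        # Local-first lookup: check() resolves journal/issue from local data and
        # falls back to the core API only when nothing is registered locally.
        journal_checker = UploadJournalDataChecker.from_xmltree(xmltree, user)
        journal_checker.check(response)

        issue_checker = UploadIssueDataChecker.from_xmltree(xmltree, user, response["journal"])
        issue_checker.check(response)

        # If the XML data diverges from the already-registered article, refresh()
        # is called once per divergence; when the core API is unreachable the
        # error is raised with a "CORE COMMUNICATION FAILURE" prefix.
        _check_xml_and_registered_data_compatibility(response, journal_checker, issue_checker)
        return response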
    diff --git a/upload/test_controller.py b/upload/test_controller.py new file mode 100644 index 000000000..0ee99f845 --- /dev/null +++ b/upload/test_controller.py @@ -0,0 +1,562 @@ +import sys +import unittest +from unittest.mock import MagicMock, Mock, patch, call + +# Mock modules that cannot be imported in test environment +# (upload app is not in INSTALLED_APPS, and zip_pkg module doesn't exist) +_mock_upload_models = MagicMock() +_mock_upload_models.choices = MagicMock() +_mock_upload_models.choices.VAL_CAT_PACKAGE_FILE = "package-file" +_mock_upload_models.choices.PS_ENQUEUED_FOR_VALIDATION = "enqueued-for-validation" +_mock_upload_models.choices.PS_PENDING_CORRECTION = "pending-correction" +_mock_upload_models.choices.PS_UNEXPECTED = "unexpected" +_mock_upload_models.choices.VALIDATION_RESULT_BLOCKING = "BLOCKING" +_mock_upload_models.choices.PS_WIP = ("submitted",) +_mock_upload_models.choices.PC_NEW_DOCUMENT = "new-document" +sys.modules.setdefault("upload.utils.zip_pkg", MagicMock()) +sys.modules.setdefault("upload.models", _mock_upload_models) +sys.modules.setdefault("pid_provider.requester", MagicMock()) + +from upload.controller import ( + PackageDataError, + UploadJournalDataChecker, + UploadIssueDataChecker, + _check_xml_and_registered_data_compatibility, +) + + +class JournalDoesNotExist(Exception): + """Fake DoesNotExist exception for Journal model testing.""" + + pass + + +class IssueDoesNotExist(Exception): + """Fake DoesNotExist exception for Issue model testing.""" + + pass + + +class UploadJournalDataCheckerTestCase(unittest.TestCase): + """Test cases for UploadJournalDataChecker local-first lookup with core fallback.""" + + @patch("proc.source_core_api.Journal") + def test_check_returns_journal_from_local_data(self, mock_journal_cls): + """Test that local data is used first without querying core API.""" + mock_journal = Mock() + mock_journal_cls.get_registered.return_value = mock_journal + + response = {} + user = Mock() + + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + checker.check(response) + + self.assertEqual(response["journal"], mock_journal) + mock_journal_cls.get_registered.assert_called_once_with( + "Test Journal", "1234-5678", "8765-4321" + ) + + @patch("proc.source_core_api.fetch_and_create_journal") + @patch("proc.source_core_api.Journal") + def test_check_fetches_from_core_when_local_not_found( + self, mock_journal_cls, mock_fetch + ): + """Test that core API is queried when local data doesn't exist.""" + mock_journal = Mock() + mock_journal_cls.DoesNotExist = JournalDoesNotExist + # First call: DoesNotExist, second call after core fetch: returns journal + mock_journal_cls.get_registered.side_effect = [ + JournalDoesNotExist(), + mock_journal, + ] + + response = {} + user = Mock() + + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + checker.model = mock_journal_cls + checker.check(response) + + self.assertEqual(response["journal"], mock_journal) + mock_fetch.assert_called_once_with( + user, + issn_electronic="1234-5678", + issn_print="8765-4321", + force_update=True, + ) + + @patch("upload.controller.Journal") + @patch("proc.source_core_api.fetch_and_create_journal") + @patch("proc.source_core_api.Journal") + def test_check_raises_error_with_core_failure_message_when_core_unreachable( + self, mock_journal_cls, mock_fetch, mock_upload_journal_cls + ): + """Test that core communication failure is reported when core is unreachable.""" + from proc.source_core_api import 
FetchJournalDataException + + mock_journal_cls.DoesNotExist = JournalDoesNotExist + mock_journal_cls.get_registered.side_effect = JournalDoesNotExist() + mock_upload_journal_cls.get_similar_items.return_value = [] + mock_fetch.side_effect = FetchJournalDataException("Connection refused") + + response = {} + user = Mock() + + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + checker.model = mock_journal_cls + with self.assertRaises(PackageDataError) as context: + checker.check(response) + + self.assertIn("CORE COMMUNICATION FAILURE", str(context.exception)) + self.assertTrue(response.get("core_communication_error")) + + @patch("upload.controller.Journal") + @patch("proc.source_core_api.fetch_and_create_journal") + @patch("proc.source_core_api.Journal") + def test_check_raises_error_without_core_failure_when_journal_not_registered( + self, mock_journal_cls, mock_fetch, mock_upload_journal_cls + ): + """Test that a normal error is raised when journal is not registered (core works fine).""" + mock_journal_cls.DoesNotExist = JournalDoesNotExist + mock_journal_cls.get_registered.side_effect = JournalDoesNotExist() + mock_upload_journal_cls.get_similar_items.return_value = [] + + response = {} + user = Mock() + + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + checker.model = mock_journal_cls + with self.assertRaises(PackageDataError) as context: + checker.check(response) + + self.assertNotIn("CORE COMMUNICATION FAILURE", str(context.exception)) + self.assertIn("registered journal", str(context.exception)) + self.assertFalse(response.get("core_communication_error")) + + @patch("proc.source_core_api.Journal") + def test_check_does_not_call_core_when_local_found(self, mock_journal_cls): + """Test that core API is NOT called when local data exists.""" + mock_journal = Mock() + mock_journal_cls.get_registered.return_value = mock_journal + + response = {} + user = Mock() + + with patch("proc.source_core_api.fetch_and_create_journal") as mock_fetch: + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + checker.check(response) + mock_fetch.assert_not_called() + + @patch("proc.source_core_api.Journal") + @patch("proc.source_core_api.fetch_and_create_journal") + def test_refresh_updates_response_on_success(self, mock_fetch, mock_journal_cls): + """Test that successful core fetch updates the journal in response.""" + mock_journal = Mock() + mock_journal_cls.get_registered.return_value = mock_journal + + response = {"journal": None} + user = Mock() + + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + checker.refresh(response) + + self.assertEqual(response["journal"], mock_journal) + self.assertFalse(response.get("core_communication_error")) + + @patch("proc.source_core_api.fetch_and_create_journal") + def test_refresh_sets_error_flag_on_core_failure(self, mock_fetch): + """Test that core API failure sets the core_communication_error flag.""" + from proc.source_core_api import FetchJournalDataException + + mock_fetch.side_effect = FetchJournalDataException("Timeout") + + response = {"journal": None} + user = Mock() + + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + checker.refresh(response) + + self.assertTrue(response.get("core_communication_error")) + + @patch("proc.source_core_api.Journal") + def test_get_or_fetch_returns_local_journal(self, mock_journal_cls): + """Test get_or_fetch returns journal from local data.""" + 
mock_journal = Mock() + mock_journal_cls.get_registered.return_value = mock_journal + + user = Mock() + + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + result = checker.get_or_fetch() + + self.assertEqual(result, mock_journal) + + @patch("proc.source_core_api.fetch_and_create_journal") + @patch("proc.source_core_api.Journal") + def test_core_communication_error_resets_on_successful_fetch( + self, mock_journal_cls, mock_fetch + ): + """Test that core_communication_error is reset when fetch succeeds.""" + from proc.source_core_api import FetchJournalDataException + + mock_journal_cls.DoesNotExist = JournalDoesNotExist + user = Mock() + + checker = UploadJournalDataChecker("Test Journal", "1234-5678", "8765-4321", user) + + # First fetch fails + mock_fetch.side_effect = FetchJournalDataException("Timeout") + checker.fetch_from_core() + self.assertTrue(checker.core_communication_error) + + # Second fetch succeeds + mock_fetch.side_effect = None + checker.fetch_from_core() + self.assertFalse(checker.core_communication_error) + + @patch("packtools.sps.models.journal_meta.ISSN") + @patch("packtools.sps.models.journal_meta.Title") + def test_from_xmltree_creates_checker(self, mock_title_cls, mock_issn_cls): + """Test that from_xmltree correctly creates a checker from XML data.""" + mock_title = Mock() + mock_title.journal_title = "Test Journal" + mock_title_cls.return_value = mock_title + + mock_issn = Mock() + mock_issn.epub = "1234-5678" + mock_issn.ppub = "8765-4321" + mock_issn_cls.return_value = mock_issn + + xmltree = Mock() + user = Mock() + + checker = UploadJournalDataChecker.from_xmltree(xmltree, user) + + self.assertEqual(checker.journal_title, "Test Journal") + self.assertEqual(checker.issn_electronic, "1234-5678") + self.assertEqual(checker.issn_print, "8765-4321") + self.assertIsInstance(checker, UploadJournalDataChecker) + + +class UploadIssueDataCheckerTestCase(unittest.TestCase): + """Test cases for UploadIssueDataChecker local-first lookup with core fallback.""" + + @patch("proc.source_core_api.Issue") + def test_check_returns_issue_from_local_data(self, mock_issue_cls): + """Test that local data is used first without querying core API.""" + mock_issue = Mock() + mock_issue_cls.get.return_value = mock_issue + + mock_journal = Mock() + response = {"journal": mock_journal} + user = Mock() + + checker = UploadIssueDataChecker(mock_journal, "2024", "10", None, "1", user) + checker.check(response) + + self.assertEqual(response["issue"], mock_issue) + mock_issue_cls.get.assert_called_once_with( + journal=mock_journal, + volume="10", + supplement=None, + number="1", + ) + + @patch("proc.source_core_api.fetch_and_create_issues") + @patch("proc.source_core_api.Issue") + def test_check_fetches_from_core_when_local_not_found( + self, mock_issue_cls, mock_fetch + ): + """Test that core API is queried when local data doesn't exist.""" + mock_issue = Mock() + mock_issue_cls.DoesNotExist = IssueDoesNotExist + # First call: DoesNotExist, second call after core fetch: returns issue + mock_issue_cls.get.side_effect = [ + IssueDoesNotExist(), + mock_issue, + ] + + mock_journal = Mock() + response = {"journal": mock_journal} + user = Mock() + + checker = UploadIssueDataChecker(mock_journal, "2024", "10", None, "1", user) + checker.model = mock_issue_cls + checker.check(response) + + self.assertEqual(response["issue"], mock_issue) + mock_fetch.assert_called_once_with( + mock_journal, "2024", "10", None, "1", user + ) + + @patch("upload.controller.Issue") + 
@patch("proc.source_core_api.fetch_and_create_issues") + @patch("proc.source_core_api.Issue") + def test_check_raises_error_with_core_failure_message_when_core_unreachable( + self, mock_issue_cls, mock_fetch, mock_upload_issue_cls + ): + """Test that core communication failure is reported when core is unreachable.""" + from proc.source_core_api import FetchIssueDataException + + mock_issue_cls.DoesNotExist = IssueDoesNotExist + mock_issue_cls.get.side_effect = IssueDoesNotExist() + mock_fetch.side_effect = FetchIssueDataException("Connection refused") + + mock_qs = MagicMock() + mock_qs.exists.return_value = False + mock_qs.order_by.return_value = [] + mock_upload_issue_cls.objects.filter.return_value = mock_qs + + mock_journal = Mock() + response = {"journal": mock_journal} + user = Mock() + + checker = UploadIssueDataChecker(mock_journal, "2024", "10", None, "1", user) + checker.model = mock_issue_cls + with self.assertRaises(PackageDataError) as context: + checker.check(response) + + self.assertIn("CORE COMMUNICATION FAILURE", str(context.exception)) + self.assertTrue(response.get("core_communication_error")) + + @patch("proc.source_core_api.Issue") + def test_check_does_not_call_core_when_local_found(self, mock_issue_cls): + """Test that core API is NOT called when local data exists.""" + mock_issue = Mock() + mock_issue_cls.get.return_value = mock_issue + + mock_journal = Mock() + response = {"journal": mock_journal} + user = Mock() + + with patch("proc.source_core_api.fetch_and_create_issues") as mock_fetch: + checker = UploadIssueDataChecker(mock_journal, "2024", "10", None, "1", user) + checker.check(response) + mock_fetch.assert_not_called() + + @patch("proc.source_core_api.Issue") + @patch("proc.source_core_api.fetch_and_create_issues") + def test_refresh_updates_response_on_success(self, mock_fetch, mock_issue_cls): + """Test that successful core fetch updates the issue in response.""" + mock_issue = Mock() + mock_issue_cls.get.return_value = mock_issue + + mock_journal = Mock() + response = {"journal": mock_journal, "issue": None} + user = Mock() + + checker = UploadIssueDataChecker(mock_journal, "2024", "10", None, "1", user) + checker.refresh(response) + + self.assertEqual(response["issue"], mock_issue) + self.assertFalse(response.get("core_communication_error")) + + @patch("proc.source_core_api.fetch_and_create_issues") + def test_refresh_sets_error_flag_on_core_failure(self, mock_fetch): + """Test that core API failure sets the core_communication_error flag.""" + from proc.source_core_api import FetchIssueDataException + + mock_fetch.side_effect = FetchIssueDataException("Timeout") + + mock_journal = Mock() + response = {"journal": mock_journal, "issue": None} + user = Mock() + + checker = UploadIssueDataChecker(mock_journal, "2024", "10", None, "1", user) + checker.refresh(response) + + self.assertTrue(response.get("core_communication_error")) + + @patch("proc.source_core_api.fetch_and_create_issues") + @patch("proc.source_core_api.Issue") + def test_core_communication_error_resets_on_successful_fetch( + self, mock_issue_cls, mock_fetch + ): + """Test that core_communication_error is reset when fetch succeeds.""" + from proc.source_core_api import FetchIssueDataException + + mock_issue_cls.DoesNotExist = IssueDoesNotExist + mock_journal = Mock() + user = Mock() + + checker = UploadIssueDataChecker(mock_journal, "2024", "10", None, "1", user) + + # First fetch fails + mock_fetch.side_effect = FetchIssueDataException("Timeout") + checker.fetch_from_core() + 
self.assertTrue(checker.core_communication_error) + + # Second fetch succeeds + mock_fetch.side_effect = None + checker.fetch_from_core() + self.assertFalse(checker.core_communication_error) + + @patch("packtools.sps.models.front_articlemeta_issue.ArticleMetaIssue") + @patch("packtools.sps.models.dates.ArticleDates") + def test_from_xmltree_creates_checker(self, mock_dates_cls, mock_meta_cls): + """Test that from_xmltree correctly creates a checker from XML data.""" + mock_dates = Mock() + mock_dates.collection_date = {"year": "2024"} + mock_dates_cls.return_value = mock_dates + + mock_meta = Mock() + mock_meta.volume = "10" + mock_meta.suppl = None + mock_meta.number = "1" + mock_meta_cls.return_value = mock_meta + + xmltree = Mock() + user = Mock() + mock_journal = Mock() + + checker = UploadIssueDataChecker.from_xmltree(xmltree, user, mock_journal) + + self.assertEqual(checker.publication_year, "2024") + self.assertEqual(checker.volume, "10") + self.assertEqual(checker.number, "1") + self.assertIsNone(checker.suppl) + self.assertIsInstance(checker, UploadIssueDataChecker) + + +class CheckXmlAndRegisteredDataCompatibilityTestCase(unittest.TestCase): + """Test cases for _check_xml_and_registered_data_compatibility().""" + + def test_no_article_does_nothing(self): + """Test that function returns without error when there is no article.""" + response = {"article": None, "journal": Mock(), "issue": Mock()} + journal_checker = Mock() + issue_checker = Mock() + + _check_xml_and_registered_data_compatibility( + response, journal_checker, issue_checker + ) + + def test_matching_journal_and_issue_passes(self): + """Test that function passes when journal and issue match.""" + mock_journal = Mock() + mock_issue = Mock() + mock_article = Mock() + mock_article.journal = mock_journal + mock_article.issue = mock_issue + + response = { + "article": mock_article, + "journal": mock_journal, + "issue": mock_issue, + } + journal_checker = Mock() + issue_checker = Mock() + + _check_xml_and_registered_data_compatibility( + response, journal_checker, issue_checker + ) + + def test_journal_divergence_triggers_core_refresh(self): + """Test that journal divergence triggers a refresh from core.""" + mock_journal_xml = Mock() + mock_journal_article = Mock() + mock_issue = Mock() + mock_article = Mock() + mock_article.journal = mock_journal_article + mock_article.issue = mock_issue + + response = { + "article": mock_article, + "journal": mock_journal_xml, + "issue": mock_issue, + } + + journal_checker = Mock() + issue_checker = Mock() + + with self.assertRaises(PackageDataError): + _check_xml_and_registered_data_compatibility( + response, journal_checker, issue_checker + ) + + journal_checker.refresh.assert_called_once() + + def test_journal_divergence_resolved_after_refresh(self): + """Test that no error is raised when divergence is resolved after refresh.""" + mock_journal = Mock() + mock_issue = Mock() + mock_article = Mock() + mock_article.journal = mock_journal + mock_article.issue = mock_issue + + # Initially journal differs + mock_journal_xml = Mock() + response = { + "article": mock_article, + "journal": mock_journal_xml, + "issue": mock_issue, + } + + journal_checker = Mock() + issue_checker = Mock() + + # After refresh, journal matches + def refresh_side_effect(response): + response["journal"] = mock_journal + + journal_checker.refresh.side_effect = refresh_side_effect + + _check_xml_and_registered_data_compatibility( + response, journal_checker, issue_checker + ) + + def 
test_journal_divergence_with_core_failure_includes_core_error_message(self): + """Test that core communication failure is mentioned when divergence persists and core failed.""" + mock_journal_xml = Mock() + mock_journal_article = Mock() + mock_issue = Mock() + mock_article = Mock() + mock_article.journal = mock_journal_article + mock_article.issue = mock_issue + + response = { + "article": mock_article, + "journal": mock_journal_xml, + "issue": mock_issue, + } + + journal_checker = Mock() + issue_checker = Mock() + + def refresh_side_effect(response): + response["core_communication_error"] = True + + journal_checker.refresh.side_effect = refresh_side_effect + + with self.assertRaises(PackageDataError) as context: + _check_xml_and_registered_data_compatibility( + response, journal_checker, issue_checker + ) + + self.assertIn("CORE COMMUNICATION FAILURE", str(context.exception)) + + def test_issue_divergence_triggers_core_refresh(self): + """Test that issue divergence triggers a refresh from core.""" + mock_journal = Mock() + mock_issue_xml = Mock() + mock_issue_article = Mock() + mock_article = Mock() + mock_article.journal = mock_journal + mock_article.issue = mock_issue_article + + response = { + "article": mock_article, + "journal": mock_journal, + "issue": mock_issue_xml, + } + + journal_checker = Mock() + issue_checker = Mock() + + with self.assertRaises(PackageDataError): + _check_xml_and_registered_data_compatibility( + response, journal_checker, issue_checker + ) + + issue_checker.refresh.assert_called_once() diff --git a/upload/views.py b/upload/views.py index fc9e64684..ce5c1b2db 100644 --- a/upload/views.py +++ b/upload/views.py @@ -8,7 +8,7 @@ from article.models import Article from issue.models import Issue -from upload.models import Package, choices +from upload.models import Package, PkgValidationResult, choices from upload.tasks import ( task_receive_packages, task_publish_article, @@ -82,6 +82,12 @@ def set_pdf_paths(self, data, optz_dir): data["pdfs"] = [] def get_context_data(self): + blocking_errors = list( + PkgValidationResult.objects.filter( + report__package=self.instance, + status=choices.VALIDATION_RESULT_BLOCKING, + ).values_list("message", flat=True) + ) data = { "pkg_zip_name": self.instance.pkg_zip.name, "linked": self.instance.linked.all(), @@ -97,6 +103,7 @@ def get_context_data(self): "xml_info_reports": list(self.instance.xml_info_reports), "summary": self.instance.summary, "xml": self.instance.xml, + "blocking_errors": blocking_errors, } # optz_file_path, optz_dir = self.get_optimized_package_filepath_and_directory() diff --git a/upload/wagtail_hooks.py b/upload/wagtail_hooks.py index 00c48868e..9cfc39bef 100644 --- a/upload/wagtail_hooks.py +++ b/upload/wagtail_hooks.py @@ -5,11 +5,8 @@ from django.db.models import Q from wagtail import hooks -from wagtail_modeladmin.options import ( - ModelAdmin, - ModelAdminGroup, - modeladmin_register, -) +from wagtail.snippets.models import register_snippet +from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup from config.menu import get_menu_order from upload.views import ( @@ -42,18 +39,15 @@ from team.models import has_permission -class PackageZipAdmin(ModelAdmin): +class PackageZipViewSet(SnippetViewSet): model = PackageZip # button_helper_class = UploadButtonHelper permission_helper_class = UploadPermissionHelper - create_view_enabled = True - create_view_class = PackageZipCreateView - inspect_view_enabled = False + add_view_class = PackageZipCreateView menu_label = _("Package upload") 
menu_icon = "folder" menu_order = 200 add_to_settings_menu = False - exclude_from_explorer = False list_per_page = 20 list_display = ( @@ -80,20 +74,16 @@ def get_queryset(self, request): return super().get_queryset(request).filter(**params) -class PackageAdmin(ModelAdmin): +class PackageViewSet(SnippetViewSet): model = Package button_helper_class = UploadButtonHelper permission_helper_class = UploadPermissionHelper - create_view_enabled = False - # create_view_class = PackageCreateView - inspect_view_enabled = True inspect_view_class = PackageAdminInspectView inspect_template_name = "modeladmin/upload/package/inspect.html" menu_label = _("Package admin") menu_icon = "folder" menu_order = 200 add_to_settings_menu = False - exclude_from_explorer = False list_per_page = 20 list_display = ( @@ -196,7 +186,7 @@ def get_queryset(self, request): return super().get_queryset(request).filter(status__in=status, **params) -class QualityAnalysisPackageAdmin(ModelAdmin): +class QualityAnalysisPackageViewSet(SnippetViewSet): model = QAPackage button_helper_class = UploadButtonHelper permission_helper_class = UploadPermissionHelper @@ -204,11 +194,9 @@ class QualityAnalysisPackageAdmin(ModelAdmin): menu_icon = "folder" menu_order = 200 edit_view_class = QAPackageEditView - inspect_view_enabled = True inspect_view_class = PackageAdminInspectView inspect_template_name = "modeladmin/upload/package/inspect.html" add_to_settings_menu = False - exclude_from_explorer = False list_per_page = 20 list_display = ( @@ -278,7 +266,7 @@ def get_queryset(self, request): return super().get_queryset(request).filter(status__in=status, **params) -class ReadyToPublishPackageAdmin(ModelAdmin): +class ReadyToPublishPackageViewSet(SnippetViewSet): model = ReadyToPublishPackage button_helper_class = UploadButtonHelper @@ -287,11 +275,9 @@ class ReadyToPublishPackageAdmin(ModelAdmin): menu_icon = "folder" menu_order = 200 edit_view_class = ReadyToPublishPackageEditView - inspect_view_enabled = True inspect_view_class = PackageAdminInspectView inspect_template_name = "modeladmin/upload/package/inspect.html" add_to_settings_menu = False - exclude_from_explorer = False list_per_page = 20 list_display = ( @@ -343,17 +329,15 @@ def get_queryset(self, request): return super().get_queryset(request).filter(status__in=status, **params) -class XMLErrorReportAdmin(ModelAdmin): +class XMLErrorReportViewSet(SnippetViewSet): model = XMLErrorReport permission_helper_class = UploadPermissionHelper edit_view_class = XMLErrorReportEditView # create_view_class = XMLErrorReportCreateView - inspect_view_enabled = True # inspect_view_class = XMLErrorReportAdminInspectView menu_label = _("XML Error Reports") menu_icon = "error" add_to_settings_menu = False - exclude_from_explorer = False list_display = ( "package", "category", @@ -380,16 +364,14 @@ def get_queryset(self, request): return super().get_queryset(request).filter(package__creator=request.user) -class XMLErrorAdmin(ModelAdmin): +class XMLErrorViewSet(SnippetViewSet): model = XMLError permission_helper_class = UploadPermissionHelper # create_view_class = XMLErrorCreateView - inspect_view_enabled = True # inspect_view_class = XMLErrorAdminInspectView menu_label = _("XML errors") menu_icon = "error" add_to_settings_menu = False - exclude_from_explorer = False list_display = ( "subject", "attribute", @@ -422,17 +404,15 @@ def get_queryset(self, request): return super().get_queryset(request).filter(package__creator=request.user) -class XMLInfoReportAdmin(ModelAdmin): +class 
XMLInfoReportViewSet(SnippetViewSet): model = XMLInfoReport permission_helper_class = UploadPermissionHelper edit_view_class = XMLInfoReportEditView # create_view_class = XMLInfoReportCreateView - inspect_view_enabled = True # inspect_view_class = XMLInfoReportAdminInspectView menu_label = _("XML Info Reports") menu_icon = "error" add_to_settings_menu = False - exclude_from_explorer = False list_display = ( "package", "category", @@ -459,16 +439,14 @@ def get_queryset(self, request): return super().get_queryset(request).filter(package__creator=request.user) -class XMLInfoAdmin(ModelAdmin): +class XMLInfoViewSet(SnippetViewSet): model = XMLInfo permission_helper_class = UploadPermissionHelper # create_view_class = XMLInfoCreateView - inspect_view_enabled = True # inspect_view_class = XMLInfoAdminInspectView menu_label = _("XML info") menu_icon = "error" add_to_settings_menu = False - exclude_from_explorer = False list_display = ( "subject", "attribute", @@ -501,18 +479,16 @@ def get_queryset(self, request): return super().get_queryset(request).filter(package__creator=request.user) -class ValidationReportAdmin(ModelAdmin): +class ValidationReportViewSet(SnippetViewSet): model = ValidationReport permission_helper_class = UploadPermissionHelper # create_view_class = ValidationReportCreateView edit_view_class = ValidationReportEditView - inspect_view_enabled = True # inspect_view_class = ValidationReportAdminInspectView menu_label = _("Validation Reports") menu_icon = "error" add_to_settings_menu = False - exclude_from_explorer = False list_display = ( "package", "category", @@ -539,16 +515,14 @@ def get_queryset(self, request): return super().get_queryset(request).filter(package__creator=request.user) -class ValidationAdmin(ModelAdmin): +class ValidationViewSet(SnippetViewSet): model = PkgValidationResult permission_helper_class = UploadPermissionHelper # create_view_class = ValidationCreateView - inspect_view_enabled = True # inspect_view_class = ValidationAdminInspectView menu_label = _("Validations") menu_icon = "error" add_to_settings_menu = False - exclude_from_explorer = False list_display = ( "subject", "status", @@ -572,16 +546,14 @@ def get_queryset(self, request): return super().get_queryset(request).filter(package__creator=request.user) -class UploadValidatorAdmin(ModelAdmin): +class UploadValidatorViewSet(SnippetViewSet): model = UploadValidator permission_helper_class = UploadPermissionHelper # create_view_class = ValidationCreateView - inspect_view_enabled = False # inspect_view_class = ValidationAdminInspectView menu_label = _("Upload Validator") menu_icon = "folder" add_to_settings_menu = False - exclude_from_explorer = False list_display = ( "collection", "max_xml_warnings_percentage", @@ -605,20 +577,16 @@ def get_queryset(self, request): return super().get_queryset(request).none() -class ArchivedPackageAdmin(ModelAdmin): +class ArchivedPackageViewSet(SnippetViewSet): model = ArchivedPackage button_helper_class = UploadButtonHelper permission_helper_class = UploadPermissionHelper - create_view_enabled = False - # create_view_class = PackageCreateView - inspect_view_enabled = True inspect_view_class = PackageAdminInspectView inspect_template_name = "modeladmin/upload/package/inspect.html" menu_label = _("Archived Packages") menu_icon = "folder" menu_order = 200 add_to_settings_menu = False - exclude_from_explorer = False list_per_page = 20 list_display = ( @@ -672,39 +640,39 @@ def get_queryset(self, request): ) -class UploadModelAdminGroup(ModelAdminGroup): +class 
UploadViewSetGroup(SnippetViewSetGroup):
     menu_icon = "folder"
     menu_label = "Upload"
-    items = (
-        PackageZipAdmin,
-        PackageAdmin,
-        QualityAnalysisPackageAdmin,
-        ReadyToPublishPackageAdmin,
-        ArchivedPackageAdmin,
-    )
+    items = [
+        PackageZipViewSet,
+        PackageViewSet,
+        QualityAnalysisPackageViewSet,
+        ReadyToPublishPackageViewSet,
+        ArchivedPackageViewSet,
+    ]
     menu_order = get_menu_order("upload")
 
 
-modeladmin_register(UploadModelAdminGroup)
+register_snippet(UploadViewSetGroup)
 
 
-class UploadReportsModelAdminGroup(ModelAdminGroup):
+class UploadReportsViewSetGroup(SnippetViewSetGroup):
     menu_icon = "folder"
     menu_label = _("Error management")
-    items = (
+    items = [
         # the following items make the links to the reports
         # work on the Package.inspect page
-        XMLErrorAdmin,
-        XMLErrorReportAdmin,
-        XMLInfoReportAdmin,
-        ValidationAdmin,
-        ValidationReportAdmin,
-        UploadValidatorAdmin,
-    )
+        XMLErrorViewSet,
+        XMLErrorReportViewSet,
+        XMLInfoReportViewSet,
+        ValidationViewSet,
+        ValidationReportViewSet,
+        UploadValidatorViewSet,
+    ]
     menu_order = get_menu_order("upload-error")
 
 
-modeladmin_register(UploadReportsModelAdminGroup)
+register_snippet(UploadReportsViewSetGroup)
 
 
 @hooks.register("register_admin_urls")
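Editor's note (not part of the diff): the get_numbers() change in upload/models.py earlier in this diff replaces direct key lookups with guarded counter keys. The illustration below shows the failure mode it avoids; the pre-seeded counter dictionary and the sample rows are assumptions made for the example.

    # Editor's illustration only; `items` is assumed to be pre-seeded with the
    # known counter keys, as get_numbers() does before aggregating.
    items = {"total_created": 0, "total_agree": 0}
    rows = [
        {"status": "CREATED", "reaction": "agree", "total": 2},
        {"status": None, "reaction": "unexpected-value", "total": 1},  # previously failed
    ]

    for row in rows:
        # Old code: items["total_" + row["status"].lower()] += row["total"]
        # -> AttributeError on None, KeyError on an unknown status.
        status_key = "total_" + (row.get("status") or "").lower()
        if status_key not in items:
            continue  # skip rows with no matching counter instead of failing
        items[status_key] += row["total"]

        reaction_key = "total_" + (row.get("reaction") or "")
        if reaction_key in items:
            items[reaction_key] += row["total"]

    print(items)  # {'total_created': 2, 'total_agree': 2}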