diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py index e32b785baa85..f6bfdf13be77 100644 --- a/openedx/core/djangoapps/content/search/api.py +++ b/openedx/core/djangoapps/content/search/api.py @@ -1,6 +1,7 @@ """ Content index and search API using Meilisearch """ + from __future__ import annotations import logging @@ -11,6 +12,7 @@ from functools import wraps from typing import Callable, Generator, cast # noqa: UP035 +from attrs import define from django.conf import settings from django.contrib.auth import get_user_model from django.core.cache import cache @@ -31,6 +33,7 @@ from openedx.core.djangoapps.content.search.index_config import ( INDEX_DISTINCT_ATTRIBUTE, INDEX_FILTERABLE_ATTRIBUTES, + INDEX_PRIMARY_KEY, INDEX_RANKING_RULES, INDEX_SEARCHABLE_ATTRIBUTES, INDEX_SORTABLE_ATTRIBUTES, @@ -75,7 +78,7 @@ MAX_ACCESS_IDS_IN_FILTER = 1_000 MAX_ORGS_IN_FILTER = 1_000 -EXCLUDED_XBLOCK_TYPES = ['course', 'course_info'] +EXCLUDED_XBLOCK_TYPES = ["course", "course_info"] @contextmanager @@ -157,7 +160,7 @@ def _wait_for_meili_task(info: TaskInfo) -> None: current_status = client.get_task(info.task_uid) if current_status.status != "succeeded": try: - err_reason = current_status.error['message'] + err_reason = current_status.error["message"] except (TypeError, KeyError): err_reason = "Unknown error" raise MeilisearchError(err_reason) @@ -207,9 +210,7 @@ def _using_temp_index(status_cb: Callable[[str], None] | None = None) -> Generat _wait_for_meili_task(client.delete_index(temp_index_name)) status_cb("Creating new index...") - _wait_for_meili_task( - client.create_index(temp_index_name, {'primaryKey': 'id'}) - ) + _wait_for_meili_task(client.create_index(temp_index_name, {"primaryKey": INDEX_PRIMARY_KEY})) new_index_created = client.get_index(temp_index_name).created_at yield temp_index_name @@ -219,7 +220,7 @@ def _using_temp_index(status_cb: Callable[[str], None] | None = None) -> Generat status_cb("Preparing to 
swap into index (first time)...") _wait_for_meili_task(client.create_index(STUDIO_INDEX_NAME)) status_cb("Swapping index...") - client.swap_indexes([{'indexes': [temp_index_name, STUDIO_INDEX_NAME]}]) + client.swap_indexes([{"indexes": [temp_index_name, STUDIO_INDEX_NAME]}]) # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 while True: @@ -244,28 +245,49 @@ def _index_is_empty(index_name: str) -> bool: return index.get_stats().number_of_documents == 0 -def _configure_index(index_name): +def _apply_index_settings( + index_name: str, + wait: bool, + status_cb: Callable[[str], None] | None = None, +) -> None: """ - Configure the index. The following index settings are best changed on an empty index. - Changing them on a populated index will "re-index all documents in the index", which can take some time. + Apply the standard Meilisearch settings to an index. + + When wait=False, settings are sent in fire-and-forget mode. This is appropriate + for empty temporary indexes that will immediately be populated on the same Meilisearch + task queue. + + When wait=True, each settings task is synchronously waited on before returning. + This is appropriate when reconciling a live index and we need confirmation that the + settings have been applied before returning. Args: - index_name (str): The name of the index to configure + index_name: The name of the index to configure. + wait: Whether to wait for each Meilisearch settings task to complete. + status_cb: Optional callback for status messages when wait=True. 
""" + if status_cb is None: + status_cb = log.info + client = _get_meilisearch_client() + index = client.index(index_name) + + settings_updates = ( + ("distinct attribute", index.update_distinct_attribute, INDEX_DISTINCT_ATTRIBUTE), + ("filterable attributes", index.update_filterable_attributes, INDEX_FILTERABLE_ATTRIBUTES), + ("searchable attributes", index.update_searchable_attributes, INDEX_SEARCHABLE_ATTRIBUTES), + ("sortable attributes", index.update_sortable_attributes, INDEX_SORTABLE_ATTRIBUTES), + ("ranking rules", index.update_ranking_rules, INDEX_RANKING_RULES), + ) - # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): - client.index(index_name).update_distinct_attribute(INDEX_DISTINCT_ATTRIBUTE) - # Mark which attributes can be used for filtering/faceted search: - client.index(index_name).update_filterable_attributes(INDEX_FILTERABLE_ATTRIBUTES) - # Mark which attributes are used for keyword search, in order of importance: - client.index(index_name).update_searchable_attributes(INDEX_SEARCHABLE_ATTRIBUTES) - # Mark which attributes can be used for sorting search results: - client.index(index_name).update_sortable_attributes(INDEX_SORTABLE_ATTRIBUTES) + for label, update_method, value in settings_updates: + status_cb(f"Applying {label} to '{index_name}'...") + if wait: + _wait_for_meili_task(update_method(value)) + else: + update_method(value) - # Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance. 
- # cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy - client.index(index_name).update_ranking_rules(INDEX_RANKING_RULES) + status_cb(f"All settings applied to '{index_name}'.") def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None: @@ -315,11 +337,13 @@ def only_if_meilisearch_enabled(f): """ Only call `f` if meilisearch is enabled """ + @wraps(f) def wrapper(*args, **kwargs): """Wraps the decorated function.""" if is_meilisearch_enabled(): return f(*args, **kwargs) + return wrapper @@ -342,63 +366,179 @@ def reset_index(status_cb: Callable[[str], None] | None = None) -> None: status_cb("Creating new empty index...") with _using_temp_index(status_cb) as temp_index_name: - _configure_index(temp_index_name) + _apply_index_settings(temp_index_name, wait=False) status_cb("Index recreated!") status_cb("Index reset complete.") -def _is_index_configured(index_name: str) -> bool: +@define +class IndexDrift: + """ + Represents the drift state of a Meilisearch index compared to the expected configuration. 
+ """ + + exists: bool + is_empty: bool | None = None # None if index doesn't exist + primary_key_correct: bool | None = None # None if index doesn't exist + distinct_attribute_match: bool | None = None + filterable_attributes_match: bool | None = None + searchable_attributes_match: bool | None = None + sortable_attributes_match: bool | None = None + ranking_rules_match: bool | None = None + + @property + def is_settings_drifted(self) -> bool: + """True if any of the 5 settings fields is False (not None, but explicitly False).""" + return any( + setting_fields is False + for setting_fields in ( + self.distinct_attribute_match, + self.filterable_attributes_match, + self.searchable_attributes_match, + self.sortable_attributes_match, + self.ranking_rules_match, + ) + ) + + +def _detect_index_drift(index_name: str) -> IndexDrift: """ - Check if an index is completely configured + Inspect the current state of a Meilisearch index and return a structured drift report. + + It provides per-setting match status plus primary key and emptiness information. Args: - index_name (str): The name of the index to check + index_name (str): The name of the index to inspect. + + Returns: + IndexDrift: Structured drift report. 
""" + if not _index_exists(index_name): + return IndexDrift(exists=False) + client = _get_meilisearch_client() index = client.get_index(index_name) + + # Check primary key + primary_key_correct = index.primary_key == INDEX_PRIMARY_KEY + + # Check emptiness + is_empty = index.get_stats().number_of_documents == 0 + + # Check settings index_settings = index.get_settings() - for k, v in ( - ("distinctAttribute", INDEX_DISTINCT_ATTRIBUTE), - ("filterableAttributes", INDEX_FILTERABLE_ATTRIBUTES), - ("searchableAttributes", INDEX_SEARCHABLE_ATTRIBUTES), - ("sortableAttributes", INDEX_SORTABLE_ATTRIBUTES), - ("rankingRules", INDEX_RANKING_RULES), - ): - setting = index_settings.get(k, []) - if isinstance(v, list): - v = set(v) - setting = set(setting) - if setting != v: - return False - return True + def _compare_setting(key, expected): + """Compare a single setting value against the expected value.""" + actual = index_settings.get(key, [] if isinstance(expected, list) else None) + if isinstance(expected, list): + # For ranking rules, order matters; for other lists, it doesn't + if key == "rankingRules": + return list(actual) == list(expected) + return set(actual) == set(expected) + return actual == expected + + return IndexDrift( + exists=True, + is_empty=is_empty, + primary_key_correct=primary_key_correct, + distinct_attribute_match=_compare_setting("distinctAttribute", INDEX_DISTINCT_ATTRIBUTE), + filterable_attributes_match=_compare_setting("filterableAttributes", INDEX_FILTERABLE_ATTRIBUTES), + searchable_attributes_match=_compare_setting("searchableAttributes", INDEX_SEARCHABLE_ATTRIBUTES), + sortable_attributes_match=_compare_setting("sortableAttributes", INDEX_SORTABLE_ATTRIBUTES), + ranking_rules_match=_compare_setting("rankingRules", INDEX_RANKING_RULES), + ) -def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable[[str], None] | None = None) -> None: + +def reconcile_index( + status_cb: Callable[[str], None] | None = None, warn_cb: 
Callable[[str], None] | None = None +) -> None: # noqa: E501 """ - Initialize the Meilisearch index, creating it and configuring it if it doesn't exist + Reconcile the Meilisearch index state. + + Inspects the current Studio Meilisearch index and takes appropriate action based on its state: + - Creates the index if missing. + - Reconfigures if empty and drifted. + - Applies updated settings if populated and drifted. + - Recreates the index if primary key is mismatched (even if populated — data loss is unavoidable). + - No-ops if everything is correctly configured. + + This is the primary reconciliation entry point, called from post_migrate and init_index(). """ if status_cb is None: status_cb = log.info if warn_cb is None: warn_cb = log.warning - if _index_exists(STUDIO_INDEX_NAME): - if _index_is_empty(STUDIO_INDEX_NAME): + drift = _detect_index_drift(STUDIO_INDEX_NAME) + + # CASE: Index missing + if not drift.exists: + status_cb("Studio search index not found. Creating and configuring...") + reset_index(status_cb) + status_cb("Index created. Run './manage.py cms reindex_studio' to populate.") + return + + # CASE: Primary key mismatch (must recreate regardless of population state) + if not drift.primary_key_correct: + if drift.is_empty: + warn_cb("Primary key mismatch on empty index. Recreating...") + else: warn_cb( - "The studio search index is empty. Please run ./manage.py cms reindex_studio" - " [--incremental]" + f"PRIMARY KEY MISMATCH on populated index '{STUDIO_INDEX_NAME}'. " + "Index must be recreated (data loss is unavoidable for primary key changes)." ) - return - if not _is_index_configured(STUDIO_INDEX_NAME): - warn_cb( - "A rebuild of the index is required. Please run ./manage.py cms reindex_studio" - " [--incremental]" + warn_cb("Dropping and recreating index. Repopulate with: './manage.py cms reindex_studio'") + reset_index(status_cb) + warn_cb("Index recreated empty. 
Run './manage.py cms reindex_studio' to repopulate.") +        return + +    # CASE: Index empty +    if drift.is_empty: +        if drift.is_settings_drifted: +            status_cb("Empty index has drifted settings. Reconfiguring...") +            _apply_index_settings(STUDIO_INDEX_NAME, wait=True, status_cb=status_cb) +            status_cb("Reconfigured. Run './manage.py cms reindex_studio' to populate.") +        else: +            status_cb( +                "Index exists and is correctly configured but empty. Run './manage.py cms reindex_studio' to populate." )             - return - status_cb("Index already exists and is configured.")         return - reset_index(status_cb) + # CASE: Index populated, attribute drifted i.e. settings mismatched + if drift.is_settings_drifted: + warn_cb(f"Settings drift detected on populated index '{STUDIO_INDEX_NAME}'. Applying updated settings...") + # Log per-setting mismatch details + for field_name, match in ( + ("distinctAttribute", drift.distinct_attribute_match), + ("filterableAttributes", drift.filterable_attributes_match), + ("searchableAttributes", drift.searchable_attributes_match), + ("sortableAttributes", drift.sortable_attributes_match), + ("rankingRules", drift.ranking_rules_match), + ): + if match is False: + warn_cb(f" - {field_name}: DRIFTED") + + _apply_index_settings(STUDIO_INDEX_NAME, wait=True, status_cb=status_cb) + warn_cb( + "Settings applied. Meilisearch will re-index documents in the background. " + "Consider running './manage.py cms reindex_studio' for a full rebuild " + "if search quality is affected." + ) + else: + status_cb("Index is populated and correctly configured. No action needed.") + + +def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable[[str], None] | None = None) -> None: + """ + This method is deprecated as of Verawood and will be removed in a future release. + + Initialize the Meilisearch index, creating it and configuring it if it doesn't exist. + + This is a compatibility wrapper around reconcile_index(). 
+ """ + log.warning("init_index is deprecated as of Verawood and will be removed in the future release.") + reconcile_index(status_cb=status_cb, warn_cb=warn_cb) def index_course(course_key: CourseKey, index_name: str | None = None) -> list: @@ -414,7 +554,7 @@ def index_course(course_key: CourseKey, index_name: str | None = None) -> list: course = store.get_course(course_key, depth=None) def add_with_children(block): - """ Recursively index the given XBlock/component """ + """Recursively index the given XBlock/component""" doc = searchable_doc_for_course_block(block) doc.update(searchable_doc_tags(block.usage_key)) docs.append(doc) # pylint: disable=cell-var-from-loop @@ -429,7 +569,9 @@ def add_with_children(block): return docs -def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=False) -> None: # lint-amnesty, pylint: disable=too-many-statements +def rebuild_index( # pylint: disable=too-many-statements + status_cb: Callable[[str], None] | None = None, incremental=False +) -> None: # lint-amnesty """ Rebuild the Meilisearch index from scratch """ @@ -468,7 +610,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa # Changing them on a populated index will "re-index all documents in the index", which can take some time # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. 
if not incremental: - _configure_index(index_name) + _apply_index_settings(index_name, wait=False) ############## Libraries ############## status_cb("Indexing libraries...") @@ -590,7 +732,7 @@ def index_container_batch(batch, num_done, library_key) -> int: status_cb("Indexing courses...") # To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses: - paginator = Paginator(CourseOverview.objects.only('id', 'display_name'), 1000) + paginator = Paginator(CourseOverview.objects.only("id", "display_name"), 1000) for p in paginator.page_range: for course in paginator.page(p).object_list: status_cb( @@ -627,7 +769,7 @@ def upsert_xblock_index_doc(usage_key: UsageKey, recursive: bool = True) -> None docs = [] def add_with_children(block): - """ Recursively index the given XBlock/component """ + """Recursively index the given XBlock/component""" doc = searchable_doc_for_course_block(block) docs.append(doc) if recursive: @@ -710,9 +852,7 @@ def upsert_library_block_index_doc(usage_key: UsageKey) -> None: library_block = lib_api.get_component_from_usage_key(usage_key) library_block_metadata = lib_api.LibraryXBlockMetadata.from_component(usage_key.context_key, library_block) - docs = [ - searchable_doc_for_library_block(library_block_metadata) - ] + docs = [searchable_doc_for_library_block(library_block_metadata)] _update_index_docs(docs) @@ -747,8 +887,7 @@ def upsert_library_collection_index_doc(collection_key: LibraryCollectionLocator # Soft-deleted/disabled collections are removed from the index # and their components updated. - if doc.get('_disabled'): - + if doc.get("_disabled"): _delete_index_doc(doc[Fields.id]) update_items = True @@ -756,7 +895,6 @@ def upsert_library_collection_index_doc(collection_key: LibraryCollectionLocator # Hard-deleted collections are also deleted from the index, # but their components are automatically updated as part of the deletion process, so we don't have to. 
elif not doc.get(Fields.type): - _delete_index_doc(doc[Fields.id]) # Otherwise, upsert the collection. @@ -807,8 +945,7 @@ def update_library_components_collections( docs.append(doc) log.info( - f"Updating document.collections for library {library_key} components" - f" page {page} / {paginator.num_pages}" + f"Updating document.collections for library {library_key} components page {page} / {paginator.num_pages}" ) _update_index_docs(docs) @@ -824,10 +961,14 @@ def update_library_containers_collections( """ library_key = collection_key.lib_key library = lib_api.get_library(library_key) - container_entities = content_api.get_collection_entities( - library.learning_package_id, - collection_key.collection_id, - ).exclude(container=None).select_related("container") + container_entities = ( + content_api.get_collection_entities( + library.learning_package_id, + collection_key.collection_id, + ) + .exclude(container=None) + .select_related("container") + ) paginator = Paginator(container_entities, batch_size) for page in paginator.page_range: @@ -843,8 +984,7 @@ def update_library_containers_collections( docs.append(doc) log.info( - f"Updating document.collections for library {library_key} containers" - f" page {page} / {paginator.num_pages}" + f"Updating document.collections for library {library_key} containers page {page} / {paginator.num_pages}" ) _update_index_docs(docs) @@ -859,13 +999,11 @@ def upsert_library_container_index_doc(container_key: LibraryContainerLocator) - # Soft-deleted/disabled containers are removed from the index # and their components updated. - if doc.get('_disabled'): - + if doc.get("_disabled"): _delete_index_doc(doc[Fields.id]) # Hard-deleted containers are also deleted from the index elif not doc.get(Fields.type): - _delete_index_doc(doc[Fields.id]) # Otherwise, upsert the container. @@ -935,11 +1073,9 @@ def _get_user_orgs(request: Request) -> list[str]: Note: org-level roles have course_id=None to distinguish them from course-level roles. 
""" course_roles = get_course_roles(request.user) - return list(set( - role.org - for role in course_roles - if role.course_id is None and role.role in ['staff', 'instructor'] - )) + return list( + set(role.org for role in course_roles if role.course_id is None and role.role in ["staff", "instructor"]) + ) def _get_meili_access_filter(request: Request) -> dict: @@ -1032,7 +1168,7 @@ def fetch_block_types(extra_filter: Filter | None = None): "facets": ["block_type"], "filter": extra_filter_formatted, "limit": 0, - } + }, ) return response @@ -1065,7 +1201,7 @@ def get_all_blocks_from_context( "limit": limit, "offset": offset, "attributesToRetrieve": ["usage_key"] + (extra_attributes_to_retrieve or []), - } + }, ) yield from response["hits"] diff --git a/openedx/core/djangoapps/content/search/apps.py b/openedx/core/djangoapps/content/search/apps.py index 4a377f0a92a1..38e3ad92aa4a 100644 --- a/openedx/core/djangoapps/content/search/apps.py +++ b/openedx/core/djangoapps/content/search/apps.py @@ -10,7 +10,13 @@ class ContentSearchConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" name = "openedx.core.djangoapps.content.search" + label = "search" def ready(self): # Connect signal handlers + # Connect post_migrate for Meilisearch index reconciliation. + # No sender= argument here; the handler filters by sender.label internally. + from django.db.models.signals import post_migrate # pylint: disable=import-outside-toplevel + from . 
import handlers # pylint: disable=unused-import # noqa: F401 + post_migrate.connect(handlers.handle_post_migrate) diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py index fe5ddecdbf9f..0fe93292d8fa 100644 --- a/openedx/core/djangoapps/content/search/handlers.py +++ b/openedx/core/djangoapps/content/search/handlers.py @@ -47,7 +47,9 @@ from xmodule.modulestore.django import SignalHandler from .api import ( + is_meilisearch_enabled, only_if_meilisearch_enabled, + reconcile_index, upsert_content_object_tags_index_doc, upsert_item_collections_index_docs, upsert_item_containers_index_docs, @@ -68,6 +70,37 @@ log = logging.getLogger(__name__) +def handle_post_migrate(sender, **kwargs): + """ + Reconcile Meilisearch index state after Django migrations run. + + Filters on sender.label to only execute for the search app's post_migrate signal. + Tolerant of Meilisearch unavailability — logs a warning and continues. + """ + from .apps import ContentSearchConfig # pylint: disable=import-outside-toplevel + + if sender.label != ContentSearchConfig.label: + return + + if not is_meilisearch_enabled(): + return + + try: + reconcile_index(status_cb=log.info, warn_cb=log.warning) + except ConnectionError as exc: + log.warning( + "Meilisearch reconciliation skipped during post_migrate: %s. " + "Will retry on next migrate run.", + exc, + ) + except Exception as exc: # pylint: disable=broad-except + log.warning( + "Meilisearch reconciliation failed during post_migrate: %s. " + "Will retry on next migrate run.", + exc, + ) + + # Using post_delete here because there is no COURSE_DELETED event defined. 
@receiver(post_delete, sender=CourseOverview) def delete_course_search_access(sender, instance, **kwargs): # pylint: disable=unused-argument diff --git a/openedx/core/djangoapps/content/search/index_config.py b/openedx/core/djangoapps/content/search/index_config.py index c4fc01e98933..0f55a00347fc 100644 --- a/openedx/core/djangoapps/content/search/index_config.py +++ b/openedx/core/djangoapps/content/search/index_config.py @@ -1,6 +1,9 @@ """Configuration for the search index.""" from .documents import Fields +# The Meilisearch primary key for all documents in the index. +INDEX_PRIMARY_KEY = Fields.id + INDEX_DISTINCT_ATTRIBUTE = "usage_key" # Mark which attributes can be used for filtering/faceted search: diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index a320790739a1..c2c0b83a25e1 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -1,43 +1,100 @@ """ -Command to build or re-build the search index for courses (in Studio, i.e. Draft -mode), in Meilisearch. +Command to queue incremental population of the Studio Meilisearch search index. + +Index creation, configuration, and schema reconciliation are handled +automatically via the post_migrate signal. This command is solely +responsible for enqueuing the population task in Celery. See also cms/djangoapps/contentstore/management/commands/reindex_course.py which indexes LMS (published) courses in ElasticSearch. """ + +import logging + +from django.conf import settings from django.core.management import BaseCommand, CommandError from ... import api +from ...tasks import rebuild_index_incremental + +log = logging.getLogger(__name__) class Command(BaseCommand): """ - Build or re-build the Meilisearch search index for courses and libraries in Studio. 
+ Add all course and library content to the Studio search index. + + This enqueues a Celery task that incrementally indexes all courses and + libraries. Progress is tracked via IncrementalIndexCompleted, so the task + can safely resume if interrupted. + + Index creation and configuration are handled by post_migrate reconciliation + (runs automatically on ./manage.py cms migrate). - This is separate from LMS search features like courseware search or forum search. + If it's ever necessary to reset the incremental indexing state (force + the full re-index process to start from the beginning), use: + + ./manage.py cms shell -c 'IncrementalIndexCompleted.objects.all().delete()' + + This will delete all the IncrementalIndexCompleted records and will help in restarting the index population. """ - # TODO: improve this - see https://github.com/openedx/edx-platform/issues/36868 + help = "Add all course and library content to the Studio search index." def add_arguments(self, parser): - parser.add_argument("--experimental", action="store_true") # kept for compatibility but ignored. - parser.add_argument("--reset", action="store_true") - parser.add_argument("--init", action="store_true") - parser.add_argument("--incremental", action="store_true") - parser.set_defaults(experimental=False, reset=False, init=False, incremental=False) + # Removed flags — provide clear error messages for operators with old automation. 
+ parser.add_argument( + "--reset", + action="store_true", + default=False, + help="(Removed) Index reset is now handled by post_migrate reconciliation.", + ) + parser.add_argument( + "--init", + action="store_true", + default=False, + help="(Removed) Index initialization is now handled by post_migrate reconciliation.", + ) + parser.add_argument( + "--incremental", + action="store_true", + default=False, + help="(Removed) Incremental is now the default and only population mode.", + ) def handle(self, *args, **options): - """ - Build a new search index for Studio, containing content from courses and libraries - """ if not api.is_meilisearch_enabled(): raise CommandError("Meilisearch is not enabled. Please set MEILISEARCH_ENABLED to True in your settings.") if options["reset"]: - api.reset_index(self.stdout.write) - elif options["init"]: - api.init_index(self.stdout.write, self.stderr.write) - elif options["incremental"]: - api.rebuild_index(self.stdout.write, incremental=True) + raise CommandError( + "The --reset flag has been removed. " + "Index reset is now handled automatically by post_migrate reconciliation. " + "Run: ./manage.py cms migrate" + ) + + if options["init"]: + raise CommandError( + "The --init flag has been removed. " + "Index initialization is now handled automatically by post_migrate reconciliation. " + "Run: ./manage.py cms migrate" + ) + + if options["incremental"]: + log.warning( + "The --incremental flag has been removed. " + "Incremental population is now the default behavior of this command." + ) + + result = rebuild_index_incremental.delay() + + if settings.CELERY_ALWAYS_EAGER: + self.stdout.write("Indexing complete!") else: - api.rebuild_index(self.stdout.write) + self.stdout.write( + f"Studio search index population has been queued (task_id={result.id}). " + "Population will run incrementally in a Celery worker. " + "Monitor progress in Celery worker logs. 
" + "In order to reset the incremental indexing state, please run: " + "./manage.py cms shell -c 'IncrementalIndexCompleted.objects.all().delete()'" + ) diff --git a/openedx/core/djangoapps/content/search/tasks.py b/openedx/core/djangoapps/content/search/tasks.py index 8d18c2aae6ee..a95bedb062db 100644 --- a/openedx/core/djangoapps/content/search/tasks.py +++ b/openedx/core/djangoapps/content/search/tasks.py @@ -184,3 +184,36 @@ def delete_course_index_docs(course_key_str: str) -> None: # Delete children index data for course blocks. api.delete_docs_with_context_key(course_key) + + +@shared_task( + base=LoggedTask, + autoretry_for=(MeilisearchError, ConnectionError), + max_retries=3, + retry_backoff=True, +) +@set_code_owner_attribute +def rebuild_index_incremental() -> None: + """ + Celery task to incrementally populate the Studio Meilisearch index. + + Uses IncrementalIndexCompleted to track progress and resume from where + it left off if interrupted. Safe to call multiple times — already-indexed + contexts are skipped. + + If a rebuild is already in progress (lock held), the task exits gracefully. + """ + log.info("Starting incremental Studio search index population...") + + try: + api.rebuild_index(status_cb=log.info, incremental=True) + except RuntimeError as exc: + # rebuild_index -> _using_temp_index or lock contention + if "already in progress" in str(exc).lower(): + log.warning( + "Studio index population skipped: a rebuild is already in progress. Will retry later if re-enqueued." 
+ ) + return + raise + + log.info("Incremental Studio search index population complete.") diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index 356125febf19..8a29bb450326 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -449,19 +449,32 @@ def test_reset_meilisearch_index(self, mock_meilisearch) -> None: @override_settings(MEILISEARCH_ENABLED=True) def test_init_meilisearch_index(self, mock_meilisearch) -> None: - # Test index already exists + # Test index already exists, is populated, and correctly configured + mock_index = Mock() + mock_index.primary_key = "id" + mock_index.get_stats.return_value = Mock(number_of_documents=100) + mock_index.get_settings.return_value = { + "distinctAttribute": "usage_key", + "filterableAttributes": list(api.INDEX_FILTERABLE_ATTRIBUTES), + "searchableAttributes": list(api.INDEX_SEARCHABLE_ATTRIBUTES), + "sortableAttributes": list(api.INDEX_SORTABLE_ATTRIBUTES), + "rankingRules": list(api.INDEX_RANKING_RULES), + } + mock_meilisearch.return_value.get_index.return_value = mock_index + api.init_index() mock_meilisearch.return_value.swap_indexes.assert_not_called() mock_meilisearch.return_value.create_index.assert_not_called() mock_meilisearch.return_value.delete_index.assert_not_called() - # Test index already exists and has no documents - mock_meilisearch.return_value.get_stats.return_value = 0 + # Test index already exists and is empty but correctly configured + mock_index.get_stats.return_value = Mock(number_of_documents=0) api.init_index() mock_meilisearch.return_value.swap_indexes.assert_not_called() mock_meilisearch.return_value.create_index.assert_not_called() mock_meilisearch.return_value.delete_index.assert_not_called() + # Test index does not exist — should create it mock_meilisearch.return_value.get_index.side_effect = [ MeilisearchApiError("Testing reindex", 
Mock(text='{"code":"index_not_found"}')), MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')), diff --git a/openedx/core/djangoapps/content/search/tests/test_reconcile.py b/openedx/core/djangoapps/content/search/tests/test_reconcile.py new file mode 100644 index 000000000000..6904f555ebb4 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_reconcile.py @@ -0,0 +1,611 @@ +""" +Tests for the Meilisearch index reconciliation logic. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, Mock, patch + +import pytest +from django.test import TestCase, override_settings +from meilisearch.errors import MeilisearchError + +from openedx.core.djangolib.testing.utils import skip_unless_cms + +try: + from .. import api + from ..api import ( + IndexDrift, + _apply_index_settings, + _detect_index_drift, + reconcile_index, + ) + from ..apps import ContentSearchConfig + from ..handlers import handle_post_migrate + from ..index_config import ( + INDEX_DISTINCT_ATTRIBUTE, + INDEX_FILTERABLE_ATTRIBUTES, + INDEX_PRIMARY_KEY, + INDEX_RANKING_RULES, + INDEX_SEARCHABLE_ATTRIBUTES, + INDEX_SORTABLE_ATTRIBUTES, + ) +except RuntimeError: + pass + + +@skip_unless_cms +class TestIndexDrift(TestCase): + """Tests for the IndexDrift dataclass.""" + + def test_all_fields_match(self): + drift = IndexDrift( + exists=True, + is_empty=False, + primary_key_correct=True, + distinct_attribute_match=True, + filterable_attributes_match=True, + searchable_attributes_match=True, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + assert drift.exists is True + assert drift.is_empty is False + assert drift.primary_key_correct is True + assert drift.distinct_attribute_match is True + assert drift.filterable_attributes_match is True + assert drift.searchable_attributes_match is True + assert drift.sortable_attributes_match is True + assert drift.ranking_rules_match is True + assert not drift.is_settings_drifted + + def 
test_settings_drifted_when_one_setting_false(self): + drift = IndexDrift( + exists=True, + is_empty=False, + primary_key_correct=True, + distinct_attribute_match=True, + filterable_attributes_match=False, + searchable_attributes_match=True, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + assert drift.primary_key_correct is True + assert drift.filterable_attributes_match is False + assert drift.is_settings_drifted + + def test_primary_key_wrong_without_settings_drift(self): + drift = IndexDrift( + exists=True, + is_empty=False, + primary_key_correct=False, + distinct_attribute_match=True, + filterable_attributes_match=True, + searchable_attributes_match=True, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + assert drift.primary_key_correct is False + assert drift.distinct_attribute_match is True + assert drift.filterable_attributes_match is True + assert drift.searchable_attributes_match is True + assert drift.sortable_attributes_match is True + assert drift.ranking_rules_match is True + assert not drift.is_settings_drifted + + def test_missing_index_leaves_optional_fields_unset(self): + drift = IndexDrift(exists=False) + assert drift.exists is False + assert drift.is_empty is None + assert drift.primary_key_correct is None + assert drift.distinct_attribute_match is None + assert drift.filterable_attributes_match is None + assert drift.searchable_attributes_match is None + assert drift.sortable_attributes_match is None + assert drift.ranking_rules_match is None + assert not drift.is_settings_drifted + + def test_multiple_settings_drifted(self): + drift = IndexDrift( + exists=True, + is_empty=True, + primary_key_correct=True, + distinct_attribute_match=False, + filterable_attributes_match=False, + searchable_attributes_match=True, + sortable_attributes_match=False, + ranking_rules_match=True, + ) + assert drift.distinct_attribute_match is False + assert drift.filterable_attributes_match is False + assert 
drift.searchable_attributes_match is True + assert drift.sortable_attributes_match is False + assert drift.ranking_rules_match is True + assert drift.is_settings_drifted + + +@skip_unless_cms +@override_settings(MEILISEARCH_ENABLED=True) +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class TestDetectIndexDrift(TestCase): + """Tests for _detect_index_drift().""" + + def setUp(self): + super().setUp() + api.clear_meilisearch_client() + + def test_index_missing(self, mock_meilisearch): + """When the index doesn't exist, returns exists=False with all other fields None.""" + from meilisearch.errors import MeilisearchApiError + + mock_meilisearch.return_value.get_index.side_effect = MeilisearchApiError( + "Not found", Mock(text='{"code":"index_not_found"}') + ) + + drift = _detect_index_drift("test_index") + + assert not drift.exists + assert drift.is_empty is None + assert drift.primary_key_correct is None + assert drift.distinct_attribute_match is None + + def test_all_settings_match(self, mock_meilisearch): + """When all settings match, returns non-drifted state.""" + mock_index = Mock() + mock_index.primary_key = INDEX_PRIMARY_KEY + mock_index.get_stats.return_value = Mock(number_of_documents=100) + mock_index.get_settings.return_value = { + "distinctAttribute": INDEX_DISTINCT_ATTRIBUTE, + "filterableAttributes": list(INDEX_FILTERABLE_ATTRIBUTES), + "searchableAttributes": list(INDEX_SEARCHABLE_ATTRIBUTES), + "sortableAttributes": list(INDEX_SORTABLE_ATTRIBUTES), + "rankingRules": list(INDEX_RANKING_RULES), + } + mock_meilisearch.return_value.get_index.return_value = mock_index + + drift = _detect_index_drift("test_index") + + assert drift.exists + assert drift.is_empty is False + assert drift.primary_key_correct is True + assert drift.distinct_attribute_match is True + assert drift.filterable_attributes_match is True + assert 
drift.searchable_attributes_match is True + assert drift.sortable_attributes_match is True + assert drift.ranking_rules_match is True + assert not drift.is_settings_drifted + + def test_filterable_attributes_mismatch(self, mock_meilisearch): + """Detects when filterable attributes differ.""" + mock_index = Mock() + mock_index.primary_key = INDEX_PRIMARY_KEY + mock_index.get_stats.return_value = Mock(number_of_documents=0) + mock_index.get_settings.return_value = { + "distinctAttribute": INDEX_DISTINCT_ATTRIBUTE, + "filterableAttributes": ["some_other_field"], + "searchableAttributes": list(INDEX_SEARCHABLE_ATTRIBUTES), + "sortableAttributes": list(INDEX_SORTABLE_ATTRIBUTES), + "rankingRules": list(INDEX_RANKING_RULES), + } + mock_meilisearch.return_value.get_index.return_value = mock_index + + drift = _detect_index_drift("test_index") + + assert drift.exists + assert drift.is_empty + assert drift.filterable_attributes_match is False + assert drift.distinct_attribute_match is True + assert drift.is_settings_drifted + + def test_ranking_rules_order_matters(self, mock_meilisearch): + """Ranking rules comparison is order-sensitive.""" + mock_index = Mock() + mock_index.primary_key = INDEX_PRIMARY_KEY + mock_index.get_stats.return_value = Mock(number_of_documents=50) + # Reverse the ranking rules + mock_index.get_settings.return_value = { + "distinctAttribute": INDEX_DISTINCT_ATTRIBUTE, + "filterableAttributes": list(INDEX_FILTERABLE_ATTRIBUTES), + "searchableAttributes": list(INDEX_SEARCHABLE_ATTRIBUTES), + "sortableAttributes": list(INDEX_SORTABLE_ATTRIBUTES), + "rankingRules": list(reversed(INDEX_RANKING_RULES)), + } + mock_meilisearch.return_value.get_index.return_value = mock_index + + drift = _detect_index_drift("test_index") + + assert drift.ranking_rules_match is False + assert drift.is_settings_drifted + + def test_filterable_attributes_order_independent(self, mock_meilisearch): + """Filterable/searchable/sortable attributes comparison is order-independent.""" 
+ mock_index = Mock() + mock_index.primary_key = INDEX_PRIMARY_KEY + mock_index.get_stats.return_value = Mock(number_of_documents=10) + mock_index.get_settings.return_value = { + "distinctAttribute": INDEX_DISTINCT_ATTRIBUTE, + "filterableAttributes": list(reversed(INDEX_FILTERABLE_ATTRIBUTES)), + "searchableAttributes": list(reversed(INDEX_SEARCHABLE_ATTRIBUTES)), + "sortableAttributes": list(reversed(INDEX_SORTABLE_ATTRIBUTES)), + "rankingRules": list(INDEX_RANKING_RULES), + } + mock_meilisearch.return_value.get_index.return_value = mock_index + + drift = _detect_index_drift("test_index") + + assert drift.filterable_attributes_match is True + assert drift.searchable_attributes_match is True + assert drift.sortable_attributes_match is True + assert drift.primary_key_correct is True + assert not drift.is_settings_drifted + + def test_primary_key_mismatch(self, mock_meilisearch): + """Detects primary key mismatch.""" + mock_index = Mock() + mock_index.primary_key = "wrong_key" + mock_index.get_stats.return_value = Mock(number_of_documents=100) + mock_index.get_settings.return_value = { + "distinctAttribute": INDEX_DISTINCT_ATTRIBUTE, + "filterableAttributes": list(INDEX_FILTERABLE_ATTRIBUTES), + "searchableAttributes": list(INDEX_SEARCHABLE_ATTRIBUTES), + "sortableAttributes": list(INDEX_SORTABLE_ATTRIBUTES), + "rankingRules": list(INDEX_RANKING_RULES), + } + mock_meilisearch.return_value.get_index.return_value = mock_index + + drift = _detect_index_drift("test_index") + + assert drift.exists is True + assert drift.is_empty is False + assert drift.primary_key_correct is False + # Settings themselves are fine + assert not drift.is_settings_drifted + + def test_multiple_mismatches(self, mock_meilisearch): + """Reports all drifted fields when multiple settings differ.""" + mock_index = Mock() + mock_index.primary_key = INDEX_PRIMARY_KEY + mock_index.get_stats.return_value = Mock(number_of_documents=0) + mock_index.get_settings.return_value = { + "distinctAttribute": 
"wrong_attr", + "filterableAttributes": ["wrong"], + "searchableAttributes": list(INDEX_SEARCHABLE_ATTRIBUTES), + "sortableAttributes": list(INDEX_SORTABLE_ATTRIBUTES), + "rankingRules": ["wrong"], + } + mock_meilisearch.return_value.get_index.return_value = mock_index + + drift = _detect_index_drift("test_index") + + assert drift.distinct_attribute_match is False + assert drift.filterable_attributes_match is False + assert drift.searchable_attributes_match is True + assert drift.sortable_attributes_match is True + assert drift.ranking_rules_match is False + assert drift.is_settings_drifted + + +@skip_unless_cms +@override_settings(MEILISEARCH_ENABLED=True) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class TestApplyIndexSettings(TestCase): + """Tests for _apply_index_settings().""" + + def setUp(self): + super().setUp() + api.clear_meilisearch_client() + + @patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) + def test_applies_all_settings(self, mock_meilisearch): + """All 5 settings are applied in wait mode.""" + mock_index = mock_meilisearch.return_value.index.return_value + status_cb = Mock() + + _apply_index_settings("test_index", wait=True, status_cb=status_cb) + + mock_index.update_distinct_attribute.assert_called_once_with(INDEX_DISTINCT_ATTRIBUTE) + mock_index.update_filterable_attributes.assert_called_once_with(INDEX_FILTERABLE_ATTRIBUTES) + mock_index.update_searchable_attributes.assert_called_once_with(INDEX_SEARCHABLE_ATTRIBUTES) + mock_index.update_sortable_attributes.assert_called_once_with(INDEX_SORTABLE_ATTRIBUTES) + mock_index.update_ranking_rules.assert_called_once_with(INDEX_RANKING_RULES) + + @patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task") + def test_waits_for_each_task(self, mock_wait, mock_meilisearch): + """Each settings update is waited on when wait=True.""" + mock_index = mock_meilisearch.return_value.index.return_value + task_info 
= Mock() + mock_index.update_distinct_attribute.return_value = task_info + mock_index.update_filterable_attributes.return_value = task_info + mock_index.update_searchable_attributes.return_value = task_info + mock_index.update_sortable_attributes.return_value = task_info + mock_index.update_ranking_rules.return_value = task_info + + _apply_index_settings("test_index", wait=True) + + assert mock_wait.call_count == 5 + + @patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task") + def test_does_not_wait_when_wait_false(self, mock_wait, mock_meilisearch): + """Settings are fire-and-forget when wait=False.""" + status_cb = Mock() + + _apply_index_settings("test_index", wait=False, status_cb=status_cb) + + mock_wait.assert_not_called() + status_cb.assert_called() + + @patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task") + def test_raises_on_task_failure(self, mock_wait, mock_meilisearch): + """MeilisearchError is raised if a waited-on task fails.""" + mock_wait.side_effect = MeilisearchError("Task failed") + + with pytest.raises(MeilisearchError): + _apply_index_settings("test_index", wait=True) + + +@skip_unless_cms +@override_settings(MEILISEARCH_ENABLED=True) +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class TestReconcileIndex(TestCase): + """Tests for reconcile_index().""" + + def setUp(self): + super().setUp() + api.clear_meilisearch_client() + + @patch("openedx.core.djangoapps.content.search.api._detect_index_drift") + @patch("openedx.core.djangoapps.content.search.api.reset_index") + def test_index_missing(self, mock_reset, mock_drift, mock_meilisearch): + """When index doesn't exist, reset_index is called to create it.""" + mock_drift.return_value = IndexDrift(exists=False) + status_cb = Mock() + + reconcile_index(status_cb=status_cb) + + mock_reset.assert_called_once() + 
status_cb.assert_any_call("Studio search index not found. Creating and configuring...") + + @patch("openedx.core.djangoapps.content.search.api._detect_index_drift") + def test_index_empty_configured(self, mock_drift, mock_meilisearch): + """When index is empty and configured, no action taken.""" + mock_drift.return_value = IndexDrift( + exists=True, + is_empty=True, + primary_key_correct=True, + distinct_attribute_match=True, + filterable_attributes_match=True, + searchable_attributes_match=True, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + status_cb = Mock() + + reconcile_index(status_cb=status_cb) + + status_cb.assert_any_call( + "Index exists and is correctly configured but empty. Run './manage.py cms reindex_studio' to populate." + ) + + @patch("openedx.core.djangoapps.content.search.api._detect_index_drift") + @patch("openedx.core.djangoapps.content.search.api._apply_index_settings") + def test_index_empty_drifted_settings(self, mock_apply, mock_drift, mock_meilisearch): + """When index is empty and settings drifted, settings are applied.""" + mock_drift.return_value = IndexDrift( + exists=True, + is_empty=True, + primary_key_correct=True, + distinct_attribute_match=True, + filterable_attributes_match=False, + searchable_attributes_match=True, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + status_cb = Mock() + + reconcile_index(status_cb=status_cb) + + mock_apply.assert_called_once_with(api.STUDIO_INDEX_NAME, wait=True, status_cb=status_cb) + status_cb.assert_any_call("Empty index has drifted settings. 
Reconfiguring...") + + @patch("openedx.core.djangoapps.content.search.api._detect_index_drift") + @patch("openedx.core.djangoapps.content.search.api.reset_index") + def test_index_empty_wrong_pk(self, mock_reset, mock_drift, mock_meilisearch): + """When index is empty with wrong PK, reset_index is called.""" + mock_drift.return_value = IndexDrift( + exists=True, + is_empty=True, + primary_key_correct=False, + distinct_attribute_match=True, + filterable_attributes_match=True, + searchable_attributes_match=True, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + warn_cb = Mock() + + reconcile_index(warn_cb=warn_cb) + + mock_reset.assert_called_once() + warn_cb.assert_any_call("Primary key mismatch on empty index. Recreating...") + + @patch("openedx.core.djangoapps.content.search.api._detect_index_drift") + def test_index_populated_configured(self, mock_drift, mock_meilisearch): + """When index is populated and configured, no action taken.""" + mock_drift.return_value = IndexDrift( + exists=True, + is_empty=False, + primary_key_correct=True, + distinct_attribute_match=True, + filterable_attributes_match=True, + searchable_attributes_match=True, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + status_cb = Mock() + + reconcile_index(status_cb=status_cb) + + status_cb.assert_any_call("Index is populated and correctly configured. 
No action needed.") + + @patch("openedx.core.djangoapps.content.search.api._detect_index_drift") + @patch("openedx.core.djangoapps.content.search.api._apply_index_settings") + def test_index_populated_drifted_settings(self, mock_apply, mock_drift, mock_meilisearch): + """When index is populated and drifted, settings are applied with warnings.""" + mock_drift.return_value = IndexDrift( + exists=True, + is_empty=False, + primary_key_correct=True, + distinct_attribute_match=True, + filterable_attributes_match=False, + searchable_attributes_match=False, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + status_cb = Mock() + warn_cb = Mock() + + reconcile_index(status_cb=status_cb, warn_cb=warn_cb) + + mock_apply.assert_called_once_with(api.STUDIO_INDEX_NAME, wait=True, status_cb=status_cb) + # Check that drifted fields are logged + warn_cb.assert_any_call(" - filterableAttributes: DRIFTED") + warn_cb.assert_any_call(" - searchableAttributes: DRIFTED") + # Check that non-drifted fields are NOT logged as drifted + drifted_calls = [c for c in warn_cb.call_args_list if "DRIFTED" in str(c)] + assert len(drifted_calls) == 2 + + @patch("openedx.core.djangoapps.content.search.api._detect_index_drift") + @patch("openedx.core.djangoapps.content.search.api.reset_index") + def test_index_populated_wrong_pk(self, mock_reset, mock_drift, mock_meilisearch): + """When index is populated with wrong PK, reset_index is called (destructive).""" + mock_drift.return_value = IndexDrift( + exists=True, + is_empty=False, + primary_key_correct=False, + distinct_attribute_match=True, + filterable_attributes_match=True, + searchable_attributes_match=True, + sortable_attributes_match=True, + ranking_rules_match=True, + ) + warn_cb = Mock() + + reconcile_index(warn_cb=warn_cb) + + mock_reset.assert_called_once() + # Should warn about data loss + warn_cb.assert_any_call("Index recreated empty. 
Run './manage.py cms reindex_studio' to repopulate.") + + @override_settings(MEILISEARCH_ENABLED=False) + def test_meilisearch_disabled(self, mock_meilisearch): + """When Meilisearch is disabled, reconcile_index raises RuntimeError (from client).""" + api.clear_meilisearch_client() + with pytest.raises(RuntimeError): + reconcile_index() + + +@skip_unless_cms +@override_settings(MEILISEARCH_ENABLED=True) +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class TestHandlePostMigrate(TestCase): + """Tests for the handle_post_migrate signal handler.""" + + def setUp(self): + super().setUp() + api.clear_meilisearch_client() + + @patch("openedx.core.djangoapps.content.search.handlers.reconcile_index") + def test_calls_reconcile_for_search_app(self, mock_reconcile, mock_meilisearch): + """Handler calls reconcile_index when sender is the search app.""" + sender = Mock() + sender.label = ContentSearchConfig.label + + handle_post_migrate(sender=sender) + + mock_reconcile.assert_called_once() + + @patch("openedx.core.djangoapps.content.search.handlers.reconcile_index") + def test_skips_wrong_sender(self, mock_reconcile, mock_meilisearch): + """Handler does nothing when sender is a different app.""" + sender = Mock() + sender.label = "some_other_app" + + handle_post_migrate(sender=sender) + + mock_reconcile.assert_not_called() + + @override_settings(MEILISEARCH_ENABLED=False) + @patch("openedx.core.djangoapps.content.search.handlers.reconcile_index") + def test_skips_when_disabled(self, mock_reconcile, mock_meilisearch): + """Handler does nothing when Meilisearch is disabled.""" + sender = Mock() + sender.label = ContentSearchConfig.label + + handle_post_migrate(sender=sender) + + mock_reconcile.assert_not_called() + + @patch("openedx.core.djangoapps.content.search.handlers.reconcile_index") + def test_catches_connection_error(self, mock_reconcile, 
mock_meilisearch): + """Handler catches ConnectionError and logs warning.""" + sender = Mock() + sender.label = ContentSearchConfig.label + mock_reconcile.side_effect = ConnectionError("Cannot connect") + + # Should not raise + handle_post_migrate(sender=sender) + + @patch("openedx.core.djangoapps.content.search.handlers.reconcile_index") + def test_catches_meilisearch_error(self, mock_reconcile, mock_meilisearch): + """Handler catches MeilisearchError and logs warning.""" + sender = Mock() + sender.label = ContentSearchConfig.label + mock_reconcile.side_effect = MeilisearchError("Something went wrong") + + # Should not raise + handle_post_migrate(sender=sender) + + @patch("openedx.core.djangoapps.content.search.handlers.reconcile_index") + def test_catches_generic_exception(self, mock_reconcile, mock_meilisearch): + """Handler catches unexpected exceptions and logs warning.""" + sender = Mock() + sender.label = ContentSearchConfig.label + mock_reconcile.side_effect = RuntimeError("Unexpected") + + # Should not raise + handle_post_migrate(sender=sender) + + def test_signal_connected(self, mock_meilisearch): + """Verify post_migrate signal is connected to handle_post_migrate.""" + from django.db.models.signals import post_migrate + + from ..handlers import handle_post_migrate as handler_fn + + # Check that the handler is in the receivers + receiver_funcs = [r[1]() for r in post_migrate.receivers if r[1]() is not None] + assert handler_fn in receiver_funcs, "handle_post_migrate should be connected to post_migrate signal" + + +@skip_unless_cms +@override_settings(MEILISEARCH_ENABLED=True) +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class TestInitIndexBackwardCompat(TestCase): + """Tests that init_index() still works as a compatibility wrapper.""" + + def setUp(self): + super().setUp() + api.clear_meilisearch_client() + + 
@patch("openedx.core.djangoapps.content.search.api.reconcile_index") + def test_init_index_delegates_to_reconcile(self, mock_reconcile, mock_meilisearch): + """init_index() should delegate to reconcile_index().""" + status_cb = Mock() + warn_cb = Mock() + + api.init_index(status_cb=status_cb, warn_cb=warn_cb) + + mock_reconcile.assert_called_once_with(status_cb=status_cb, warn_cb=warn_cb) diff --git a/openedx/core/djangoapps/content/search/tests/test_reindex_cmd.py b/openedx/core/djangoapps/content/search/tests/test_reindex_cmd.py new file mode 100644 index 000000000000..78e7e47bf9e2 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_reindex_cmd.py @@ -0,0 +1,106 @@ +""" +Tests for the reindex_studio management command and the rebuild_index_incremental Celery task. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, Mock, patch + +import pytest +from django.core.management import CommandError, call_command +from django.test import TestCase, override_settings + +from openedx.core.djangolib.testing.utils import skip_unless_cms + +try: + from .. 
import api + from ..tasks import rebuild_index_incremental +except RuntimeError: + pass + + +@skip_unless_cms +@override_settings(MEILISEARCH_ENABLED=True) +class TestReindexStudioCommand(TestCase): + """Tests for the reindex_studio management command.""" + + @patch("openedx.core.djangoapps.content.search.tasks.rebuild_index_incremental.delay") + def test_enqueues_task(self, mock_delay): + """Command enqueues the incremental rebuild task.""" + mock_delay.return_value = Mock(id="fake-task-id") + + call_command("reindex_studio") + + mock_delay.assert_called_once_with() + + @override_settings(MEILISEARCH_ENABLED=False) + def test_disabled(self): + """Command raises error when Meilisearch is disabled.""" + with pytest.raises(CommandError, match="not enabled"): + call_command("reindex_studio") + + def test_reset_flag_removed(self): + """Passing --reset raises a clear error.""" + with pytest.raises(CommandError, match="--reset flag has been removed"): + call_command("reindex_studio", "--reset") + + def test_init_flag_removed(self): + """Passing --init raises a clear error.""" + with pytest.raises(CommandError, match="--init flag has been removed"): + call_command("reindex_studio", "--init") + + @patch("openedx.core.djangoapps.content.search.tasks.rebuild_index_incremental.delay") + @patch("openedx.core.djangoapps.content.search.management.commands.reindex_studio.log") + def test_incremental_flag_accepted_with_warning(self, mock_log, mock_delay): + """Passing --incremental logs a warning but still enqueues the task.""" + mock_delay.return_value = Mock(id="fake-task-id") + + call_command("reindex_studio", "--incremental") + + mock_log.warning.assert_called_once() + mock_delay.assert_called_once_with() + + +@skip_unless_cms +@override_settings(MEILISEARCH_ENABLED=True) +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class 
TestRebuildIndexIncrementalTask(TestCase): + """Tests for the rebuild_index_incremental Celery task.""" + + def setUp(self): + super().setUp() + api.clear_meilisearch_client() + + @patch("openedx.core.djangoapps.content.search.api.rebuild_index") + def test_calls_rebuild_incremental(self, mock_rebuild, mock_meilisearch): + """Task calls api.rebuild_index with incremental=True.""" + rebuild_index_incremental() + + mock_rebuild.assert_called_once() + _, kwargs = mock_rebuild.call_args + assert kwargs["incremental"] is True + + @patch("openedx.core.djangoapps.content.search.api.rebuild_index") + def test_rebuild_already_in_progress(self, mock_rebuild, mock_meilisearch): + """Task exits gracefully if rebuild lock is already held.""" + mock_rebuild.side_effect = RuntimeError("Rebuild already in progress") + + # Should not raise + rebuild_index_incremental() + + @patch("openedx.core.djangoapps.content.search.api.rebuild_index") + def test_other_runtime_error_raised(self, mock_rebuild, mock_meilisearch): + """Task re-raises RuntimeError if it's not about lock contention.""" + mock_rebuild.side_effect = RuntimeError("Something else went wrong") + + with pytest.raises(RuntimeError, match="Something else went wrong"): + rebuild_index_incremental() + + @patch("openedx.core.djangoapps.content.search.api.rebuild_index") + def test_idempotent(self, mock_rebuild, mock_meilisearch): + """Task can be called multiple times safely.""" + rebuild_index_incremental() + rebuild_index_incremental() + + assert mock_rebuild.call_count == 2