Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
300 changes: 218 additions & 82 deletions openedx/core/djangoapps/content/search/api.py

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions openedx/core/djangoapps/content/search/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@ class ContentSearchConfig(AppConfig):

default_auto_field = "django.db.models.BigAutoField"
name = "openedx.core.djangoapps.content.search"
label = "search"

def ready(self):
    """Wire up signal handlers once the app registry is fully loaded."""
    # Importing the handlers module registers its @receiver-decorated
    # handlers. post_migrate is connected explicitly so the Meilisearch
    # index is reconciled after every migrate run. No sender= is passed —
    # the handler itself filters on sender.label.
    from django.db.models import signals  # pylint: disable=import-outside-toplevel

    from . import handlers  # pylint: disable=unused-import # noqa: F401
    signals.post_migrate.connect(handlers.handle_post_migrate)
33 changes: 33 additions & 0 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@
from xmodule.modulestore.django import SignalHandler

from .api import (
is_meilisearch_enabled,
only_if_meilisearch_enabled,
reconcile_index,
upsert_content_object_tags_index_doc,
upsert_item_collections_index_docs,
upsert_item_containers_index_docs,
Expand All @@ -68,6 +70,37 @@
log = logging.getLogger(__name__)


def handle_post_migrate(sender, **kwargs):
    """
    Reconcile the Meilisearch index state after ``migrate`` runs.

    The post_migrate signal fires once per installed app, so this bails out
    unless ``sender`` is this app's own config (matched on ``label``). Any
    failure to reach Meilisearch is logged as a warning instead of raised,
    so a migrate run never fails just because the search backend is down.
    """
    from .apps import ContentSearchConfig  # pylint: disable=import-outside-toplevel

    # Only act on our own app's post_migrate signal, and only when search is enabled.
    if sender.label != ContentSearchConfig.label or not is_meilisearch_enabled():
        return

    try:
        reconcile_index(status_cb=log.info, warn_cb=log.warning)
    except ConnectionError as err:
        log.warning(
            "Meilisearch reconciliation skipped during post_migrate: %s. "
            "Will retry on next migrate run.",
            err,
        )
    except Exception as err:  # pylint: disable=broad-except
        log.warning(
            "Meilisearch reconciliation failed during post_migrate: %s. "
            "Will retry on next migrate run.",
            err,
        )


# Using post_delete here because there is no COURSE_DELETED event defined.
@receiver(post_delete, sender=CourseOverview)
def delete_course_search_access(sender, instance, **kwargs): # pylint: disable=unused-argument
Expand Down
3 changes: 3 additions & 0 deletions openedx/core/djangoapps/content/search/index_config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Configuration for the search index."""
from .documents import Fields

# The Meilisearch primary key for all documents in the index.
INDEX_PRIMARY_KEY = Fields.id

# Meilisearch "distinct attribute": search results sharing the same
# usage_key are collapsed into a single hit (see Meilisearch index settings).
INDEX_DISTINCT_ATTRIBUTE = "usage_key"

# Mark which attributes can be used for filtering/faceted search:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,43 +1,100 @@
"""
Command to build or re-build the search index for courses (in Studio, i.e. Draft
mode), in Meilisearch.
Command to queue incremental population of the Studio Meilisearch search index.

Index creation, configuration, and schema reconciliation are handled
automatically via the post_migrate signal. This command is solely
responsible for enqueuing the population task in Celery.

See also cms/djangoapps/contentstore/management/commands/reindex_course.py which
indexes LMS (published) courses in ElasticSearch.
"""

import logging

from django.conf import settings
from django.core.management import BaseCommand, CommandError

from ... import api
from ...tasks import rebuild_index_incremental

log = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Add all course and library content to the Studio search index.

    This enqueues a Celery task that incrementally indexes all courses and
    libraries. Progress is tracked via IncrementalIndexCompleted, so the task
    can safely resume if interrupted.

    Index creation and configuration are handled by post_migrate reconciliation
    (runs automatically on ./manage.py cms migrate).

    If it's ever necessary to reset the incremental indexing state (force
    the full re-index process to start from the beginning), use:

        ./manage.py cms shell -c 'IncrementalIndexCompleted.objects.all().delete()'

    Deleting the IncrementalIndexCompleted records restarts index population
    from the beginning.
    """

    # TODO: improve this - see https://github.com/openedx/edx-platform/issues/36868
    help = "Add all course and library content to the Studio search index."

    def add_arguments(self, parser):
        # --experimental is a no-op, kept so existing automation does not break.
        parser.add_argument("--experimental", action="store_true")  # kept for compatibility but ignored.
        # Removed flags — provide clear error messages for operators with old automation.
        parser.add_argument(
            "--reset",
            action="store_true",
            default=False,
            help="(Removed) Index reset is now handled by post_migrate reconciliation.",
        )
        parser.add_argument(
            "--init",
            action="store_true",
            default=False,
            help="(Removed) Index initialization is now handled by post_migrate reconciliation.",
        )
        parser.add_argument(
            "--incremental",
            action="store_true",
            default=False,
            help="(Removed) Incremental is now the default and only population mode.",
        )

    def handle(self, *args, **options):
        """
        Queue the Celery task that populates the Studio search index.

        Raises CommandError if Meilisearch is disabled, or if one of the
        removed --reset / --init flags is passed.
        """
        if not api.is_meilisearch_enabled():
            raise CommandError("Meilisearch is not enabled. Please set MEILISEARCH_ENABLED to True in your settings.")

        if options["reset"]:
            raise CommandError(
                "The --reset flag has been removed. "
                "Index reset is now handled automatically by post_migrate reconciliation. "
                "Run: ./manage.py cms migrate"
            )

        if options["init"]:
            raise CommandError(
                "The --init flag has been removed. "
                "Index initialization is now handled automatically by post_migrate reconciliation. "
                "Run: ./manage.py cms migrate"
            )

        if options["incremental"]:
            # Not fatal: incremental is now the only mode, so just warn.
            log.warning(
                "The --incremental flag has been removed. "
                "Incremental population is now the default behavior of this command."
            )

        result = rebuild_index_incremental.delay()

        if settings.CELERY_ALWAYS_EAGER:
            # With eager Celery, .delay() already ran the task synchronously.
            self.stdout.write("Indexing complete!")
        else:
            self.stdout.write(
                f"Studio search index population has been queued (task_id={result.id}). "
                "Population will run incrementally in a Celery worker. "
                "Monitor progress in Celery worker logs. "
                "In order to reset the incremental indexing state, please run: "
                "./manage.py cms shell -c 'IncrementalIndexCompleted.objects.all().delete()'"
            )
33 changes: 33 additions & 0 deletions openedx/core/djangoapps/content/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,36 @@ def delete_course_index_docs(course_key_str: str) -> None:

# Delete children index data for course blocks.
api.delete_docs_with_context_key(course_key)


@shared_task(
    base=LoggedTask,
    autoretry_for=(MeilisearchError, ConnectionError),
    max_retries=3,
    retry_backoff=True,
)
@set_code_owner_attribute
def rebuild_index_incremental() -> None:
    """
    Celery task that populates the Studio Meilisearch index incrementally.

    Progress is recorded via IncrementalIndexCompleted, so re-running the task
    resumes from where the previous run stopped; already-indexed contexts are
    skipped. If another rebuild currently holds the lock, the task logs a
    warning and exits without error.
    """
    log.info("Starting incremental Studio search index population...")

    try:
        api.rebuild_index(status_cb=log.info, incremental=True)
    except RuntimeError as err:
        # rebuild_index raises RuntimeError on lock contention (a rebuild is
        # already underway); treat that as a graceful no-op, re-raise the rest.
        if "already in progress" not in str(err).lower():
            raise
        log.warning(
            "Studio index population skipped: a rebuild is already in progress. Will retry later if re-enqueued."
        )
    else:
        log.info("Incremental Studio search index population complete.")
19 changes: 16 additions & 3 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,19 +449,32 @@ def test_reset_meilisearch_index(self, mock_meilisearch) -> None:

@override_settings(MEILISEARCH_ENABLED=True)
def test_init_meilisearch_index(self, mock_meilisearch) -> None:
# Test index already exists
# Test index already exists, is populated, and correctly configured
mock_index = Mock()
mock_index.primary_key = "id"
mock_index.get_stats.return_value = Mock(number_of_documents=100)
mock_index.get_settings.return_value = {
"distinctAttribute": "usage_key",
"filterableAttributes": list(api.INDEX_FILTERABLE_ATTRIBUTES),
"searchableAttributes": list(api.INDEX_SEARCHABLE_ATTRIBUTES),
"sortableAttributes": list(api.INDEX_SORTABLE_ATTRIBUTES),
"rankingRules": list(api.INDEX_RANKING_RULES),
}
mock_meilisearch.return_value.get_index.return_value = mock_index

api.init_index()
mock_meilisearch.return_value.swap_indexes.assert_not_called()
mock_meilisearch.return_value.create_index.assert_not_called()
mock_meilisearch.return_value.delete_index.assert_not_called()

# Test index already exists and has no documents
mock_meilisearch.return_value.get_stats.return_value = 0
# Test index already exists and is empty but correctly configured
mock_index.get_stats.return_value = Mock(number_of_documents=0)
api.init_index()
mock_meilisearch.return_value.swap_indexes.assert_not_called()
mock_meilisearch.return_value.create_index.assert_not_called()
mock_meilisearch.return_value.delete_index.assert_not_called()

# Test index does not exist — should create it
mock_meilisearch.return_value.get_index.side_effect = [
MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')),
MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')),
Expand Down
Loading
Loading