Skip to content

Commit 2fd5857

Browse files
fix: make studio reindex more robust, provide better logging (#38498)
1 parent 5b043a3 commit 2fd5857

1 file changed

Lines changed: 24 additions & 9 deletions

File tree

  • openedx/core/djangoapps/content/search

openedx/core/djangoapps/content/search/api.py

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -541,7 +541,11 @@ def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable
541541
reconcile_index(status_cb=status_cb, warn_cb=warn_cb)
542542

543543

544-
def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
544+
def index_course(
545+
course_key: CourseKey,
546+
index_name: str | None = None,
547+
status_cb: Callable[[str], None] | None = None,
548+
) -> list[dict]:
545549
"""
546550
Rebuilds the index for a given course.
547551
"""
@@ -550,9 +554,16 @@ def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
550554
docs = []
551555
if index_name is None:
552556
index_name = STUDIO_INDEX_NAME
557+
if status_cb is None:
558+
status_cb = log.info
559+
553560
# Pre-fetch the course with all of its children:
554561
course = store.get_course(course_key, depth=None)
555562

563+
if course is None:
564+
status_cb(f"Error: course {course_key} does not seem to exist! It may have been incompletely deleted.")
565+
return []
566+
556567
def add_with_children(block):
557568
"""Recursively index the given XBlock/component"""
558569
doc = searchable_doc_for_course_block(block)
@@ -585,6 +596,8 @@ def rebuild_index( # pylint: disable=too-many-statements
585596
keys_indexed = []
586597
if incremental:
587598
keys_indexed = list(IncrementalIndexCompleted.objects.values_list("context_key", flat=True))
599+
if keys_indexed:
600+
status_cb(f"Resuming incremental index - {len(keys_indexed)} courses/libraries already indexed.")
588601
lib_keys = [
589602
lib.library_key
590603
for lib in lib_api.ContentLibrary.objects.select_related("org").only("org", "slug").order_by("-id")
@@ -698,31 +711,33 @@ def index_container_batch(batch, num_done, library_key) -> int:
698711
collections = content_api.get_collections(library.learning_package_id, enabled=True)
699712
num_collections = collections.count()
700713
num_collections_done = 0
701-
status_cb(f"{num_collections_done}/{num_collections}. Now indexing collections in library {lib_key}")
714+
if num_collections:
715+
status_cb(f"Now indexing {num_collections} collections in library {lib_key}")
702716
paginator = Paginator(collections, 100)
703717
for p in paginator.page_range:
704718
num_collections_done = index_collection_batch(
705719
paginator.page(p).object_list,
706720
num_collections_done,
707721
lib_key,
708722
)
709-
if incremental:
710-
IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)
711-
status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")
723+
status_cb(f"Indexed {num_collections_done}/{num_collections} collections in library {lib_key}")
712724

713725
# Similarly, batch process Containers (units, sections, etc) in pages of 100
714726
containers = content_api.get_containers(library.learning_package_id)
715727
num_containers = containers.count()
716728
num_containers_done = 0
717-
status_cb(f"{num_containers_done}/{num_containers}. Now indexing containers in library {lib_key}")
729+
if num_containers:
730+
status_cb(f"Now indexing {num_containers} containers in library {lib_key}")
718731
paginator = Paginator(containers, 100)
719732
for p in paginator.page_range:
720733
num_containers_done = index_container_batch(
721734
paginator.page(p).object_list,
722735
num_containers_done,
723736
lib_key,
724737
)
725-
status_cb(f"{num_containers_done}/{num_containers} containers indexed for library {lib_key}")
738+
status_cb(f"Indexed {num_containers_done}/{num_containers} containers in library {lib_key}")
739+
740+
# Mark this library as indexed:
726741
if incremental:
727742
IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)
728743

@@ -732,7 +747,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
732747
status_cb("Indexing courses...")
733748
# To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:
734749

735-
paginator = Paginator(CourseOverview.objects.only("id", "display_name"), 1000)
750+
paginator = Paginator(CourseOverview.objects.only("id", "display_name").order_by("-created", "id"), 1000)
736751
for p in paginator.page_range:
737752
for course in paginator.page(p).object_list:
738753
status_cb(
@@ -741,7 +756,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
741756
if course.id in keys_indexed:
742757
num_contexts_done += 1
743758
continue
744-
course_docs = index_course(course.id, index_name)
759+
course_docs = index_course(course.id, index_name, status_cb)
745760
if incremental:
746761
IncrementalIndexCompleted.objects.get_or_create(context_key=course.id)
747762
num_contexts_done += 1

0 commit comments

Comments
 (0)