Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,11 @@ def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable
reconcile_index(status_cb=status_cb, warn_cb=warn_cb)


def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
def index_course(
course_key: CourseKey,
index_name: str | None = None,
status_cb: Callable[[str], None] | None = None,
) -> list:
"""
Rebuilds the index for a given course.
"""
Expand All @@ -550,9 +554,16 @@ def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
docs = []
if index_name is None:
index_name = STUDIO_INDEX_NAME
if status_cb is None:
status_cb = log.info

# Pre-fetch the course with all of its children:
course = store.get_course(course_key, depth=None)

if course is None:
status_cb(f"Error: course {course_key} does not seem to exist! It may have been incompletely deleted.")
return []

def add_with_children(block):
"""Recursively index the given XBlock/component"""
doc = searchable_doc_for_course_block(block)
Expand Down Expand Up @@ -585,6 +596,8 @@ def rebuild_index( # pylint: disable=too-many-statements
keys_indexed = []
if incremental:
keys_indexed = list(IncrementalIndexCompleted.objects.values_list("context_key", flat=True))
if keys_indexed:
status_cb(f"Resuming incremental index - {len(keys_indexed)} courses/libraries already indexed.")
lib_keys = [
lib.library_key
for lib in lib_api.ContentLibrary.objects.select_related("org").only("org", "slug").order_by("-id")
Expand Down Expand Up @@ -698,31 +711,33 @@ def index_container_batch(batch, num_done, library_key) -> int:
collections = content_api.get_collections(library.learning_package_id, enabled=True)
num_collections = collections.count()
num_collections_done = 0
status_cb(f"{num_collections_done}/{num_collections}. Now indexing collections in library {lib_key}")
if num_collections:
status_cb(f"Now indexing {num_collections} collections in library {lib_key}")
paginator = Paginator(collections, 100)
for p in paginator.page_range:
num_collections_done = index_collection_batch(
paginator.page(p).object_list,
num_collections_done,
lib_key,
)
if incremental:
IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)
status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")
status_cb(f"Indexed {num_collections_done}/{num_collections} collections in library {lib_key}")

# Similarly, batch process Containers (units, sections, etc) in pages of 100
containers = content_api.get_containers(library.learning_package_id)
num_containers = containers.count()
num_containers_done = 0
status_cb(f"{num_containers_done}/{num_containers}. Now indexing containers in library {lib_key}")
if num_containers:
status_cb(f"Now indexing {num_containers} containers in library {lib_key}")
paginator = Paginator(containers, 100)
for p in paginator.page_range:
num_containers_done = index_container_batch(
paginator.page(p).object_list,
num_containers_done,
lib_key,
)
status_cb(f"{num_containers_done}/{num_containers} containers indexed for library {lib_key}")
status_cb(f"Indexed {num_containers_done}/{num_containers} containers in library {lib_key}")

# Mark this library as indexed:
if incremental:
IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)

Expand All @@ -732,7 +747,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
status_cb("Indexing courses...")
# To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:

paginator = Paginator(CourseOverview.objects.only("id", "display_name"), 1000)
paginator = Paginator(CourseOverview.objects.only("id", "display_name").order_by("-created", "id"), 1000)
for p in paginator.page_range:
for course in paginator.page(p).object_list:
status_cb(
Expand All @@ -741,7 +756,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
if course.id in keys_indexed:
num_contexts_done += 1
continue
course_docs = index_course(course.id, index_name)
course_docs = index_course(course.id, index_name, status_cb)
if incremental:
IncrementalIndexCompleted.objects.get_or_create(context_key=course.id)
num_contexts_done += 1
Expand Down
Loading