Skip to content

Commit 1252e04

Browse files
committed
fixup! ✨(backend) add time-bound filtering options on index command
fixup! fixup! ✨(backend) add time-bound filtering options on index command Signed-off-by: charles <charles.englebert@protonmail.com>
1 parent 5586ae7 commit 1252e04

9 files changed

Lines changed: 304 additions & 248 deletions

File tree

src/backend/core/factories.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from django.contrib.auth.hashers import make_password
77

88
import factory.fuzzy
9+
from factory import post_generation
910
from faker import Faker
1011

1112
from core import models
@@ -159,6 +160,20 @@ def masked_by(self, create, extracted, **kwargs):
159160
document=self, user=item, defaults={"is_masked": True}
160161
)
161162

163+
@post_generation
def updated_at(self, create, extracted, **kwargs):
    """
    Force a specific `updated_at` value on the generated instance.

    BaseModel.updated_at is declared with auto_now=True, which prevents
    setting a specific value through the factory. This hook writes the
    requested value with a queryset update and then reloads the instance.

    Nothing happens when `create` is falsy or when no value was passed
    for `updated_at` (`extracted` is falsy).
    """
    if create and extracted:
        # A queryset-level update is used instead of saving the instance so
        # that the requested timestamp is the one stored.
        persisted = self.__class__.objects.filter(pk=self.pk)
        persisted.update(updated_at=extracted)
        # Reload so the in-memory instance reflects the stored row.
        self.refresh_from_db()
176+
162177

163178
class UserDocumentAccessFactory(factory.django.DjangoModelFactory):
164179
"""Create fake document user accesses for testing."""

src/backend/core/management/commands/index.py

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,17 @@
33
"""
44

55
import logging
6+
import time
67
from datetime import datetime
78

89
from django.conf import settings
910
from django.core.management.base import BaseCommand, CommandError
1011

12+
from core import models
1113
from core.services.search_indexers import get_document_indexer
1214
from core.tasks.search import batch_document_indexer_task
1315

14-
logger = logging.getLogger("docs.search.bootstrap_search")
16+
logger = logging.getLogger(__name__)
1517

1618

1719
class Command(BaseCommand):
@@ -45,6 +47,13 @@ def add_arguments(self, parser):
4547
default=None,
4648
help="DateTime in ISO format. Only documents updated before this date will be indexed",
4749
)
50+
parser.add_argument(
51+
"--async",
52+
action="store_true",
53+
dest="async_mode",
54+
default=False,
55+
help="Whether to execute indexing asynchronously in a Celery task (default: False)",
56+
)
4857

4958
def handle(self, *args, **options):
5059
"""Launch and log search index generation."""
@@ -53,15 +62,37 @@ def handle(self, *args, **options):
5362
if not indexer:
5463
raise CommandError("The indexer is not enabled or properly configured.")
5564

56-
batch_document_indexer_task.apply_async(
57-
kwargs={
58-
"lower_time_bound": options["lower_time_bound"],
59-
"upper_time_bound": options["upper_time_bound"],
60-
"batch_size": options["batch_size"],
61-
"crash_safe_mode": True,
62-
},
63-
)
65+
if options["async_mode"]:
66+
batch_document_indexer_task.apply_async(
67+
kwargs={
68+
"lower_time_bound": options["lower_time_bound"],
69+
"upper_time_bound": options["upper_time_bound"],
70+
"batch_size": options["batch_size"],
71+
"crash_safe_mode": True,
72+
},
73+
)
74+
logger.info(
75+
"Document indexing task sent to worker",
76+
)
77+
else:
78+
logger.info("Starting to regenerate Find index...")
79+
start = time.perf_counter()
6480

65-
logger.info(
66-
"Document indexing task sent to worker",
67-
)
81+
try:
82+
count = indexer.index(
83+
queryset=models.Document.objects.filter_updated_at(
84+
lower_time_bound=options["lower_time_bound"],
85+
upper_time_bound=options["upper_time_bound"],
86+
),
87+
batch_size=options["batch_size"],
88+
crash_safe_mode=True,
89+
)
90+
except Exception as err:
91+
raise CommandError("Unable to regenerate index") from err
92+
93+
duration = time.perf_counter() - start
94+
logger.info(
95+
"Search index regenerated from %d document(s) in %.2f seconds.",
96+
count,
97+
duration,
98+
)

src/backend/core/models.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,31 @@ def annotate_user_roles(self, user):
859859
user_roles=models.Value([], output_field=output_field),
860860
)
861861

862+
def filter_updated_at(self, lower_time_bound=None, upper_time_bound=None):
    """
    Restrict the queryset to documents whose `updated_at` lies within the given bounds.

    Both bounds are inclusive and optional. A falsy bound is ignored, so calling
    this method without any bound filters on an empty condition.

    Args:
        lower_time_bound (datetime, optional):
            Keep documents updated at or after this timestamp.
        upper_time_bound (datetime, optional):
            Keep documents updated at or before this timestamp.

    Returns:
        QuerySet: This queryset filtered with the collected conditions.
    """
    # Collect only the lookups whose bound was actually provided.
    lookups = {}
    if lower_time_bound:
        lookups["updated_at__gte"] = lower_time_bound
    if upper_time_bound:
        lookups["updated_at__lte"] = upper_time_bound

    return self.filter(models.Q(**lookups))
886+
862887

863888
class DocumentManager(MP_NodeManager.from_queryset(DocumentQuerySet)):
864889
"""

src/backend/core/tasks/search.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from core.services.search_indexers import (
1212
get_document_indexer,
1313
)
14-
from core.utils import build_indexable_documents_queryset
1514

1615
from impress.celery_app import app
1716

@@ -81,7 +80,7 @@ def batch_document_indexer_task(lower_time_bound=None, upper_time_bound=None, **
8180
return
8281

8382
count = indexer.index(
84-
queryset=build_indexable_documents_queryset(
83+
queryset=models.Document.objects.filter_updated_at(
8584
lower_time_bound=lower_time_bound, upper_time_bound=upper_time_bound
8685
),
8786
**kwargs,

0 commit comments

Comments
 (0)