Skip to content

Commit e7e2016

Browse files
authored
Retry embeddings tasks on Qdrant gRPC DEADLINE_EXCEEDED (#3520)
1 parent d3716bf commit e7e2016

2 files changed

Lines changed: 80 additions & 8 deletions

File tree

vector_search/tasks.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33

44
import celery
5+
import grpc
56
import sentry_sdk
67
from celery.exceptions import Ignore
78
from django.conf import settings
@@ -119,10 +120,10 @@ def generate_embeddings(ids, resource_type, overwrite):
119120
raise
120121
except SystemExit as err:
121122
raise RetryError(SystemExit.__name__) from err
122-
except: # noqa: E722
123-
error = "generate_embeddings threw an error"
124-
log.exception(error)
125-
return error
123+
except grpc.RpcError as err:
124+
if err.code() == grpc.StatusCode.DEADLINE_EXCEEDED:
125+
raise RetryError(str(err)) from err
126+
raise
126127

127128

128129
@app.task(
@@ -148,10 +149,10 @@ def remove_embeddings(ids, resource_type):
148149
raise
149150
except SystemExit as err:
150151
raise RetryError(SystemExit.__name__) from err
151-
except: # noqa: E722
152-
error = "generate_embeddings threw an error"
153-
log.exception(error)
154-
return error
152+
except grpc.RpcError as err:
153+
if err.code() == grpc.StatusCode.DEADLINE_EXCEEDED:
154+
raise RetryError(str(err)) from err
155+
raise
155156

156157

157158
@app.task(bind=True)

vector_search/tasks_test.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import datetime
22
import random
33

4+
import grpc
45
import pytest
56
from django.conf import settings
67

@@ -24,13 +25,16 @@
2425
LEARNING_RESOURCE_TYPES,
2526
PROGRAM_TYPE,
2627
)
28+
from learning_resources_search.exceptions import RetryError
2729
from main.utils import now_in_utc
2830
from vector_search.tasks import (
2931
embed_learning_resources_by_id,
3032
embed_new_content_files,
3133
embed_new_learning_resources,
3234
embed_run_content_files,
3335
embeddings_healthcheck,
36+
generate_embeddings,
37+
remove_embeddings,
3438
remove_run_content_files,
3539
remove_unpublished_run_content_files,
3640
start_embed_resources,
@@ -40,6 +44,13 @@
4044
pytestmark = pytest.mark.django_db
4145

4246

47+
def _rpc_error(code):
48+
"""Build a grpc.RpcError carrying a status code, like qdrant's gRPC failures."""
49+
err = grpc.RpcError()
50+
err.code = lambda: code
51+
return err
52+
53+
4354
@pytest.mark.parametrize("index", list(LEARNING_RESOURCE_TYPES))
4455
def test_start_embed_resources(mocker, mocked_celery, index):
4556
"""
@@ -780,3 +791,63 @@ def test_embeddings_healthcheck_missing_summaries(mocker):
780791
mock_sentry.mock_calls[0].args[0]
781792
== "Warning: 1 missing content file summaries detected"
782793
)
794+
795+
796+
def test_generate_embeddings_raises_retryerror_on_grpc_deadline(mocker):
797+
"""A DEADLINE_EXCEEDED gRPC error becomes a RetryError (autoretry_for picks it up)."""
798+
mocker.patch(
799+
"vector_search.tasks.embed_learning_resources",
800+
side_effect=_rpc_error(grpc.StatusCode.DEADLINE_EXCEEDED),
801+
)
802+
with pytest.raises(RetryError):
803+
generate_embeddings([1], COURSE_TYPE, overwrite=False)
804+
805+
806+
def test_generate_embeddings_reraises_other_grpc_errors(mocker):
807+
"""Non-transient gRPC errors propagate (task fails) rather than retrying."""
808+
mocker.patch(
809+
"vector_search.tasks.embed_learning_resources",
810+
side_effect=_rpc_error(grpc.StatusCode.INVALID_ARGUMENT),
811+
)
812+
with pytest.raises(grpc.RpcError):
813+
generate_embeddings([1], COURSE_TYPE, overwrite=False)
814+
815+
816+
def test_generate_embeddings_does_not_swallow_errors(mocker):
817+
"""Unhandled errors propagate so the task fails instead of reporting success."""
818+
mocker.patch(
819+
"vector_search.tasks.embed_learning_resources",
820+
side_effect=ValueError("boom"),
821+
)
822+
with pytest.raises(ValueError, match="boom"):
823+
generate_embeddings([1], COURSE_TYPE, overwrite=False)
824+
825+
826+
def test_remove_embeddings_raises_retryerror_on_grpc_deadline(mocker):
827+
"""remove_embeddings retries on DEADLINE_EXCEEDED rather than swallowing it."""
828+
mocker.patch(
829+
"vector_search.tasks.remove_qdrant_records",
830+
side_effect=_rpc_error(grpc.StatusCode.DEADLINE_EXCEEDED),
831+
)
832+
with pytest.raises(RetryError):
833+
remove_embeddings([1], COURSE_TYPE)
834+
835+
836+
def test_remove_embeddings_reraises_other_grpc_errors(mocker):
837+
"""Non-transient gRPC errors propagate (task fails) rather than retrying."""
838+
mocker.patch(
839+
"vector_search.tasks.remove_qdrant_records",
840+
side_effect=_rpc_error(grpc.StatusCode.INVALID_ARGUMENT),
841+
)
842+
with pytest.raises(grpc.RpcError):
843+
remove_embeddings([1], COURSE_TYPE)
844+
845+
846+
def test_remove_embeddings_does_not_swallow_errors(mocker):
847+
"""Unhandled errors propagate so the task fails instead of reporting success."""
848+
mocker.patch(
849+
"vector_search.tasks.remove_qdrant_records",
850+
side_effect=ValueError("boom"),
851+
)
852+
with pytest.raises(ValueError, match="boom"):
853+
remove_embeddings([1], COURSE_TYPE)

0 commit comments

Comments
 (0)