Skip to content

Commit 7c62a02

Browse files
authored
ITEP-36123: Server returns 500 error when invoking the same job twice (#425)
1 parent 81aa958 commit 7c62a02

3 files changed

Lines changed: 27 additions & 1 deletion

File tree

interactive_ai/services/director/app/communication/controllers/training_controller.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from communication.data_validator import TrainingRestValidator
1010
from communication.exceptions import (
11+
JobDuplicateFoundException,
1112
JobInsufficientBalanceException,
1213
NotEnoughDatasetItemsException,
1314
NotEnoughSpaceHTTPException,
@@ -26,7 +27,7 @@
2627
from geti_fastapi_tools.exceptions import BadRequestException
2728
from geti_telemetry_tools import unified_tracing
2829
from geti_types import ID, DatasetStorageIdentifier
29-
from grpc_interfaces.job_submission.client import InsufficientBalanceException
30+
from grpc_interfaces.job_submission.client import DuplicateFoundException, InsufficientBalanceException
3031
from iai_core.algorithms import ModelTemplateList
3132
from iai_core.entities.annotation_scene_state import AnnotationState
3233
from iai_core.entities.project import Project
@@ -201,6 +202,9 @@ def _submit_train_job(project: Project, task_training_config: TrainingConfig, au
201202
except InsufficientBalanceException:
202203
logger.error("Insufficient balance for job execution")
203204
raise JobInsufficientBalanceException("Insufficient balance for job execution")
205+
except DuplicateFoundException:
206+
logger.error("Duplicate running job has been found")
207+
raise JobDuplicateFoundException("Duplicate running job has been found")
204208

205209
@staticmethod
206210
def _get_task_readiness_status_by_project(

interactive_ai/services/director/app/communication/exceptions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,22 @@ def __init__(self, message: str) -> None:
172172
)
173173

174174

175+
class JobDuplicateFoundException(GetiBaseException):
176+
"""
177+
This error can be raised if a running duplicate job is found during submission and the job policy is set
178+
to REJECT
179+
180+
:param message: str containing a custom error message
181+
"""
182+
183+
def __init__(self, message: str) -> None:
184+
super().__init__(
185+
message=message,
186+
error_code="job_duplicate_found",
187+
http_status=http.HTTPStatus.PRECONDITION_FAILED,
188+
)
189+
190+
175191
class NotReadyForTrainingException(GetiBaseException):
176192
"""
177193
Exception raised when a manual training trigger is received for a task that is not

libs/grpc_interfaces/src/grpc_interfaces/job_submission/client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ class InsufficientBalanceException(CommunicationError):
4444
"""Exception for when organization doesn't have enough credits for job submission."""
4545

4646

47+
class DuplicateFoundException(CommunicationError):
48+
"""Exception for when the duplicate policy is set to REJECT and a duplicate job is found during submission."""
49+
50+
4751
def grpc_requester(acquire_lock: bool = True): # noqa: ANN201
4852
"""
4953
Decorator for methods of GRPCJobsClient which handles GRPC requests.
@@ -357,6 +361,8 @@ def handle_grpc_error(self, error: RpcError) -> None:
357361

358362
if "Insufficient balance" in details:
359363
raise InsufficientBalanceException(details) from error
364+
if "An identical job was found in the job queue" in details:
365+
raise DuplicateFoundException(details) from error
360366

361367
if not details:
362368
details = "Request failed due to unknown issues. Please try again later."

0 commit comments

Comments
 (0)