Skip to content

Commit e5f265b

Browse files
committed
Handle locked volumes when attaching
1 parent: 9155bfd; commit: e5f265b

2 files changed

Lines changed: 16 additions & 5 deletions

File tree

src/dstack/_internal/server/background/scheduled_tasks/submitted_jobs.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1042,18 +1042,23 @@ async def _attach_volumes(
10421042
)
10431043
job_runtime_data.volume_names.append(volume.name)
10441044
break # attach next mount point
1045-
except (ServerClientError, BackendError) as e:
1046-
logger.warning("%s: failed to attached volume: %s", fmt(job_model), repr(e))
1045+
except ServerClientError as e:
1046+
logger.info("%s: failed to attach volume: %s", fmt(job_model), repr(e))
10471047
job_model.termination_reason = JobTerminationReason.VOLUME_ERROR
1048-
job_model.termination_reason_message = "Failed to attach volume"
1048+
job_model.termination_reason_message = f"Failed to attach volume: {e.msg}"
1049+
switch_job_status(session, job_model, JobStatus.TERMINATING)
1050+
except BackendError as e:
1051+
logger.warning("%s: failed to attach volume: %s", fmt(job_model), repr(e))
1052+
job_model.termination_reason = JobTerminationReason.VOLUME_ERROR
1053+
job_model.termination_reason_message = f"Failed to attach volume: {str(e)}"
10491054
switch_job_status(session, job_model, JobStatus.TERMINATING)
10501055
except Exception:
10511056
logger.exception(
10521057
"%s: got exception when attaching volume",
10531058
fmt(job_model),
10541059
)
10551060
job_model.termination_reason = JobTerminationReason.VOLUME_ERROR
1056-
job_model.termination_reason_message = "Failed to attach volume"
1061+
job_model.termination_reason_message = "Failed to attach volume: unexpected error"
10571062
switch_job_status(session, job_model, JobStatus.TERMINATING)
10581063
finally:
10591064
job_model.job_runtime_data = job_runtime_data.json()
@@ -1075,6 +1080,8 @@ async def _attach_volume(
10751080
raise ServerClientError("Cannot attach a deleted volume")
10761081
if volume_model.to_be_deleted:
10771082
raise ServerClientError("Cannot attach a volume marked for deletion")
1083+
if volume_model.lock_expires_at is not None:
1084+
raise ServerClientError("Cannot attach a volume locked for processing")
10781085
attachment_data = await common_utils.run_async(
10791086
compute.attach_volume,
10801087
volume=volume,

src/dstack/_internal/server/services/jobs/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -720,6 +720,10 @@ async def get_job_configured_volume_models(
720720
)
721721
if volume_model is None:
722722
raise ResourceNotExistsError(f"Volume {mount_point.name} not found")
723+
if volume_model.to_be_deleted:
724+
raise ServerClientError(
725+
f"Volume {mount_point.name} is marked for deletion and cannot be attached"
726+
)
723727
mount_point_volume_models.append(volume_model)
724728
volume_models.append(mount_point_volume_models)
725729
return volume_models
@@ -729,7 +733,7 @@ def check_can_attach_job_volumes(volumes: List[List[Volume]]):
729733
"""
730734
Performs basic checks if volumes can be attached.
731735
This is useful to show error ASAP (when user submits the run).
732-
If the attachment is to fail anyway, the error will be handled when proccessing submitted jobs.
736+
If the attachment is to fail anyway, the error will be handled when processing submitted jobs.
733737
"""
734738
if len(volumes) == 0:
735739
return

0 commit comments

Comments
 (0)