Skip to content

Commit 914fa38

Browse files
fix: actually delete volumes from cloud providers
1 parent c234c25 commit 914fa38

File tree

1 file changed

+58
-7
lines changed

1 file changed

+58
-7
lines changed

src/dstack/_internal/server/background/tasks/process_idle_volumes.py

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,19 @@
55
from sqlalchemy.ext.asyncio import AsyncSession
66
from sqlalchemy.orm import selectinload
77

8+
from dstack._internal.core.backends.base.compute import ComputeWithVolumeSupport
9+
from dstack._internal.core.errors import BackendNotAvailable
810
from dstack._internal.core.models.profiles import parse_duration
911
from dstack._internal.core.models.volumes import VolumeStatus
1012
from dstack._internal.server.db import get_db, get_session_ctx
1113
from dstack._internal.server.models import VolumeModel
14+
from dstack._internal.server.services import backends as backends_services
1215
from dstack._internal.server.services.locking import get_locker
13-
from dstack._internal.server.services.volumes import get_volume_configuration
16+
from dstack._internal.server.services.volumes import (
17+
get_volume_configuration,
18+
volume_model_to_volume,
19+
)
20+
from dstack._internal.utils import common
1421
from dstack._internal.utils.common import get_current_datetime
1522
from dstack._internal.utils.logging import get_logger
1623

@@ -43,6 +50,7 @@ async def process_idle_volumes():
4350
select(VolumeModel)
4451
.where(VolumeModel.id.in_(volume_ids))
4552
.options(selectinload(VolumeModel.project))
53+
.options(selectinload(VolumeModel.user))
4654
.options(selectinload(VolumeModel.attachments))
4755
.execution_options(populate_existing=True)
4856
)
@@ -99,20 +107,63 @@ def _get_idle_time(volume: VolumeModel) -> datetime.timedelta:
99107

100108

101109
async def _delete_idle_volumes(session: AsyncSession, volumes: List[VolumeModel]):
102-
"""Mark idle volumes as deleted."""
103-
for volume in volumes:
110+
"""Delete idle volumes from cloud providers and mark as deleted in database."""
111+
for volume_model in volumes:
112+
try:
113+
# Try to delete from cloud provider first
114+
await _delete_volume_from_cloud(session, volume_model)
115+
except Exception:
116+
logger.exception("Error when deleting volume %s from cloud", volume_model.name)
117+
118+
# Always mark as deleted in database, even if cloud deletion failed
104119
try:
105-
# Mark volume as deleted
106120
await session.execute(
107121
update(VolumeModel)
108-
.where(VolumeModel.id == volume.id)
122+
.where(VolumeModel.id == volume_model.id)
109123
.values(
110124
deleted=True,
111125
deleted_at=get_current_datetime(),
112126
)
113127
)
114-
logger.info("Marked idle volume %s for deletion", volume.name)
128+
logger.info("Deleted idle volume %s", volume_model.name)
115129
except Exception:
116-
logger.exception("Failed to mark volume %s for deletion", volume.name)
130+
logger.exception("Failed to mark volume %s as deleted in database", volume_model.name)
117131

118132
await session.commit()
133+
134+
135+
async def _delete_volume_from_cloud(session: AsyncSession, volume_model: VolumeModel):
136+
"""Delete volume from cloud provider. Based on volumes.py:_delete_volume"""
137+
volume = volume_model_to_volume(volume_model)
138+
139+
if volume.external:
140+
# External volumes are not managed by dstack
141+
return
142+
143+
if volume.provisioning_data is None:
144+
# The volume wasn't provisioned so there is nothing to delete
145+
return
146+
147+
if volume.provisioning_data.backend is None:
148+
logger.error(
149+
f"Failed to delete volume {volume_model.name}. volume.provisioning_data.backend is None."
150+
)
151+
return
152+
153+
try:
154+
backend = await backends_services.get_project_backend_by_type_or_error(
155+
project=volume_model.project,
156+
backend_type=volume.provisioning_data.backend,
157+
)
158+
except BackendNotAvailable:
159+
logger.error(
160+
f"Failed to delete volume {volume_model.name}. Backend {volume.configuration.backend} not available."
161+
)
162+
return
163+
164+
compute = backend.compute()
165+
assert isinstance(compute, ComputeWithVolumeSupport)
166+
await common.run_async(
167+
compute.delete_volume,
168+
volume=volume,
169+
)

0 commit comments

Comments
 (0)