Skip to content

Commit 300ea49

Browse files
authored
Kubernetes: add instance volumes support (#3758)
Implemented as an inline `hostPath` volume in the `PodSpec`. A new compute feature was added -- previously, instance volume support was implied by the "create instance" feature. Part-of: #3757
1 parent 0e9bc42 commit 300ea49

File tree

22 files changed

+98
-16
lines changed

22 files changed

+98
-16
lines changed

contributing/BACKENDS.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,9 @@ The agent controls the VM and starts Docker containers for users' jobs.
181181
Since `dstack` controls the entire VM, VM-based backends can support more features,
182182
such as blocks, instance volumes, privileged containers, and reusable instances.
183183

184-
Note, all VM-based backend `Compute`s should sublass the `ComputeWithPrivilegedSupport` mixin,
185-
as the `dstack-shim` agent provides this functionality OOTB.
184+
Note, all VM-based backend `Compute`s should subclass the `ComputeWithPrivilegedSupport`
185+
and `ComputeWithInstanceVolumesSupport` mixins, as the `dstack-shim` agent
186+
provides these functionalities OOTB.
186187

187188
To support a VM-based backend, `dstack` expects the following:
188189

docs/docs/concepts/volumes.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ Since persistence isn't guaranteed (instances may be interrupted or runs may occ
262262
volumes only for caching or with directories manually mounted to network storage.
263263

264264
!!! info "Backends"
265-
Instance volumes are currently supported for all backends except `runpod`, `vastai` and `kubernetes`, and can also be used with [SSH fleets](fleets.md#ssh-fleets).
265+
Instance volumes are currently supported for all backends except `runpod` and `vastai`, and can also be used with [SSH fleets](fleets.md#ssh-fleets).
266266

267267
??? info "Optional volumes"
268268
If the volume is not critical for your workload, you can mark it as `optional`.

src/dstack/_internal/core/backends/aws/compute.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
ComputeWithAllOffersCached,
2626
ComputeWithCreateInstanceSupport,
2727
ComputeWithGatewaySupport,
28+
ComputeWithInstanceVolumesSupport,
2829
ComputeWithMultinodeSupport,
2930
ComputeWithPlacementGroupSupport,
3031
ComputeWithPrivateGatewaySupport,
@@ -108,6 +109,7 @@ class AWSCompute(
108109
ComputeWithAllOffersCached,
109110
ComputeWithCreateInstanceSupport,
110111
ComputeWithPrivilegedSupport,
112+
ComputeWithInstanceVolumesSupport,
111113
ComputeWithMultinodeSupport,
112114
ComputeWithReservationSupport,
113115
ComputeWithPlacementGroupSupport,

src/dstack/_internal/core/backends/azure/compute.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
ComputeWithAllOffersCached,
4444
ComputeWithCreateInstanceSupport,
4545
ComputeWithGatewaySupport,
46+
ComputeWithInstanceVolumesSupport,
4647
ComputeWithMultinodeSupport,
4748
ComputeWithPrivilegedSupport,
4849
generate_unique_gateway_instance_name,
@@ -86,6 +87,7 @@ class AzureCompute(
8687
ComputeWithAllOffersCached,
8788
ComputeWithCreateInstanceSupport,
8889
ComputeWithPrivilegedSupport,
90+
ComputeWithInstanceVolumesSupport,
8991
ComputeWithMultinodeSupport,
9092
ComputeWithGatewaySupport,
9193
Compute,

src/dstack/_internal/core/backends/base/compute.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,15 @@ class ComputeWithPrivilegedSupport:
375375
pass
376376

377377

378+
class ComputeWithInstanceVolumesSupport:
379+
"""
380+
Must be subclassed to support runs with `/host/path:/container/path` volumes.
381+
All VM-based Computes (that is, Computes that use the shim) should subclass this mixin.
382+
"""
383+
384+
pass
385+
386+
378387
class ComputeWithMultinodeSupport:
379388
"""
380389
Must be subclassed to support multinode tasks and cluster fleets.
@@ -668,31 +677,39 @@ def generate_unique_backend_name(
668677
# project_name is not guaranteed to be valid in all backends,
669678
# so we add it only if it passes the validation
670679
prefix = f"dstack-{project_name}-{resource_name}"
671-
return _generate_unique_backend_name_with_prefix(
680+
return generate_unique_name(
672681
prefix=prefix,
673682
max_length=max_length,
674683
)
675684

676685

677686
def generate_unique_short_backend_name() -> str:
678687
"""
679-
Generates a unique 15-char resource name of the form "dstack-12345678".
688+
Generates a unique 15-char resource name of the form "dstack-12345xyz".
680689
Can be used for resources that have a very small length limit like AWS LBs.
681690
"""
682-
return _generate_unique_backend_name_with_prefix("dstack")
691+
return generate_unique_name(prefix="dstack")
683692

684693

685-
def _generate_unique_backend_name_with_prefix(
686-
prefix: str,
694+
def generate_unique_name(
695+
*,
696+
prefix: Optional[str] = None,
697+
suffix_length: Optional[int] = None,
687698
max_length: Optional[int] = None,
688699
) -> str:
700+
if suffix_length is None:
701+
suffix_length = _CLOUD_RESOURCE_SUFFIX_LEN
689702
if max_length is not None:
690-
prefix_len = max_length - _CLOUD_RESOURCE_SUFFIX_LEN - 1
691-
prefix = prefix[:prefix_len]
703+
assert max_length >= suffix_length
704+
if prefix is not None:
705+
prefix_len = max_length - suffix_length - 1
706+
assert prefix_len > 0
707+
prefix = prefix[:prefix_len]
692708
suffix = "".join(
693-
random.choice(string.ascii_lowercase + string.digits)
694-
for _ in range(_CLOUD_RESOURCE_SUFFIX_LEN)
709+
random.choice(string.ascii_lowercase + string.digits) for _ in range(suffix_length)
695710
)
711+
if prefix is None:
712+
return suffix
696713
return f"{prefix}-{suffix}"
697714

698715

src/dstack/_internal/core/backends/cloudrift/compute.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
Compute,
55
ComputeWithAllOffersCached,
66
ComputeWithCreateInstanceSupport,
7+
ComputeWithInstanceVolumesSupport,
78
ComputeWithPrivilegedSupport,
89
get_shim_commands,
910
)
@@ -29,6 +30,7 @@ class CloudRiftCompute(
2930
ComputeWithAllOffersCached,
3031
ComputeWithCreateInstanceSupport,
3132
ComputeWithPrivilegedSupport,
33+
ComputeWithInstanceVolumesSupport,
3234
Compute,
3335
):
3436
def __init__(self, config: CloudRiftConfig):

src/dstack/_internal/core/backends/crusoe/compute.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from dstack._internal.core.backends.base.compute import (
99
ComputeWithAllOffersCached,
1010
ComputeWithCreateInstanceSupport,
11+
ComputeWithInstanceVolumesSupport,
1112
ComputeWithMultinodeSupport,
1213
ComputeWithPlacementGroupSupport,
1314
ComputeWithPrivilegedSupport,
@@ -124,6 +125,7 @@ class CrusoeCompute(
124125
ComputeWithAllOffersCached,
125126
ComputeWithCreateInstanceSupport,
126127
ComputeWithPrivilegedSupport,
128+
ComputeWithInstanceVolumesSupport,
127129
ComputeWithMultinodeSupport,
128130
ComputeWithPlacementGroupSupport,
129131
Compute,

src/dstack/_internal/core/backends/cudo/compute.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from dstack._internal.core.backends.base.compute import (
77
ComputeWithCreateInstanceSupport,
88
ComputeWithFilteredOffersCached,
9+
ComputeWithInstanceVolumesSupport,
910
ComputeWithPrivilegedSupport,
1011
generate_unique_instance_name,
1112
get_shim_commands,
@@ -34,6 +35,7 @@ class CudoCompute(
3435
ComputeWithFilteredOffersCached,
3536
ComputeWithCreateInstanceSupport,
3637
ComputeWithPrivilegedSupport,
38+
ComputeWithInstanceVolumesSupport,
3739
Compute,
3840
):
3941
def __init__(self, config: CudoConfig):

src/dstack/_internal/core/backends/digitalocean_base/compute.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from dstack._internal.core.backends.base.compute import (
88
ComputeWithAllOffersCached,
99
ComputeWithCreateInstanceSupport,
10+
ComputeWithInstanceVolumesSupport,
1011
ComputeWithPrivilegedSupport,
1112
generate_unique_instance_name,
1213
get_user_data,
@@ -42,6 +43,7 @@ class BaseDigitalOceanCompute(
4243
ComputeWithAllOffersCached,
4344
ComputeWithCreateInstanceSupport,
4445
ComputeWithPrivilegedSupport,
46+
ComputeWithInstanceVolumesSupport,
4547
Compute,
4648
):
4749
def __init__(self, config: BaseDigitalOceanConfig, api_url: str, type: BackendType):

src/dstack/_internal/core/backends/features.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
ComputeWithCreateInstanceSupport,
33
ComputeWithGatewaySupport,
44
ComputeWithGroupProvisioningSupport,
5+
ComputeWithInstanceVolumesSupport,
56
ComputeWithMultinodeSupport,
67
ComputeWithPlacementGroupSupport,
78
ComputeWithPrivateGatewaySupport,
@@ -48,6 +49,10 @@ def _get_backends_with_compute_feature(
4849
configurator_classes=_configurator_classes,
4950
compute_feature_class=ComputeWithPrivilegedSupport,
5051
)
52+
BACKENDS_WITH_INSTANCE_VOLUMES_SUPPORT = _get_backends_with_compute_feature(
53+
configurator_classes=_configurator_classes,
54+
compute_feature_class=ComputeWithInstanceVolumesSupport,
55+
)
5156
BACKENDS_WITH_MULTINODE_SUPPORT = [BackendType.REMOTE] + _get_backends_with_compute_feature(
5257
configurator_classes=_configurator_classes,
5358
compute_feature_class=ComputeWithMultinodeSupport,

0 commit comments

Comments
 (0)