Skip to content

Commit e3b82f8

Browse files
committed
Support GCP reservations
Allow users to specify a specifically-targeted GCP reservation in fleet configurations:

```yaml
type: fleet
nodes: 1
backends: [gcp]
reservation: my-reservation
```

For reservations shared between projects, the full syntax can be used to reference the project that owns the reservation:

```yaml
type: fleet
nodes: 1
backends: [gcp]
reservation: projects/my-proj/reservations/my-reservation
```

`dstack` will locate the specified reservation, suggest offers that match the reservation's properties, and provision instances in the reservation. If there are multiple reservations with the specified name, all of them will be considered for provisioning.

Using reservations requires the `compute.reservations.list` permission in the project that owns the reservations.

The implementation was only tested with on-demand reservations. Other reservation types, including Future Reservations in Calendar Mode and Future Reservations in AI Hypercomputer, have not been tested yet; whether they work can be confirmed later.
1 parent 65f8d48 commit e3b82f8

15 files changed

Lines changed: 319 additions & 68 deletions

File tree

docs/docs/concepts/backends.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,7 @@ gcloud projects list --format="json(projectId)"
520520
compute.networks.updatePolicy
521521
compute.regions.get
522522
compute.regions.list
523+
compute.reservations.list
523524
compute.resourcePolicies.create
524525
compute.resourcePolicies.delete
525526
compute.routers.list
@@ -543,6 +544,9 @@ gcloud projects list --format="json(projectId)"
543544
Also, the use of TPUs requires the `serviceAccountUser` role.
544545
For TPU VMs, dstack will use the default service account.
545546

547+
If you plan to use shared reservations, the `compute.reservations.list`
548+
permission is required in the project that owns the reservations.
549+
546550
??? info "Required APIs"
547551
First, ensure the required APIs are enabled in your GCP `project_id`.
548552

docs/docs/guides/troubleshooting.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ one of these features, `dstack` will only select offers from the backends that s
111111
are only supported by the `aws`, `azure`, `gcp`, `nebius`, `oci`, and `vultr` backends,
112112
as well as SSH fleets.
113113
- [Reservations](../reference/dstack.yml/fleet.md#reservation)
114-
are only supported by the `aws` backend.
114+
are only supported by the `aws` and `gcp` backends.
115115

116116
#### Cause 8: dstack Sky balance
117117

src/dstack/_internal/core/backends/aws/compute.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import threading
2+
from collections.abc import Iterable
23
from concurrent.futures import ThreadPoolExecutor, as_completed
34
from typing import Any, Callable, Dict, List, Optional, Tuple
45

@@ -34,7 +35,11 @@
3435
get_user_data,
3536
merge_tags,
3637
)
37-
from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
38+
from dstack._internal.core.backends.base.offers import (
39+
OfferModifier,
40+
get_catalog_offers,
41+
get_offers_disk_modifier,
42+
)
3843
from dstack._internal.core.errors import (
3944
ComputeError,
4045
NoCapacityError,
@@ -159,10 +164,8 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability
159164
)
160165
return availability_offers
161166

162-
def get_offers_modifier(
163-
self, requirements: Requirements
164-
) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
165-
return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
167+
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
168+
return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]
166169

167170
def _get_offers_cached_key(self, requirements: Requirements) -> int:
168171
# Requirements is not hashable, so we use a hack to get arguments hash

src/dstack/_internal/core/backends/azure/compute.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import base64
22
import enum
33
import re
4+
from collections.abc import Iterable
45
from concurrent.futures import ThreadPoolExecutor, as_completed
5-
from typing import Callable, Dict, List, Optional, Tuple
6+
from typing import Dict, List, Optional, Tuple
67

78
from azure.core.credentials import TokenCredential
89
from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError
@@ -51,7 +52,11 @@
5152
merge_tags,
5253
requires_nvidia_proprietary_kernel_modules,
5354
)
54-
from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
55+
from dstack._internal.core.backends.base.offers import (
56+
OfferModifier,
57+
get_catalog_offers,
58+
get_offers_disk_modifier,
59+
)
5560
from dstack._internal.core.consts import DSTACK_OS_IMAGE_WITH_PROPRIETARY_NVIDIA_KERNEL_MODULES
5661
from dstack._internal.core.errors import ComputeError, NoCapacityError
5762
from dstack._internal.core.models.backends.base import BackendType
@@ -108,10 +113,8 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability
108113
)
109114
return offers_with_availability
110115

111-
def get_offers_modifier(
112-
self, requirements: Requirements
113-
) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
114-
return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
116+
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
117+
return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]
115118

116119
def create_instance(
117120
self,

src/dstack/_internal/core/backends/base/compute.py

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from gpuhunt import CPUArchitecture
1818

1919
from dstack._internal import settings
20-
from dstack._internal.core.backends.base.offers import filter_offers_by_requirements
20+
from dstack._internal.core.backends.base.offers import OfferModifier, filter_offers_by_requirements
2121
from dstack._internal.core.consts import (
2222
DSTACK_RUNNER_HTTP_PORT,
2323
DSTACK_RUNNER_SSH_PORT,
@@ -168,17 +168,13 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability
168168
"""
169169
pass
170170

171-
def get_offers_modifier(
172-
self, requirements: Requirements
173-
) -> Optional[
174-
Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]
175-
]:
171+
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
176172
"""
177-
Returns a modifier function that modifies offers before they are filtered by requirements.
178-
Can return `None` to exclude the offer.
173+
Returns functions that modify offers before they are filtered by requirements.
174+
A modifier function can return `None` to exclude the offer.
179175
E.g. can be used to set appropriate disk size based on requirements.
180176
"""
181-
return None
177+
return []
182178

183179
def get_offers_post_filter(
184180
self, requirements: Requirements
@@ -191,14 +187,7 @@ def get_offers_post_filter(
191187

192188
def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvailability]:
193189
offers = self._get_all_offers_with_availability_cached()
194-
modifier = self.get_offers_modifier(requirements)
195-
if modifier is not None:
196-
modified_offers = []
197-
for o in offers:
198-
modified_offer = modifier(o)
199-
if modified_offer is not None:
200-
modified_offers.append(modified_offer)
201-
offers = modified_offers
190+
offers = self.__apply_modifiers(offers, self.get_offers_modifiers(requirements))
202191
offers = filter_offers_by_requirements(offers, requirements)
203192
post_filter = self.get_offers_post_filter(requirements)
204193
if post_filter is not None:
@@ -212,6 +201,20 @@ def get_offers(self, requirements: Requirements) -> List[InstanceOfferWithAvaila
212201
def _get_all_offers_with_availability_cached(self) -> List[InstanceOfferWithAvailability]:
213202
return self.get_all_offers_with_availability()
214203

204+
@staticmethod
205+
def __apply_modifiers(
206+
offers: Iterable[InstanceOfferWithAvailability], modifiers: Iterable[OfferModifier]
207+
) -> list[InstanceOfferWithAvailability]:
208+
modified_offers = []
209+
for offer in offers:
210+
for modifier in modifiers:
211+
offer = modifier(offer)
212+
if offer is None:
213+
break
214+
else:
215+
modified_offers.append(offer)
216+
return modified_offers
217+
215218

216219
class ComputeWithFilteredOffersCached(ABC):
217220
"""
@@ -341,6 +344,15 @@ class ComputeWithMultinodeSupport:
341344
class ComputeWithReservationSupport:
342345
"""
343346
Must be subclassed to support provisioning from reservations.
347+
348+
The following is expected from a backend that supports reservations:
349+
350+
- `get_offers` respects `Requirements.reservation` if set, and only returns
351+
offers that can be provisioned in the configured reservation. It can
352+
adjust some offer properties such as `availability` and
353+
`availability_zones` if necessary.
354+
- `create_instance` respects `InstanceConfig.reservation` if set, and
355+
provisions the instance in the configured reservation.
344356
"""
345357

346358
pass
@@ -391,6 +403,12 @@ def is_suitable_placement_group(
391403
"""
392404
pass
393405

406+
def are_placement_groups_compatible_with_reservations(self) -> bool:
407+
"""
408+
Whether placement groups can be used for instances provisioned in reservations.
409+
"""
410+
return True
411+
394412

395413
class ComputeWithGatewaySupport(ABC):
396414
"""

src/dstack/_internal/core/backends/base/offers.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,12 @@ def choose_disk_size_mib(
199199
return round(disk_size_gib * 1024)
200200

201201

202+
OfferModifier = Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]
203+
204+
202205
def get_offers_disk_modifier(
203206
configurable_disk_size: Range[Memory], requirements: Requirements
204-
) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
207+
) -> OfferModifier:
205208
"""
206209
Returns a func that modifies offers disk by setting min value that satisfies both
207210
`configurable_disk_size` and `requirements`.

src/dstack/_internal/core/backends/datacrunch/compute.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from typing import Callable, Dict, List, Optional
1+
from collections.abc import Iterable
2+
from typing import Dict, List, Optional
23

34
from datacrunch import DataCrunchClient
45
from datacrunch.exceptions import APIException
@@ -12,7 +13,11 @@
1213
generate_unique_instance_name,
1314
get_shim_commands,
1415
)
15-
from dstack._internal.core.backends.base.offers import get_catalog_offers, get_offers_disk_modifier
16+
from dstack._internal.core.backends.base.offers import (
17+
OfferModifier,
18+
get_catalog_offers,
19+
get_offers_disk_modifier,
20+
)
1621
from dstack._internal.core.backends.datacrunch.models import DataCrunchConfig
1722
from dstack._internal.core.errors import NoCapacityError
1823
from dstack._internal.core.models.backends.base import BackendType
@@ -59,10 +64,8 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability
5964
offers_with_availability = self._get_offers_with_availability(offers)
6065
return offers_with_availability
6166

62-
def get_offers_modifier(
63-
self, requirements: Requirements
64-
) -> Callable[[InstanceOfferWithAvailability], Optional[InstanceOfferWithAvailability]]:
65-
return get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)
67+
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
68+
return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]
6669

6770
def _get_offers_with_availability(
6871
self, offers: List[InstanceOffer]

0 commit comments

Comments
 (0)