Skip to content

Commit d659cea

Browse files
committed
Do not create clusters from burstable AWS instances
Fixes #2872
1 parent 6311ba0 commit d659cea

File tree

4 files changed

+36
-3
lines changed

4 files changed

+36
-3
lines changed

src/dstack/_internal/core/backends/aws/compute.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,12 @@
3333
merge_tags,
3434
)
3535
from dstack._internal.core.backends.base.offers import get_catalog_offers
36-
from dstack._internal.core.errors import ComputeError, NoCapacityError, PlacementGroupInUseError
36+
from dstack._internal.core.errors import (
37+
ComputeError,
38+
NoCapacityError,
39+
PlacementGroupInUseError,
40+
PlacementGroupNotSupportedError,
41+
)
3742
from dstack._internal.core.models.backends.base import BackendType
3843
from dstack._internal.core.models.common import CoreModel
3944
from dstack._internal.core.models.gateways import (
@@ -46,7 +51,11 @@
4651
InstanceOffer,
4752
InstanceOfferWithAvailability,
4853
)
49-
from dstack._internal.core.models.placement import PlacementGroup, PlacementGroupProvisioningData
54+
from dstack._internal.core.models.placement import (
55+
PlacementGroup,
56+
PlacementGroupProvisioningData,
57+
PlacementStrategy,
58+
)
5059
from dstack._internal.core.models.resources import Memory, Range
5160
from dstack._internal.core.models.runs import JobProvisioningData, Requirements
5261
from dstack._internal.core.models.volumes import (
@@ -334,6 +343,8 @@ def create_placement_group(
334343
placement_group: PlacementGroup,
335344
master_instance_offer: InstanceOffer,
336345
) -> PlacementGroupProvisioningData:
346+
if not _offer_supports_placement_group(master_instance_offer, placement_group):
347+
raise PlacementGroupNotSupportedError()
337348
ec2_client = self.session.client("ec2", region_name=placement_group.configuration.region)
338349
logger.debug("Creating placement group %s...", placement_group.name)
339350
ec2_client.create_placement_group(
@@ -370,6 +381,8 @@ def is_suitable_placement_group(
370381
placement_group: PlacementGroup,
371382
instance_offer: InstanceOffer,
372383
) -> bool:
384+
if not _offer_supports_placement_group(instance_offer, placement_group):
385+
return False
373386
return (
374387
placement_group.configuration.backend == BackendType.AWS
375388
and placement_group.configuration.region == instance_offer.region
@@ -1059,6 +1072,15 @@ def _supported_instances(offer: InstanceOffer) -> bool:
10591072
return False
10601073

10611074

1075+
def _offer_supports_placement_group(offer: InstanceOffer, placement_group: PlacementGroup) -> bool:
    """
    Tell whether an instance from `offer` can be launched into `placement_group`.

    Only the cluster placement strategy restricts instance types: AWS does not
    allow burstable performance instances in cluster placement groups.
    Any other strategy accepts every offer.

    Args:
        offer: the instance offer being considered.
        placement_group: the placement group the instance would be launched into.

    Returns:
        `False` if the group uses the cluster strategy and the offer is
        a burstable instance type, `True` otherwise.
    """
    if placement_group.configuration.placement_strategy != PlacementStrategy.CLUSTER:
        return True
    # Burstable performance (T) families. The trailing dot keeps each prefix
    # from matching an unrelated family, which is also why "t3." does not
    # cover "t3a." and the latter has to be listed explicitly.
    burstable_family_prefixes = ("t2.", "t3.", "t3a.", "t4g.")
    return not offer.instance.name.startswith(burstable_family_prefixes)
1082+
1083+
10621084
def _get_maximum_efa_interfaces(ec2_client: botocore.client.BaseClient, instance_type: str) -> int:
10631085
try:
10641086
response = ec2_client.describe_instance_types(

src/dstack/_internal/core/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ class PlacementGroupInUseError(ComputeError):
110110
pass
111111

112112

113+
class PlacementGroupNotSupportedError(ComputeError):
    """
    Raised when a placement group cannot be used with the given instance offer
    """
115+
116+
113117
class NotYetTerminated(ComputeError):
114118
"""
115119
Used by Compute.terminate_instance to signal that instance termination is not complete

src/dstack/_internal/server/background/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def start_background_tasks() -> AsyncIOScheduler:
109109
process_instances,
110110
IntervalTrigger(seconds=4, jitter=2),
111111
kwargs={"batch_size": 5},
112-
max_instances=4 if replica == 0 else 1,
112+
max_instances=2 if replica == 0 else 1,
113113
)
114114
_scheduler.add_job(
115115
process_fleets,

src/dstack/_internal/server/background/tasks/process_instances.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from dstack._internal.core.errors import (
4646
BackendError,
4747
NotYetTerminated,
48+
PlacementGroupNotSupportedError,
4849
ProvisioningError,
4950
)
5051
from dstack._internal.core.models.backends.base import BackendType
@@ -1067,6 +1068,12 @@ async def _create_placement_group(
10671068
placement_group_model_to_placement_group(placement_group_model),
10681069
master_instance_offer,
10691070
)
1071+
except PlacementGroupNotSupportedError:
1072+
logger.debug(
1073+
"Skipping offer %s because placement group not supported",
1074+
master_instance_offer.instance.name,
1075+
)
1076+
return None
10701077
except BackendError as e:
10711078
logger.warning(
10721079
"Failed to create placement group %s in %s/%s: %r",

0 commit comments

Comments
 (0)