Skip to content

Commit 6200522

Browse files
authored
Use DLAMI on AWS (#2782)
1 parent 3fdc6dd commit 6200522

3 files changed

Lines changed: 54 additions & 5 deletions

File tree

src/dstack/_internal/core/backends/aws/compute.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ def create_instance(
231231
image_id, username = aws_resources.get_image_id_and_username(
232232
ec2_client=ec2_client,
233233
cuda=len(instance_offer.instance.resources.gpus) > 0,
234+
instance_type=instance_offer.instance.name,
234235
image_config=self.config.os_images,
235236
)
236237
response = ec2_resource.create_instances(

src/dstack/_internal/core/backends/aws/resources.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
logger = get_logger(__name__)
1313

1414
DSTACK_ACCOUNT_ID = "142421590066"
15+
DLAMI_OWNER_ACCOUNT_ID = "898082745236"
1516

1617

1718
def get_image_id_and_username(
1819
ec2_client: botocore.client.BaseClient,
1920
cuda: bool,
21+
instance_type: str,
2022
image_config: Optional[AWSOSImageConfig] = None,
2123
) -> tuple[str, str]:
2224
if image_config is not None:
@@ -27,6 +29,11 @@ def get_image_id_and_username(
2729
image_name = image.name
2830
image_owner = image.owner
2931
username = image.user
32+
elif _supported_by_dlami(instance_type):
33+
# TODO: Update DLAMI image version from time to time
34+
image_name = "Deep Learning Base OSS Nvidia Driver GPU AMI (Ubuntu 22.04) 20250516"
35+
image_owner = DLAMI_OWNER_ACCOUNT_ID
36+
username = "ubuntu"
3037
else:
3138
image_name = (
3239
f"dstack-{version.base_image}" if not cuda else f"dstack-cuda-{version.base_image}"
@@ -628,6 +635,25 @@ def _is_private_subnet_with_internet_egress(
628635
return False
629636

630637

638+
def _supported_by_dlami(instance_type: str) -> bool:
639+
# Currently only p3. instances are not supported by DLAMI among GPU instances.
640+
return any(
641+
instance_type.startswith(family)
642+
for family in [
643+
"g4dn.",
644+
"g5.",
645+
"g6.",
646+
"gr6.",
647+
"g6e.",
648+
"p4d.",
649+
"p4de.",
650+
"p5.",
651+
"p5e.",
652+
"p6-b200.",
653+
]
654+
)
655+
656+
631657
def get_reservation(
632658
ec2_client: botocore.client.BaseClient,
633659
reservation_id: str,

src/tests/_internal/core/backends/aws/test_resources.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,11 @@ def test_returns_the_latest_available(self, ec2_client_mock: Mock):
116116
},
117117
]
118118
}
119-
image_id, username = get_image_id_and_username(ec2_client_mock, cuda=False)
119+
image_id, username = get_image_id_and_username(
120+
ec2_client_mock,
121+
cuda=False,
122+
instance_type="some",
123+
)
120124
assert image_id == "ami-00000000000000003"
121125
assert username == "ubuntu"
122126

@@ -138,7 +142,11 @@ def test_raises_resource_not_found_if_none_available(
138142
]
139143
}
140144
with pytest.raises(ComputeResourceNotFoundError):
141-
get_image_id_and_username(ec2_client_mock, cuda=False)
145+
get_image_id_and_username(
146+
ec2_client_mock,
147+
cuda=False,
148+
instance_type="some",
149+
)
142150
assert "image 'dstack-0.0' not found" in caplog.text
143151

144152
@pytest.mark.parametrize(
@@ -152,7 +160,11 @@ def test_uses_dstack_image_name_and_account_id_if_image_config_not_provided(
152160
self, monkeypatch: pytest.MonkeyPatch, ec2_client_mock: Mock, cuda: bool, expected: str
153161
):
154162
monkeypatch.setattr("dstack.version.base_image", "0.0")
155-
_, username = get_image_id_and_username(ec2_client_mock, cuda)
163+
_, username = get_image_id_and_username(
164+
ec2_client_mock,
165+
cuda=cuda,
166+
instance_type="some",
167+
)
156168
assert username == "ubuntu"
157169
ec2_client_mock.describe_images.assert_called_once_with(
158170
Filters=[{"Name": "name", "Values": [expected]}], Owners=["142421590066"]
@@ -184,7 +196,12 @@ def test_uses_image_config_if_provided(
184196
user="dstack",
185197
),
186198
)
187-
_, username = get_image_id_and_username(ec2_client_mock, cuda, image_config)
199+
_, username = get_image_id_and_username(
200+
ec2_client_mock,
201+
cuda=cuda,
202+
instance_type="some",
203+
image_config=image_config,
204+
)
188205
assert username == expected_username
189206
ec2_client_mock.describe_images.assert_called_once_with(
190207
Filters=[{"Name": "name", "Values": [expected_name]}],
@@ -202,5 +219,10 @@ def test_raises_resource_not_found_if_image_config_property_not_set(
202219
),
203220
)
204221
with pytest.raises(ComputeResourceNotFoundError):
205-
get_image_id_and_username(ec2_client_mock, cuda=False, image_config=image_config)
222+
get_image_id_and_username(
223+
ec2_client_mock,
224+
cuda=False,
225+
instance_type="some",
226+
image_config=image_config,
227+
)
206228
assert "cpu image not configured" in caplog.text

0 commit comments

Comments
 (0)