Skip to content

Commit 0a9da42

Browse files
committed
test: Xfail serve deploy integ test on InsufficientInstanceCapacity
1 parent a413f82 commit 0a9da42

1 file changed

Lines changed: 17 additions & 4 deletions

File tree

sagemaker-serve/tests/integ/test_model_customization_deployment.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,8 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
115115
from sagemaker.serve import ModelBuilder
116116
import time
117117

118+
from sagemaker.core.utils.exceptions import FailedStatusError
119+
118120
training_job = TrainingJob.get(training_job_name=training_job_name, region=AWS_REGION)
119121
model_builder = ModelBuilder(model=training_job, instance_type="ml.g5.4xlarge", sagemaker_session=sagemaker_session)
120122
model_builder.accept_eula = True
@@ -123,10 +125,21 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
123125
peft_type = model_builder._fetch_peft()
124126
adapter_name = f"{endpoint_name}-adapter"
125127

126-
endpoint = model_builder.deploy(
127-
endpoint_name=endpoint_name,
128-
inference_component_name=adapter_name if peft_type == "LORA" else None,
129-
)
128+
try:
129+
endpoint = model_builder.deploy(
130+
endpoint_name=endpoint_name,
131+
inference_component_name=adapter_name if peft_type == "LORA" else None,
132+
)
133+
except FailedStatusError as e:
134+
# Endpoint provisioning can fail when the region is temporarily out of
135+
# capacity for the requested instance type. This is an environmental
136+
# condition unrelated to the SDK, so xfail rather than fail the build.
137+
if "InsufficientInstanceCapacity" in str(e):
138+
cleanup_endpoints.append(endpoint_name)
139+
pytest.xfail(
140+
f"InsufficientInstanceCapacity for ml.g5.4xlarge in {AWS_REGION}: {e}"
141+
)
142+
raise
130143

131144
cleanup_endpoints.append(endpoint_name)
132145

0 commit comments

Comments
 (0)