Skip to content

Commit 22f8f25

Browse files
authored
chore: Register 'serial' pytest marker in mlops tox.ini (#5951)
* chore: Register 'serial' pytest marker in mlops tox.ini

  The Lake Formation integ tests use @pytest.mark.serial, but the marker was never registered, producing PytestUnknownMarkWarning noise on every run. Register it in the [pytest] markers section.

* test: Xfail serve deploy integ test on InsufficientInstanceCapacity
1 parent b71bc7e commit 22f8f25

2 files changed

Lines changed: 18 additions & 4 deletions

File tree

sagemaker-mlops/tox.ini

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ markers =
6464
release
6565
image_uris_unit_test
6666
timeout: mark a test as a timeout.
67+
serial: mark a test that must not run concurrently with others sharing the same resources.
6768

6869
[testenv]
6970
setenv =

sagemaker-serve/tests/integ/test_model_customization_deployment.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,8 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
115115
from sagemaker.serve import ModelBuilder
116116
import time
117117

118+
from sagemaker.core.utils.exceptions import FailedStatusError
119+
118120
training_job = TrainingJob.get(training_job_name=training_job_name, region=AWS_REGION)
119121
model_builder = ModelBuilder(model=training_job, instance_type="ml.g5.4xlarge", sagemaker_session=sagemaker_session)
120122
model_builder.accept_eula = True
@@ -123,10 +125,21 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
123125
peft_type = model_builder._fetch_peft()
124126
adapter_name = f"{endpoint_name}-adapter"
125127

126-
endpoint = model_builder.deploy(
127-
endpoint_name=endpoint_name,
128-
inference_component_name=adapter_name if peft_type == "LORA" else None,
129-
)
128+
try:
129+
endpoint = model_builder.deploy(
130+
endpoint_name=endpoint_name,
131+
inference_component_name=adapter_name if peft_type == "LORA" else None,
132+
)
133+
except FailedStatusError as e:
134+
# Endpoint provisioning can fail when the region is temporarily out of
135+
# capacity for the requested instance type. This is an environmental
136+
# condition unrelated to the SDK, so xfail rather than fail the build.
137+
if "InsufficientInstanceCapacity" in str(e):
138+
cleanup_endpoints.append(endpoint_name)
139+
pytest.xfail(
140+
f"InsufficientInstanceCapacity for ml.g5.4xlarge in {AWS_REGION}: {e}"
141+
)
142+
raise
130143

131144
cleanup_endpoints.append(endpoint_name)
132145

0 commit comments

Comments
 (0)