@@ -115,6 +115,8 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
115115 from sagemaker .serve import ModelBuilder
116116 import time
117117
118+ from sagemaker .core .utils .exceptions import FailedStatusError
119+
118120 training_job = TrainingJob .get (training_job_name = training_job_name , region = AWS_REGION )
119121 model_builder = ModelBuilder (model = training_job , instance_type = "ml.g5.4xlarge" , sagemaker_session = sagemaker_session )
120122 model_builder .accept_eula = True
@@ -123,10 +125,21 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
123125 peft_type = model_builder ._fetch_peft ()
124126 adapter_name = f"{ endpoint_name } -adapter"
125127
126- endpoint = model_builder .deploy (
127- endpoint_name = endpoint_name ,
128- inference_component_name = adapter_name if peft_type == "LORA" else None ,
129- )
128+ try :
129+ endpoint = model_builder .deploy (
130+ endpoint_name = endpoint_name ,
131+ inference_component_name = adapter_name if peft_type == "LORA" else None ,
132+ )
133+ except FailedStatusError as e :
134+ # Endpoint provisioning can fail when the region is temporarily out of
135+ # capacity for the requested instance type. This is an environmental
136+ # condition unrelated to the SDK, so xfail rather than fail the build.
137+ if "InsufficientInstanceCapacity" in str (e ):
138+ cleanup_endpoints .append (endpoint_name )
139+ pytest .xfail (
140+ f"InsufficientInstanceCapacity for ml.g5.4xlarge in { AWS_REGION } : { e } "
141+ )
142+ raise
130143
131144 cleanup_endpoints .append (endpoint_name )
132145
0 commit comments