Merge branch 'master' into master-mtrl-eval-issue-fix

rsareddy0329 · web-flow · commit 95560e97b44d · 2026-06-16T16:57:53.000-07:00
diff --git a/.github/workflows/gpu-integ-tests.yml b/.github/workflows/gpu-integ-tests.yml
@@ -79,13 +79,36 @@ jobs:
           project-name: sagemaker-python-sdk-ci-health-gpu-integ-tests
           source-version: refs/heads/master
 
+  # Bedrock model-import integ tests. Run serially (concurrency 1) in their own
+  # CodeBuild project because the "Concurrent model import jobs" Bedrock quota is
+  # fixed at 1 and not raisable; running them in parallel (as PR checks did)
+  # makes them collide and flake. us-west-2 only (no us_east_1-marked tests).
+  # Folded into the same run-level pass/fail metric as the GPU jobs (see
+  # report-result below), so it shares the GpuIntegRunAlarm, not a separate one.
+  import-model-integ-tests:
+    needs: check-prior-success
+    if: needs.check-prior-success.outputs.already_succeeded != 'true'  # skip when the gate reports a prior success
+    runs-on: ubuntu-latest
+    steps:
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
+          aws-region: us-west-2
+          role-duration-seconds: 10800  # 3 hours
+      - name: Run Bedrock Model-Import Integ Tests
+        uses: aws-actions/aws-codebuild-run-build@v1
+        with:
+          project-name: sagemaker-python-sdk-ci-health-import-model-integ-tests
+          source-version: refs/heads/master
+
-  # Run-level result: a run is successful only if BOTH region jobs succeeded.
+  # Run-level result: a run is successful only if ALL region/import jobs succeeded.
   # Emits GpuIntegRunFailure = 1 (failed) / 0 (succeeded) to CloudWatch in
   # us-west-2. The CDK alarm (GpuIntegRunAlarm) sums this over a UTC day and
   # cuts a daytime sev2 when all of the day's runs failed. Skipped when the gate
   # short-circuited today's run (an earlier run already succeeded).
   report-result:
-    needs: [check-prior-success, gpu-integ-tests, gpu-integ-tests-us-east-1]
+    needs: [check-prior-success, gpu-integ-tests, gpu-integ-tests-us-east-1, import-model-integ-tests]
     # Only emit the daily alarm metric for scheduled runs that actually executed
     # the test jobs:
     #   - check-prior-success.result == 'success': if the gate job itself failed,
@@ -111,12 +134,13 @@ jobs:
             exit 0
           fi
           if [ "${{ needs.gpu-integ-tests.result }}" == "success" ] && \
-             [ "${{ needs.gpu-integ-tests-us-east-1.result }}" == "success" ]; then
+             [ "${{ needs.gpu-integ-tests-us-east-1.result }}" == "success" ] && \
+             [ "${{ needs.import-model-integ-tests.result }}" == "success" ]; then
             value=0
-            echo "Both region jobs succeeded; emitting GpuIntegRunFailure=0"
+            echo "All region/import jobs succeeded; emitting GpuIntegRunFailure=0"
           else
             value=1
-            echo "At least one region job did not succeed; emitting GpuIntegRunFailure=1"
+            echo "At least one region/import job did not succeed; emitting GpuIntegRunFailure=1"
           fi
           aws cloudwatch put-metric-data \
             --namespace GpuIntegRunMetrics \
diff --git a/sagemaker-mlops/tox.ini b/sagemaker-mlops/tox.ini
@@ -64,6 +64,7 @@ markers =
     release
     image_uris_unit_test
     timeout: mark a test as a timeout.
+    serial: mark a test that must not run concurrently with others sharing the same resources.
 
 [testenv]
 setenv =
diff --git a/sagemaker-serve/tests/integ/test_bedrock_provisioned_throughput.py b/sagemaker-serve/tests/integ/test_bedrock_provisioned_throughput.py
@@ -170,6 +170,7 @@ def _setup_model_files(s3_artifacts_uri, s3_client):
 
 
 @pytest.mark.serial
+@pytest.mark.import_model
 class TestBedrockImportJobPolling:
     """Test import job polling for OSS models (Option C: deploy only waits for import)."""
 
@@ -236,6 +237,7 @@ def test_deploy_oss_model_waits_for_import_completion(
 
 
 @pytest.mark.serial
+@pytest.mark.import_model
 class TestBedrockProvisionedThroughput:
     """Test create_provisioned_throughput as a standalone method.
 
diff --git a/sagemaker-serve/tests/integ/test_model_customization_deployment.py b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
@@ -115,6 +115,8 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
         from sagemaker.serve import ModelBuilder
         import time
 
+        from sagemaker.core.utils.exceptions import FailedStatusError
+
         training_job = TrainingJob.get(training_job_name=training_job_name, region=AWS_REGION)
         model_builder = ModelBuilder(model=training_job, instance_type="ml.g5.4xlarge", sagemaker_session=sagemaker_session)
         model_builder.accept_eula = True
@@ -123,10 +125,21 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
         peft_type = model_builder._fetch_peft()
         adapter_name = f"{endpoint_name}-adapter"
 
-        endpoint = model_builder.deploy(
-            endpoint_name=endpoint_name,
-            inference_component_name=adapter_name if peft_type == "LORA" else None,
-        )
+        try:
+            endpoint = model_builder.deploy(
+                endpoint_name=endpoint_name,
+                inference_component_name=adapter_name if peft_type == "LORA" else None,
+            )
+        except FailedStatusError as e:
+            # Register cleanup for every failed deploy so no failed endpoint is leaked.
+            cleanup_endpoints.append(endpoint_name)
+            # Capacity shortages are environmental and unrelated to the SDK, so
+            # xfail rather than fail the build; any other failure is re-raised.
+            if "InsufficientInstanceCapacity" in str(e):
+                pytest.xfail(
+                    f"InsufficientInstanceCapacity for ml.g5.4xlarge in {AWS_REGION}: {e}"
+                )
+            raise
 
         cleanup_endpoints.append(endpoint_name)
 
@@ -575,6 +588,7 @@ def test_bedrock_model_builder_creation(self, training_job):
                 f"BedrockModelBuilder creation failed: {str(e)}. This might be due to sagemaker-core integration issues.")
 
     @pytest.mark.slow
+    @pytest.mark.import_model
     def test_bedrock_job_created(self, deployed_model_arn):
         """Test that Bedrock import job was created successfully."""
         assert deployed_model_arn is not None
@@ -583,6 +597,7 @@ def test_bedrock_job_created(self, deployed_model_arn):
     # Documentation recommends retries: https://docs.aws.amazon.com/bedrock/latest/userguide/invoke-imported-model.html#handle-model-not-ready-exception.
     # TODO: Fix using provisioned throughput or better wait mechanism
     @pytest.mark.slow
+    @pytest.mark.import_model
     def test_bedrock_model_invoke(self, deployed_model_arn, bedrock_runtime):
         logger.warning(
             "This test is known to be flaky due to 'model not ready' exceptions from Bedrock. "
diff --git a/sagemaker-serve/tox.ini b/sagemaker-serve/tox.ini
@@ -65,6 +65,7 @@ markers =
     timeout: mark a test as a timeout.
     gpu_intensive: mark a test as GPU resource intensive (runs on scheduled CI, not PR checks).
     us_east_1: mark a test that requires us-east-1 test account credentials (784379639078).
+    import_model: mark a test that creates a Bedrock model import job. Concurrent model import jobs are capped at 1 by a non-raisable Bedrock service quota, so these run serially in a dedicated scheduled CI run, not in PR checks.
 
 [testenv]
 setenv =