aws
diff --git a/.github/workflows/gpu-integ-tests.yml
Lines changed: 15 additions & 0 deletions b/.github/workflows/gpu-integ-tests.yml
Lines changed: 15 additions & 0 deletions
diff --git a/.github/workflows/pr-checks-master.yml
Lines changed: 18 additions & 0 deletions b/.github/workflows/pr-checks-master.yml
Lines changed: 18 additions & 0 deletions
diff --git a/sagemaker-core/src/sagemaker/core/remote_function/job.py
Lines changed: 4 additions & 0 deletions b/sagemaker-core/src/sagemaker/core/remote_function/job.py
Lines changed: 4 additions & 0 deletions
diff --git a/sagemaker-core/tests/integ/remote_function/conftest.py
Lines changed: 46 additions & 0 deletions b/sagemaker-core/tests/integ/remote_function/conftest.py
Lines changed: 46 additions & 0 deletions
diff --git a/sagemaker-core/tests/integ/remote_function/test_decorator.py
Lines changed: 15 additions & 6 deletions b/sagemaker-core/tests/integ/remote_function/test_decorator.py
Lines changed: 15 additions & 6 deletions
diff --git a/sagemaker-mlops/tests/integ/feature_store/feature_processor/test_feature_processor_integ.py
Lines changed: 5 additions & 5 deletions b/sagemaker-mlops/tests/integ/feature_store/feature_processor/test_feature_processor_integ.py
Lines changed: 5 additions & 5 deletions
diff --git a/sagemaker-train/src/sagemaker/train/container_drivers/distributed_drivers/mpi_driver.py
Lines changed: 6 additions & 2 deletions b/sagemaker-train/src/sagemaker/train/container_drivers/distributed_drivers/mpi_driver.py
Lines changed: 6 additions & 2 deletions
diff --git a/sagemaker-train/tests/integ/ai_registry/test_dataset.py
Lines changed: 8 additions & 4 deletions b/sagemaker-train/tests/integ/ai_registry/test_dataset.py
Lines changed: 8 additions & 4 deletions
diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
Lines changed: 31 additions & 17 deletions b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
Lines changed: 31 additions & 17 deletions
@@ -22,3 +22,18 @@ jobs:
         with:
           project-name: sagemaker-python-sdk-ci-health-gpu-integ-tests
           source-version: refs/heads/master
+
+  gpu-integ-tests-us-east-1:  # runs the GPU integ-test CodeBuild project with us-east-1 credentials
+    runs-on: ubuntu-latest
+    steps:
+      - name: Configure AWS Credentials (us-east-1)
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.CI_AWS_ROLE_US_EAST_1_ARN }}
+          aws-region: us-east-1
+          role-duration-seconds: 10800  # 3 hours
+      - name: Run GPU Integ Tests (us-east-1)
+        uses: aws-actions/aws-codebuild-run-build@v1
+        with:
+          project-name: sagemaker-python-sdk-ci-health-gpu-integ-tests  # same project name as the job above
+          source-version: refs/heads/master
@@ -215,3 +215,21 @@ jobs:
         with:
           project-name: ${{ github.event.repository.name }}-ci-${{ matrix.submodule }}-integ-tests
           source-version-override: 'refs/pull/${{ github.event.pull_request.number }}/head^{${{ github.event.pull_request.head.sha }}}'
+
+  integ-tests-us-east-1:  # sagemaker-train integ tests run with us-east-1 credentials
+    runs-on: ubuntu-latest
+    needs: [detect-changes]
+    if: contains(fromJson(needs.detect-changes.outputs.submodules), 'sagemaker-train')
+    steps:
+      - name: Configure AWS Credentials (us-east-1)
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.CI_AWS_ROLE_US_EAST_1_ARN }}
+          aws-region: us-east-1
+          role-duration-seconds: 10800  # 3 hours
+
+      - name: Run us-east-1 Integ Tests for sagemaker-train
+        uses: aws-actions/aws-codebuild-run-build@v1
+        with:
+          project-name: ${{ github.event.repository.name }}-ci-sagemaker-train-integ-tests
+          source-version-override: 'refs/pull/${{ github.event.pull_request.number }}/head^{${{ github.event.pull_request.head.sha }}}'
@@ -860,6 +860,10 @@ def _get_default_spark_image(session):
         except ImportError:
             pass
 
+        # Spark 3.3 and below do not support py312; use 3.5 which supports both py39 and py312
+        if py_version == "312" and spark_version in ("2.4", "3.0", "3.1", "3.2", "3.3"):
+            spark_version = "3.5"
+
         image_uri = image_uris.retrieve(
             framework=SPARK_NAME,
             region=region,
 
@@ -171,6 +171,52 @@ def spark_test_container(sagemaker_session, sagemaker_sdk_tar_path, tmp_path_fac
     )
 
 
+@pytest.fixture(scope="session")
+def spark_pre_execution_commands(sagemaker_session):
+    """Build sagemaker-core wheel, upload to S3, and return pre-execution install commands.
+
+    This mirrors the pattern used in sagemaker-mlops feature_processor integ tests.
+    The Spark processing image does not have sagemaker-core pre-installed, so we must
+    build the local dev wheel and install it in the container via pre_execution_commands.
+    """
+    import subprocess
+    import glob
+    import tempfile
+    from sagemaker.core.s3 import S3Uploader
+
+    repo_root = os.path.abspath(
+        os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")
+    )
+    core_dir = os.path.join(repo_root, "sagemaker-core")
+
+    with tempfile.TemporaryDirectory() as dist_dir:
+        subprocess.run(
+            f"python -m build --wheel --outdir {dist_dir}",
+            shell=True,
+            cwd=core_dir,
+            check=True,
+        )
+        wheels = glob.glob(os.path.join(dist_dir, "sagemaker_core-*.whl"))
+        if not wheels:
+            raise FileNotFoundError(f"No sagemaker-core wheel found in {dist_dir}")
+        wheel_path = wheels[0]
+        wheel_name = os.path.basename(wheel_path)
+
+        s3_prefix = "s3://{}/spark-integ-test/wheels".format(
+            sagemaker_session.default_bucket()
+        )
+        S3Uploader.upload(wheel_path, s3_prefix, sagemaker_session=sagemaker_session)
+
+    PIP = "python3 -m pip install --root-user-action=ignore"
+    AWS = "python3 -m awscli"
+    cmds = [
+        f"{PIP} awscli",
+        f"{AWS} s3 cp {s3_prefix}/{wheel_name} /tmp/{wheel_name}",
+        f"{PIP} /tmp/{wheel_name}",
+    ]
+    return cmds
+
+
 @pytest.fixture(scope="session")
 def conda_env_yml():
     """Write conda yml file needed for tests."""
 
@@ -574,16 +574,18 @@ def my_func():
     assert client_error_message in str(error)
 
 
-@pytest.mark.skipif(
-    sys.version_info[:2] not in [(3, 9), (3, 12)],
-    reason="SageMaker Spark image only available for Python 3.9 and 3.12",
-)
-def test_decorator_with_spark_job(sagemaker_session, cpu_instance_type):
+# @pytest.mark.skipif(
+#     sys.version_info[:2] not in [(3, 9), (3, 12)],
+#     reason="SageMaker Spark image only available for Python 3.9 and 3.12",
+# )
+@pytest.mark.spark_py312
+def test_decorator_with_spark_job(sagemaker_session, cpu_instance_type, spark_pre_execution_commands):
     @remote(
         role=ROLE,
         instance_type=cpu_instance_type,
         sagemaker_session=sagemaker_session,
         keep_alive_period_in_seconds=60,
+        pre_execution_commands=spark_pre_execution_commands,
         spark_config=SparkConfig(
             configuration=[
                 {
@@ -598,7 +600,14 @@ def test_spark_transform():
 
         spark = SparkSession.builder.getOrCreate()
 
-        assert spark.conf.get("spark.app.name") == "remote-spark-test"
+        # Avoid bare assert here: pytest's assertion rewriting injects _pytest
+        # module references into the function bytecode, which causes
+        # deserialization to fail in the Spark container (no pytest installed).
+        app_name = spark.conf.get("spark.app.name")
+        if app_name != "remote-spark-test":
+            raise RuntimeError(
+                f"Expected spark.app.name='remote-spark-test', got '{app_name}'"
+            )
 
     test_spark_transform()
 
 
@@ -798,11 +798,11 @@ def transform(raw_s3_data_as_df):
 #     sys.version_info[:2] not in [(3, 9), (3, 12)],
 #     reason=f"SageMaker Spark image only supports Python 3.9 and 3.12, got {sys.version_info[:2]}",
 # )
-@pytest.mark.skip(
-    reason="Lake Formation credential vending (GetTemporaryGlueTableCredentials) requires "
-    "full LF environment setup (resource registration, trust policy, data location grants) "
-    "that is not configured in CI. See quip-amazon.com/S3FEAMMMuKm0 for details."
-)
+# @pytest.mark.skip(
+#     reason="Lake Formation credential vending (GetTemporaryGlueTableCredentials) requires "
+#     "full LF environment setup (resource registration, trust policy, data location grants) "
+#     "that is not configured in CI. See quip-amazon.com/S3FEAMMMuKm0 for details."
+# )
 @pytest.mark.spark_py312
 @pytest.mark.slow_test
 def test_to_pipeline_and_execute_with_lake_formation(
 
@@ -17,7 +17,11 @@
 import sys
 import json
 
-from sagemaker.train.container_drivers.distributed_drivers.mpi_utils import (
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from distributed_drivers.mpi_utils import (  # noqa: E402 # pylint: disable=C0413,E0611
     start_sshd_daemon,
     bootstrap_master_node,
     bootstrap_worker_node,
@@ -27,7 +31,7 @@
 )
 
 
-from sagemaker.train.container_drivers.common.utils import (
+from common.utils import (  # noqa: E402 # pylint: disable=C0413,E0611
     logger,
     hyperparameters_to_cli_args,
     get_process_count,
 
@@ -79,8 +79,9 @@ def test_create_dataset_from_s3_oss_dpo(self, unique_name, test_bucket, cleanup_
         assert dataset.name == unique_name
         assert dataset.customization_technique == CustomizationTechnique.DPO
 
+    @pytest.mark.us_east_1
     def test_create_dataset_from_s3_nova_sft(self, unique_name, test_bucket, cleanup_list):
-        """Test creating RLVR dataset from S3 URI."""
+        """Test creating Nova SFT dataset from S3 URI."""
         s3_uri = f"s3://{test_bucket}/test_datasets/Nova/nova_sft_train.jsonl"
         dataset = DataSet.create(
             name=unique_name,
@@ -92,8 +93,9 @@ def test_create_dataset_from_s3_nova_sft(self, unique_name, test_bucket, cleanup
         assert dataset.name == unique_name
         assert dataset.customization_technique == CustomizationTechnique.SFT
 
+    @pytest.mark.us_east_1
     def test_create_dataset_from_s3_nova_dpo(self, unique_name, test_bucket, cleanup_list):
-        """Test creating RLVR dataset from S3 URI."""
+        """Test creating Nova DPO dataset from S3 URI."""
         s3_uri = f"s3://{test_bucket}/test_datasets/Nova/nova_dpo_train.jsonl"
         dataset = DataSet.create(
             name=unique_name,
@@ -105,8 +107,9 @@ def test_create_dataset_from_s3_nova_dpo(self, unique_name, test_bucket, cleanup
         assert dataset.name == unique_name
         assert dataset.customization_technique == CustomizationTechnique.DPO
 
+    @pytest.mark.us_east_1
     def test_create_dataset_from_s3_nova_rft(self, unique_name, test_bucket, cleanup_list):
-        """Test creating RLVR dataset from S3 URI."""
+        """Test creating Nova RFT dataset from S3 URI."""
         s3_uri = f"s3://{test_bucket}/test_datasets/Nova/nova_rft_train.jsonl"
         dataset = DataSet.create(
             name=unique_name,
@@ -118,8 +121,9 @@ def test_create_dataset_from_s3_nova_rft(self, unique_name, test_bucket, cleanup
         assert dataset.name == unique_name
         assert dataset.customization_technique == CustomizationTechnique.RLVR
 
+    @pytest.mark.us_east_1
     def test_create_dataset_from_s3_nova_eval(self, unique_name, test_bucket, cleanup_list):
-        """Test creating RLVR dataset from S3 URI."""
+        """Test creating Nova eval dataset from S3 URI."""
         s3_uri = f"s3://{test_bucket}/test_datasets/Nova/nova_eval.jsonl"
         dataset = DataSet.create(
             name=unique_name,
 
@@ -23,8 +23,6 @@
     EvaluationPipelineExecution,
 )
 
-pytestmark = pytest.mark.gpu_intensive
-
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -63,13 +61,11 @@
     "region": "us-west-2",
 }
 
-# Nova model evaluation configuration (from commented section in notebook)
+# Nova model evaluation configuration (uses dedicated test account in us-east-1)
 NOVA_CONFIG = {
-    "model_package_arn": "arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3",
-    "dataset_s3_uri": "s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl",
-    "s3_output_path": "s3://mufi-test-serverless-iad/eval/",
-    "mlflow_tracking_server_arn": "arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server",
-    "model_package_group_arn": "arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models",
+    "dataset_s3_uri": "s3://sagemaker-us-east-1-784379639078/model-customization/eval/zc_test.jsonl",
+    "s3_output_path": "s3://sagemaker-us-east-1-784379639078/model-customization/eval/",
+    "model_package_group_arn": "arn:aws:sagemaker:us-east-1:784379639078:model-package-group/sdk-test-finetuned-models",
     "region": "us-east-1",
 }
 
@@ -288,7 +284,8 @@ def test_benchmark_subtasks_validation(self):
 
         logger.info("Subtask validation tests passed")
 
-    @pytest.mark.skip(reason="Pipeline creation fails - under investigation")
+    # @pytest.mark.skip(reason="Pipeline creation fails - under investigation")
+    @pytest.mark.gpu_intensive
     def test_benchmark_evaluation_base_model_only(self):
         """
         Test benchmark evaluation with base model only (no fine-tuned model).
@@ -341,28 +338,45 @@ def test_benchmark_evaluation_base_model_only(self):
         assert execution.status.overall_status == "Succeeded"
         logger.info("Base model only evaluation completed successfully")
 
-    @pytest.mark.skip(reason="Requires us-east-1 test infrastructure - tracked in AI-5")
+    @pytest.mark.gpu_intensive
+    @pytest.mark.us_east_1
     def test_benchmark_evaluation_nova_model(self):
         """
         Test benchmark evaluation with Nova model.
         
         This test uses a Nova fine-tuned model package in us-east-1 region.
         Configuration from commented section in benchmark_demo.ipynb.
         
-        Note: This test is currently skipped. Remove the @pytest.mark.skip decorator
-        when you want to enable it.
+        Note: This test uses the most recently created model package in the model package
+        group and is skipped when the group is empty (run an SFT or RLVR training job first).
         """
+        import boto3
+        
         # Get benchmarks
         Benchmark = get_benchmarks()
 
+        # Dynamically find the latest model package in the group
+        sm_client = boto3.client("sagemaker", region_name=NOVA_CONFIG["region"])
+        packages = sm_client.list_model_packages(
+            ModelPackageGroupName="sdk-test-finetuned-models",
+            SortBy="CreationTime",
+            SortOrder="Descending",
+            MaxResults=1,
+        )
+        
+        if not packages["ModelPackageSummaryList"]:
+            pytest.skip("No model packages available in sdk-test-finetuned-models group. Run SFT/RLVR training first.")
+        
+        model_package_arn = packages["ModelPackageSummaryList"][0]["ModelPackageArn"]
+        logger.info(f"Using model package: {model_package_arn}")
+        
         logger.info("Creating BenchmarkEvaluator with Nova model")
 
         # Create evaluator with Nova model package
         evaluator = BenchMarkEvaluator(
             benchmark=Benchmark.MMLU,
-            model=NOVA_CONFIG["model_package_arn"],
+            model=model_package_arn,
             s3_output_path=NOVA_CONFIG["s3_output_path"],
-            mlflow_resource_arn=NOVA_CONFIG["mlflow_tracking_server_arn"],
             model_package_group=NOVA_CONFIG["model_package_group_arn"],
             base_eval_name="integ-test-nova-eval",
             region=NOVA_CONFIG["region"],
@@ -371,7 +385,7 @@ def test_benchmark_evaluation_nova_model(self):
         # Verify evaluator was created
         assert evaluator is not None
         assert evaluator.benchmark == Benchmark.MMLU
-        assert evaluator.model == NOVA_CONFIG["model_package_arn"]
+        assert evaluator.model == model_package_arn
         assert evaluator.region == NOVA_CONFIG["region"]
 
         logger.info(f"Created evaluator: {evaluator.base_eval_name}")
@@ -401,8 +415,8 @@ def test_benchmark_evaluation_nova_model(self):
         logger.info(f"Status after refresh: {execution.status.overall_status}")
 
         # Wait for completion
-        logger.info("Waiting for evaluation to complete (timeout: 1 hour)")
-        execution.wait(target_status="Succeeded", poll=30, timeout=3600)
+        logger.info("Waiting for evaluation to complete (timeout: 3 hours)")
+        execution.wait(target_status="Succeeded", poll=30, timeout=10800)
 
         # Verify completion
         assert execution.status.overall_status == "Succeeded"