tests: unskip three tests with individual issues in train (#5894)

lucasjia-aws · web-flow · commit 16ffb943da9e · 2026-05-28T11:13:06.000-07:00
* tests: unskip three tests

* fix: use relative imports in mpi_driver.py for container compatibility

The MPI driver script used absolute imports (from sagemaker.train.container_drivers...)
which fail at runtime in the training container because sagemaker-train is not installed
there. The driver scripts are copied to /opt/ml/input/data/sm_drivers/ and executed
directly by the container entrypoint.

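Changed to top-level imports resolved through sys.path (the directory two levels
above the driver script is inserted at the front of sys.path), matching the pattern
used by torchrun_driver.py, which works correctly in the container environment.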

* test: migrate nova eval test to use own account resources

Remove the cross-account dependency in test_benchmark_evaluation_nova_model by
replacing resources from account 052150106756 with equivalents in our test account
(729646638167) in us-east-1. Also remove mlflow_tracking_server_arn, since
no MLflow server exists in us-east-1.

Test remains skipped pending us-east-1 test infrastructure migration to a
dedicated test account.

* test: remove module-level skip (the `pytestmark = pytest.mark.gpu_intensive` marker)
diff --git a/sagemaker-train/src/sagemaker/train/container_drivers/distributed_drivers/mpi_driver.py b/sagemaker-train/src/sagemaker/train/container_drivers/distributed_drivers/mpi_driver.py
@@ -17,7 +17,11 @@
 import sys
 import json
 
-from sagemaker.train.container_drivers.distributed_drivers.mpi_utils import (
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from distributed_drivers.mpi_utils import (  # noqa: E402 # pylint: disable=C0413,E0611
     start_sshd_daemon,
     bootstrap_master_node,
     bootstrap_worker_node,
@@ -27,7 +31,7 @@
 )
 
 
-from sagemaker.train.container_drivers.common.utils import (
+from common.utils import (  # noqa: E402 # pylint: disable=C0413,E0611
     logger,
     hyperparameters_to_cli_args,
     get_process_count,
diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
@@ -23,8 +23,6 @@
     EvaluationPipelineExecution,
 )
 
-pytestmark = pytest.mark.gpu_intensive
-
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -63,13 +61,12 @@
     "region": "us-west-2",
 }
 
-# Nova model evaluation configuration (from commented section in notebook)
+# Nova model evaluation configuration (uses our own test account in us-east-1)
 NOVA_CONFIG = {
-    "model_package_arn": "arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3",
-    "dataset_s3_uri": "s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl",
-    "s3_output_path": "s3://mufi-test-serverless-iad/eval/",
-    "mlflow_tracking_server_arn": "arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server",
-    "model_package_group_arn": "arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models",
+    "model_package_arn": "arn:aws:sagemaker:us-east-1:729646638167:model-package/sdk-test-finetuned-models/65",
+    "dataset_s3_uri": "s3://sagemaker-us-east-1-729646638167/model-customization/eval/zc_test.jsonl",
+    "s3_output_path": "s3://sagemaker-us-east-1-729646638167/model-customization/eval/",
+    "model_package_group_arn": "arn:aws:sagemaker:us-east-1:729646638167:model-package-group/sdk-test-finetuned-models",
     "region": "us-east-1",
 }
 
@@ -288,7 +285,7 @@ def test_benchmark_subtasks_validation(self):
         
         logger.info("Subtask validation tests passed")
 
-    @pytest.mark.skip(reason="Pipeline creation fails - under investigation")
+    # @pytest.mark.skip(reason="Pipeline creation fails - under investigation")
     @pytest.mark.gpu_intensive
     def test_benchmark_evaluation_base_model_only(self):
         """
@@ -342,16 +339,15 @@ def test_benchmark_evaluation_base_model_only(self):
         assert execution.status.overall_status == "Succeeded"
         logger.info("Base model only evaluation completed successfully")
 
-    @pytest.mark.skip(reason="Requires us-east-1 test infrastructure - tracked in AI-5")
+    @pytest.mark.skip(reason="Pending us-east-1 test infrastructure migration to dedicated test account")
     def test_benchmark_evaluation_nova_model(self):
         """
         Test benchmark evaluation with Nova model.
         
         This test uses a Nova fine-tuned model package in us-east-1 region.
         Configuration from commented section in benchmark_demo.ipynb.
         
-        Note: This test is currently skipped. Remove the @pytest.mark.skip decorator
-        when you want to enable it.
+        Note: This test is currently skipped pending us-east-1 test infra migration.
         """
         # Get benchmarks
         Benchmark = get_benchmarks()
@@ -363,7 +359,6 @@ def test_benchmark_evaluation_nova_model(self):
             benchmark=Benchmark.MMLU,
             model=NOVA_CONFIG["model_package_arn"],
             s3_output_path=NOVA_CONFIG["s3_output_path"],
-            mlflow_resource_arn=NOVA_CONFIG["mlflow_tracking_server_arn"],
             model_package_group=NOVA_CONFIG["model_package_group_arn"],
             base_eval_name="integ-test-nova-eval",
             region=NOVA_CONFIG["region"],
diff --git a/sagemaker-train/tests/integ/train/test_model_trainer.py b/sagemaker-train/tests/integ/train/test_model_trainer.py
@@ -96,7 +96,7 @@ def test_hp_contract_basic_sh_script(sagemaker_session):
 
 
 # skip this test for now as requirments.txt is not resolved
-@pytest.mark.skip(reason="MPI distributed training does not resolve requirements.txt on worker nodes")
+# @pytest.mark.skip(reason="MPI distributed training does not resolve requirements.txt on worker nodes")
 def test_hp_contract_mpi_script(sagemaker_session):
     compute = Compute(instance_type="ml.m5.xlarge", instance_count=2)
     model_trainer = ModelTrainer(