2323 EvaluationPipelineExecution ,
2424)
2525
26- pytestmark = pytest .mark .gpu_intensive
27-
2826# Configure logging
2927logging .basicConfig (
3028 level = logging .INFO ,
6361 "region" : "us-west-2" ,
6462}
6563
66- # Nova model evaluation configuration (from commented section in notebook )
64+ # Nova model evaluation configuration (uses our own test account in us-east-1 )
6765NOVA_CONFIG = {
68- "model_package_arn" : "arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3" ,
69- "dataset_s3_uri" : "s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl" ,
70- "s3_output_path" : "s3://mufi-test-serverless-iad/eval/" ,
71- "mlflow_tracking_server_arn" : "arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server" ,
72- "model_package_group_arn" : "arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models" ,
66+ "model_package_arn" : "arn:aws:sagemaker:us-east-1:729646638167:model-package/sdk-test-finetuned-models/65" ,
67+ "dataset_s3_uri" : "s3://sagemaker-us-east-1-729646638167/model-customization/eval/zc_test.jsonl" ,
68+ "s3_output_path" : "s3://sagemaker-us-east-1-729646638167/model-customization/eval/" ,
69+ "model_package_group_arn" : "arn:aws:sagemaker:us-east-1:729646638167:model-package-group/sdk-test-finetuned-models" ,
7370 "region" : "us-east-1" ,
7471}
7572
@@ -288,7 +285,7 @@ def test_benchmark_subtasks_validation(self):
288285
289286 logger .info ("Subtask validation tests passed" )
290287
291- @pytest .mark .skip (reason = "Pipeline creation fails - under investigation" )
288+ # @pytest.mark.skip(reason="Pipeline creation fails - under investigation")
292289 @pytest .mark .gpu_intensive
293290 def test_benchmark_evaluation_base_model_only (self ):
294291 """
@@ -342,16 +339,15 @@ def test_benchmark_evaluation_base_model_only(self):
342339 assert execution .status .overall_status == "Succeeded"
343340 logger .info ("Base model only evaluation completed successfully" )
344341
345- @pytest .mark .skip (reason = "Requires us-east-1 test infrastructure - tracked in AI-5 " )
342+ @pytest .mark .skip (reason = "Pending us-east-1 test infrastructure migration to dedicated test account " )
346343 def test_benchmark_evaluation_nova_model (self ):
347344 """
348345 Test benchmark evaluation with Nova model.
349346
350347 This test uses a Nova fine-tuned model package in us-east-1 region.
351348 Configuration from commented section in benchmark_demo.ipynb.
352349
353- Note: This test is currently skipped. Remove the @pytest.mark.skip decorator
354- when you want to enable it.
350+ Note: This test is currently skipped pending us-east-1 test infra migration.
355351 """
356352 # Get benchmarks
357353 Benchmark = get_benchmarks ()
@@ -363,7 +359,6 @@ def test_benchmark_evaluation_nova_model(self):
363359 benchmark = Benchmark .MMLU ,
364360 model = NOVA_CONFIG ["model_package_arn" ],
365361 s3_output_path = NOVA_CONFIG ["s3_output_path" ],
366- mlflow_resource_arn = NOVA_CONFIG ["mlflow_tracking_server_arn" ],
367362 model_package_group = NOVA_CONFIG ["model_package_group_arn" ],
368363 base_eval_name = "integ-test-nova-eval" ,
369364 region = NOVA_CONFIG ["region" ],
0 commit comments