diff --git a/sagemaker-core/src/sagemaker/core/workflow/utilities.py b/sagemaker-core/src/sagemaker/core/workflow/utilities.py
index c07a31c51e..189cb2b1f2 100644
--- a/sagemaker-core/src/sagemaker/core/workflow/utilities.py
+++ b/sagemaker-core/src/sagemaker/core/workflow/utilities.py
@@ -173,7 +173,7 @@ def get_code_hash(step: Entity) -> str:
         source_code = model_trainer.source_code
         if source_code:
             source_dir = source_code.source_dir
-            requirements = source_code.requirements
+            requirements = source_code.requirements or []
             entry_point = source_code.entry_script
             return get_training_code_hash(entry_point, source_dir, requirements)
     return None
@@ -209,6 +209,7 @@ def get_processing_code_hash(code: str, source_dir: str, dependencies: List[str]
     Returns:
         str: A hash string representing the unique code artifact(s) for the step
     """
+    dependencies = dependencies or []
 
     # FrameworkProcessor
     if source_dir:
diff --git a/sagemaker-core/tests/unit/workflow/test_utilities.py b/sagemaker-core/tests/unit/workflow/test_utilities.py
index 5e9ed7bbbd..fcee34edf8 100644
--- a/sagemaker-core/tests/unit/workflow/test_utilities.py
+++ b/sagemaker-core/tests/unit/workflow/test_utilities.py
@@ -241,6 +241,33 @@ def test_get_processing_code_hash_code_only(self):
         finally:
             os.unlink(temp_file)
 
+    def test_get_processing_code_hash_with_none_dependencies_and_source_dir(self):
+        """Test get_processing_code_hash with None dependencies and source_dir"""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            code_file = Path(temp_dir, "script.py")
+            code_file.write_text("print('hello')")
+
+            result = get_processing_code_hash(
+                code=str(code_file), source_dir=temp_dir, dependencies=None
+            )
+
+            assert result is not None
+            assert len(result) == 64
+
+    def test_get_processing_code_hash_with_none_dependencies_and_code_only(self):
+        """Test get_processing_code_hash with None dependencies and code only"""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
+            f.write("print('hello')")
+            temp_file = f.name
+
+        try:
+            result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None)
+
+            assert result is not None
+            assert len(result) == 64
+        finally:
+            os.unlink(temp_file)
+
     def test_get_processing_code_hash_s3_uri(self):
         """Test get_processing_code_hash with S3 URI returns None"""
         result = get_processing_code_hash(
@@ -308,6 +335,47 @@ def test_get_training_code_hash_entry_point_only(self):
             assert len(result_with_deps) == 64
             assert result_no_deps != result_with_deps
 
+    def test_get_code_hash_training_step_with_none_requirements(self):
+        """Test get_code_hash with TrainingStep whose source_code has requirements=None"""
+        from sagemaker.core.workflow.utilities import get_code_hash
+        from sagemaker.mlops.workflow.steps import ProcessingStep, TrainingStep
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            entry_file = Path(temp_dir, "train.py")
+            entry_file.write_text("print('training')")
+
+            mock_source_code = Mock()
+            mock_source_code.source_dir = temp_dir
+            mock_source_code.requirements = None
+            mock_source_code.entry_script = str(entry_file)
+
+            mock_model_trainer = Mock()
+            mock_model_trainer.source_code = mock_source_code
+
+            mock_step_args = Mock()
+            mock_step_args.func_args = [mock_model_trainer]
+
+            mock_step = Mock()
+            mock_step.step_args = mock_step_args
+
+            def isinstance_side_effect(obj, cls):
+                # Only the step-type checks are faked; every other call defers to
+                # the real builtin, which is not patched in this test module.
+                if cls is ProcessingStep:
+                    return False
+                if cls is TrainingStep:
+                    return obj is mock_step
+                return isinstance(obj, cls)
+
+            with patch(
+                "sagemaker.core.workflow.utilities.isinstance",
+                side_effect=isinstance_side_effect,
+            ):
+                result = get_code_hash(mock_step)
+
+            assert result is not None
+            assert len(result) == 64
+
     def test_get_training_code_hash_s3_uri(self):
         """Test get_training_code_hash with S3 URI returns None"""
         result = get_training_code_hash(