Default `dependencies=None` to an empty list in get_processing_code_hash and
add unit tests that pass `dependencies=None` to the processing and training
code-hash helpers.

NOTE(review): line breaks and indentation were reconstructed from a flattened
copy of this patch; confirm context-line whitespace against the target files
before applying.

diff --git a/sagemaker-core/src/sagemaker/core/workflow/utilities.py b/sagemaker-core/src/sagemaker/core/workflow/utilities.py
index c07a31c51e..88ac23a533 100644
--- a/sagemaker-core/src/sagemaker/core/workflow/utilities.py
+++ b/sagemaker-core/src/sagemaker/core/workflow/utilities.py
@@ -210,6 +210,12 @@ def get_processing_code_hash(code: str, source_dir: str, dependencies: List[str]
         str: A hash string representing the unique code artifact(s) for the step
     """
 
+    # SourceCode.requirements and other upstream dependency fields default to None
+    # when not explicitly set. Since this function concatenates dependencies via list
+    # addition (e.g. [source_dir] + dependencies), we default None to an empty list
+    # to prevent TypeError.
+    dependencies = dependencies or []
+
     # FrameworkProcessor
     if source_dir:
         source_dir_url = urlparse(source_dir)
diff --git a/sagemaker-core/tests/unit/workflow/test_utilities.py b/sagemaker-core/tests/unit/workflow/test_utilities.py
index 5e9ed7bbbd..119379983d 100644
--- a/sagemaker-core/tests/unit/workflow/test_utilities.py
+++ b/sagemaker-core/tests/unit/workflow/test_utilities.py
@@ -214,6 +214,33 @@ def test_get_processing_dependencies_multiple_lists(self):
 
         assert result == ["dep1", "dep2", "dep3", "dep4", "dep5"]
 
+    def test_get_processing_code_hash_with_none_dependencies_and_code_only(self):
+        """Test get_processing_code_hash with None dependencies and code only"""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
+            f.write("print('hello')")
+            temp_file = f.name
+
+        try:
+            result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None)
+
+            assert result is not None
+            assert len(result) == 64
+        finally:
+            os.unlink(temp_file)
+
+    def test_get_processing_code_hash_with_none_dependencies_and_source_dir(self):
+        """Test get_processing_code_hash with None dependencies and source_dir"""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            code_file = Path(temp_dir, "script.py")
+            code_file.write_text("print('hello')")
+
+            result = get_processing_code_hash(
+                code=str(code_file), source_dir=temp_dir, dependencies=None
+            )
+
+            assert result is not None
+            assert len(result) == 64
+
     def test_get_processing_code_hash_with_source_dir(self):
         """Test get_processing_code_hash with source_dir"""
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -264,6 +291,35 @@ def test_get_processing_code_hash_with_dependencies(self):
 
             assert result is not None
 
+    def test_get_training_code_hash_with_none_dependencies_and_source_dir(self):
+        """Test get_training_code_hash with None dependencies and source_dir"""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            entry_file = Path(temp_dir, "train.py")
+            entry_file.write_text("print('training')")
+
+            result = get_training_code_hash(
+                entry_point=str(entry_file), source_dir=temp_dir, dependencies=None
+            )
+
+            assert result is not None
+            assert len(result) == 64
+
+    def test_get_training_code_hash_with_none_dependencies_and_entry_point(self):
+        """Test get_training_code_hash with None dependencies and entry_point only"""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
+            f.write("print('training')")
+            temp_file = f.name
+
+        try:
+            result = get_training_code_hash(
+                entry_point=temp_file, source_dir=None, dependencies=None
+            )
+
+            assert result is not None
+            assert len(result) == 64
+        finally:
+            os.unlink(temp_file)
+
     def test_get_training_code_hash_with_source_dir(self):
         """Test get_training_code_hash with source_dir"""
         with tempfile.TemporaryDirectory() as temp_dir: