From 97579f3c8e07623bfeea7af42eed631c60f88ea1 Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Wed, 25 Mar 2026 12:27:38 -0400 Subject: [PATCH 1/3] fix: Pipeline TypeError: can only concatenate list (not "NoneType") to list Using Sou (#5518) --- .../src/sagemaker/core/workflow/utilities.py | 2 + .../tests/unit/workflow/test_utilities.py | 42 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/sagemaker-core/src/sagemaker/core/workflow/utilities.py b/sagemaker-core/src/sagemaker/core/workflow/utilities.py index c07a31c51e..2ceec8c50b 100644 --- a/sagemaker-core/src/sagemaker/core/workflow/utilities.py +++ b/sagemaker-core/src/sagemaker/core/workflow/utilities.py @@ -149,6 +149,8 @@ def get_code_hash(step: Entity) -> str: Returns: str: A hash string representing the unique code artifact(s) for the step """ + + dependencies = dependencies or [] from sagemaker.mlops.workflow.steps import ProcessingStep, TrainingStep if isinstance(step, ProcessingStep) and step.step_args: diff --git a/sagemaker-core/tests/unit/workflow/test_utilities.py b/sagemaker-core/tests/unit/workflow/test_utilities.py index 5e9ed7bbbd..4a79cb3b37 100644 --- a/sagemaker-core/tests/unit/workflow/test_utilities.py +++ b/sagemaker-core/tests/unit/workflow/test_utilities.py @@ -264,6 +264,48 @@ def test_get_processing_code_hash_with_dependencies(self): assert result is not None + def test_get_processing_code_hash_with_none_dependencies(self): + """Test get_processing_code_hash with None dependencies does not raise TypeError""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("print('hello')") + temp_file = f.name + + try: + result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) + + assert result is not None + assert len(result) == 64 + finally: + os.unlink(temp_file) + + def test_get_processing_code_hash_with_source_dir_and_none_dependencies(self): + """Test get_processing_code_hash with source_dir and None dependencies""" + with tempfile.TemporaryDirectory() as temp_dir: + code_file = Path(temp_dir, "script.py") + code_file.write_text("print('hello')") + + result = get_processing_code_hash( + code=str(code_file), source_dir=temp_dir, dependencies=None + ) + + assert result is not None + assert len(result) == 64 + + def test_get_processing_code_hash_with_code_only_and_none_dependencies(self): + """Test get_processing_code_hash with code only and None dependencies""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("print('hello')") + temp_file = f.name + + try: + # Ensure no TypeError when dependencies is None + result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) + + assert result is not None + assert len(result) == 64 + finally: + os.unlink(temp_file) + def test_get_training_code_hash_with_source_dir(self): """Test get_training_code_hash with source_dir""" with tempfile.TemporaryDirectory() as temp_dir: From 2e31aa516b42bad37f1d4f26f5627ad39a4e76b7 Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Wed, 25 Mar 2026 12:30:48 -0400 Subject: [PATCH 2/3] fix: address review comments (iteration #1) --- .../tests/unit/workflow/test_utilities.py | 96 +++++++++++-------- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/sagemaker-core/tests/unit/workflow/test_utilities.py b/sagemaker-core/tests/unit/workflow/test_utilities.py index 4a79cb3b37..fa798560ef 100644 --- a/sagemaker-core/tests/unit/workflow/test_utilities.py +++ b/sagemaker-core/tests/unit/workflow/test_utilities.py @@ -17,6 +17,7 @@ from pathlib import Path from unittest.mock import Mock, patch, MagicMock from sagemaker.core.workflow.utilities import ( + get_code_hash, list_to_request, hash_file, hash_files_or_dirs, @@ -225,86 +226,99 @@ def test_get_processing_code_hash_with_source_dir(self): ) assert result is not None + + def test_get_processing_code_hash_with_source_dir_and_none_dependencies(self): + """Test get_processing_code_hash with source_dir and dependencies=None""" + with tempfile.TemporaryDirectory() as temp_dir: + code_file = Path(temp_dir, "script.py") + code_file.write_text("print('hello')") + + result = get_processing_code_hash( + code=str(code_file), source_dir=temp_dir, dependencies=None + ) + + assert result is not None assert len(result) == 64 - def test_get_processing_code_hash_code_only(self): - """Test get_processing_code_hash with code only""" + def test_get_processing_code_hash_with_code_only_and_none_dependencies(self): + """Test get_processing_code_hash with code only and dependencies=None""" with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: f.write("print('hello')") temp_file = f.name try: - result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=[]) + result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) assert result is not None assert len(result) == 64 finally: os.unlink(temp_file) - def test_get_processing_code_hash_s3_uri(self): - """Test get_processing_code_hash with S3 URI returns None""" - result = get_processing_code_hash( - code="s3://bucket/script.py", source_dir=None, dependencies=[] - ) - - assert result is None + @pytest.mark.skip(reason="Requires sagemaker-mlops module which is not installed in sagemaker-core tests") + def test_get_code_hash_with_training_step_none_requirements(self): + """Test get_code_hash with a TrainingStep whose source_code.requirements is None""" + from sagemaker.mlops.workflow.steps import TrainingStep - def test_get_processing_code_hash_with_dependencies(self): - """Test get_processing_code_hash with dependencies""" with tempfile.TemporaryDirectory() as temp_dir: - code_file = Path(temp_dir, "script.py") - code_file.write_text("print('hello')") + entry_file = Path(temp_dir, "train.py") + entry_file.write_text("print('training')") - dep_file = Path(temp_dir, "utils.py") - dep_file.write_text("def helper(): pass") + mock_source_code = Mock() + mock_source_code.source_dir = temp_dir + mock_source_code.requirements = None + mock_source_code.entry_script = str(entry_file) - result = get_processing_code_hash( - code=str(code_file), source_dir=temp_dir, dependencies=[str(dep_file)] - ) + mock_model_trainer = Mock() + mock_model_trainer.source_code = mock_source_code + + mock_step_args = Mock() + mock_step_args.func_args = [mock_model_trainer] + + step = Mock(spec=TrainingStep) + step.step_args = mock_step_args + + result = get_code_hash(step) assert result is not None + assert len(result) == 64 + assert len(result) == 64 - def test_get_processing_code_hash_with_none_dependencies(self): - """Test get_processing_code_hash with None dependencies does not raise TypeError""" + def test_get_processing_code_hash_code_only(self): + """Test get_processing_code_hash with code only""" with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: f.write("print('hello')") temp_file = f.name try: - result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) + result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=[]) assert result is not None assert len(result) == 64 finally: os.unlink(temp_file) - def test_get_processing_code_hash_with_source_dir_and_none_dependencies(self): - """Test get_processing_code_hash with source_dir and None dependencies""" + def test_get_processing_code_hash_s3_uri(self): + """Test get_processing_code_hash with S3 URI returns None""" + result = get_processing_code_hash( + code="s3://bucket/script.py", source_dir=None, dependencies=[] + ) + + assert result is None + + def test_get_processing_code_hash_with_dependencies(self): + """Test get_processing_code_hash with dependencies""" with tempfile.TemporaryDirectory() as temp_dir: code_file = Path(temp_dir, "script.py") code_file.write_text("print('hello')") + dep_file = Path(temp_dir, "utils.py") + dep_file.write_text("def helper(): pass") + result = get_processing_code_hash( - code=str(code_file), source_dir=temp_dir, dependencies=None + code=str(code_file), source_dir=temp_dir, dependencies=[str(dep_file)] ) assert result is not None - assert len(result) == 64 - - def test_get_processing_code_hash_with_code_only_and_none_dependencies(self): - """Test get_processing_code_hash with code only and None dependencies""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: - f.write("print('hello')") - temp_file = f.name - - try: - # Ensure no TypeError when dependencies is None - result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) - - assert result is not None - assert len(result) == 64 - finally: - os.unlink(temp_file) def test_get_training_code_hash_with_source_dir(self): """Test get_training_code_hash with source_dir""" From 88142fc79ba6bfd5df7bc2596db4a26eb1bb24b8 Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Wed, 25 Mar 2026 12:35:37 -0400 Subject: [PATCH 3/3] fix: address review comments (iteration #2) --- .../src/sagemaker/core/workflow/utilities.py | 5 +- .../tests/unit/workflow/test_utilities.py | 97 +++++++++++-------- 2 files changed, 57 insertions(+), 45 deletions(-) diff --git a/sagemaker-core/src/sagemaker/core/workflow/utilities.py b/sagemaker-core/src/sagemaker/core/workflow/utilities.py index 2ceec8c50b..189cb2b1f2 100644 --- a/sagemaker-core/src/sagemaker/core/workflow/utilities.py +++ b/sagemaker-core/src/sagemaker/core/workflow/utilities.py @@ -149,8 +149,6 @@ def get_code_hash(step: Entity) -> str: Returns: str: A hash string representing the unique code artifact(s) for the step """ - - dependencies = dependencies or [] from sagemaker.mlops.workflow.steps import ProcessingStep, TrainingStep if isinstance(step, ProcessingStep) and step.step_args: @@ -175,7 +173,7 @@ def get_code_hash(step: Entity) -> str: source_code = model_trainer.source_code if source_code: source_dir = source_code.source_dir - requirements = source_code.requirements + requirements = source_code.requirements or [] entry_point = source_code.entry_script return get_training_code_hash(entry_point, source_dir, requirements) return None @@ -211,6 +209,7 @@ def get_processing_code_hash(code: str, source_dir: str, dependencies: List[str] Returns: str: A hash string representing the unique code artifact(s) for the step """ + dependencies = dependencies or [] # FrameworkProcessor if source_dir: diff --git a/sagemaker-core/tests/unit/workflow/test_utilities.py b/sagemaker-core/tests/unit/workflow/test_utilities.py index fa798560ef..fcee34edf8 100644 --- a/sagemaker-core/tests/unit/workflow/test_utilities.py +++ b/sagemaker-core/tests/unit/workflow/test_utilities.py @@ -17,7 +17,6 @@ from pathlib import Path from unittest.mock import Mock, patch, MagicMock from sagemaker.core.workflow.utilities import ( - get_code_hash, list_to_request, hash_file, hash_files_or_dirs, @@ -226,77 +225,50 @@ def test_get_processing_code_hash_with_source_dir(self): ) assert result is not None - - def test_get_processing_code_hash_with_source_dir_and_none_dependencies(self): - """Test get_processing_code_hash with source_dir and dependencies=None""" - with tempfile.TemporaryDirectory() as temp_dir: - code_file = Path(temp_dir, "script.py") - code_file.write_text("print('hello')") - - result = get_processing_code_hash( - code=str(code_file), source_dir=temp_dir, dependencies=None - ) - - assert result is not None assert len(result) == 64 - def test_get_processing_code_hash_with_code_only_and_none_dependencies(self): - """Test get_processing_code_hash with code only and dependencies=None""" + def test_get_processing_code_hash_code_only(self): + """Test get_processing_code_hash with code only""" with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: f.write("print('hello')") temp_file = f.name try: - result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) + result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=[]) assert result is not None assert len(result) == 64 finally: os.unlink(temp_file) - @pytest.mark.skip(reason="Requires sagemaker-mlops module which is not installed in sagemaker-core tests") - def test_get_code_hash_with_training_step_none_requirements(self): - """Test get_code_hash with a TrainingStep whose source_code.requirements is None""" - from sagemaker.mlops.workflow.steps import TrainingStep - + def test_get_processing_code_hash_with_none_dependencies_and_source_dir(self): + """Test get_processing_code_hash with None dependencies and source_dir""" with tempfile.TemporaryDirectory() as temp_dir: - entry_file = Path(temp_dir, "train.py") - entry_file.write_text("print('training')") - - mock_source_code = Mock() - mock_source_code.source_dir = temp_dir - mock_source_code.requirements = None - mock_source_code.entry_script = str(entry_file) - - mock_model_trainer = Mock() - mock_model_trainer.source_code = mock_source_code - - mock_step_args = Mock() - mock_step_args.func_args = [mock_model_trainer] - - step = Mock(spec=TrainingStep) - step.step_args = mock_step_args + code_file = Path(temp_dir, "script.py") + code_file.write_text("print('hello')") - result = get_code_hash(step) + result = get_processing_code_hash( + code=str(code_file), source_dir=temp_dir, dependencies=None + ) assert result is not None assert len(result) == 64 - assert len(result) == 64 - def test_get_processing_code_hash_code_only(self): - """Test get_processing_code_hash with code only""" + def test_get_processing_code_hash_with_none_dependencies_and_code_only(self): + """Test get_processing_code_hash with None dependencies and code only""" with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: f.write("print('hello')") temp_file = f.name try: - result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=[]) + result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) assert result is not None assert len(result) == 64 finally: os.unlink(temp_file) + def test_get_processing_code_hash_s3_uri(self): """Test get_processing_code_hash with S3 URI returns None""" result = get_processing_code_hash( @@ -364,6 +336,47 @@ def test_get_training_code_hash_entry_point_only(self): assert len(result_with_deps) == 64 assert result_no_deps != result_with_deps + def test_get_code_hash_training_step_with_none_requirements(self): + """Test get_code_hash with TrainingStep whose source_code has requirements=None""" + from sagemaker.core.workflow.utilities import get_code_hash + + with tempfile.TemporaryDirectory() as temp_dir: + entry_file = Path(temp_dir, "train.py") + entry_file.write_text("print('training')") + + mock_source_code = Mock() + mock_source_code.source_dir = temp_dir + mock_source_code.requirements = None + mock_source_code.entry_script = str(entry_file) + + mock_model_trainer = Mock() + mock_model_trainer.source_code = mock_source_code + + mock_step_args = Mock() + mock_step_args.func_args = [mock_model_trainer] + + mock_step = Mock() + mock_step.step_args = mock_step_args + + with patch("sagemaker.core.workflow.utilities.isinstance") as mock_isinstance: + def isinstance_side_effect(obj, cls): + from sagemaker.mlops.workflow.steps import TrainingStep, ProcessingStep + if cls is ProcessingStep: + return False + if cls is TrainingStep: + return obj is mock_step + return builtins_isinstance(obj, cls) + + import builtins + builtins_isinstance = builtins.isinstance + mock_isinstance.side_effect = isinstance_side_effect + + result = get_code_hash(mock_step) + + assert result is not None + assert len(result) == 64 + + def test_get_training_code_hash_s3_uri(self): """Test get_training_code_hash with S3 URI returns None""" result = get_training_code_hash(