From 1b80f495f62cd7f565c12a854fe5cc79291e812c Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Mon, 23 Mar 2026 12:28:29 -0400 Subject: [PATCH 1/2] fix: Pipeline TypeError: can only concatenate list (not "NoneType") to list Using Sou (#5518) --- .../src/sagemaker/core/workflow/utilities.py | 3 + .../tests/unit/workflow/test_utilities.py | 57 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/sagemaker-core/src/sagemaker/core/workflow/utilities.py b/sagemaker-core/src/sagemaker/core/workflow/utilities.py index c07a31c51e..1191e4d181 100644 --- a/sagemaker-core/src/sagemaker/core/workflow/utilities.py +++ b/sagemaker-core/src/sagemaker/core/workflow/utilities.py @@ -173,6 +173,8 @@ def get_code_hash(step: Entity) -> str: source_code = model_trainer.source_code if source_code: source_dir = source_code.source_dir + # requirements may be None when SourceCode.requirements is not set; + # get_training_code_hash handles None dependencies gracefully requirements = source_code.requirements entry_point = source_code.entry_script return get_training_code_hash(entry_point, source_dir, requirements) @@ -197,6 +199,7 @@ def get_processing_dependencies(dependency_args: List[List[str]]) -> List[str]: def get_processing_code_hash(code: str, source_dir: str, dependencies: List[str]) -> str: + dependencies = dependencies or [] """Get the hash of a processing step's code artifact(s). Args: diff --git a/sagemaker-core/tests/unit/workflow/test_utilities.py b/sagemaker-core/tests/unit/workflow/test_utilities.py index 5e9ed7bbbd..826c71cfc5 100644 --- a/sagemaker-core/tests/unit/workflow/test_utilities.py +++ b/sagemaker-core/tests/unit/workflow/test_utilities.py @@ -214,6 +214,35 @@ def test_get_processing_dependencies_multiple_lists(self): assert result == ["dep1", "dep2", "dep3", "dep4", "dep5"] + def test_get_processing_code_hash_with_none_dependencies(self): + """Test get_processing_code_hash does not raise TypeError when dependencies is None""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("print('hello')") + temp_file = f.name + + try: + # Should not raise TypeError + result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) + + assert result is not None + assert len(result) == 64 + finally: + os.unlink(temp_file) + + def test_get_processing_code_hash_with_none_dependencies_and_source_dir(self): + """Test get_processing_code_hash with source_dir and None dependencies""" + with tempfile.TemporaryDirectory() as temp_dir: + code_file = Path(temp_dir, "script.py") + code_file.write_text("print('hello')") + + # Should not raise TypeError + result = get_processing_code_hash( + code=str(code_file), source_dir=temp_dir, dependencies=None + ) + + assert result is not None + assert len(result) == 64 + def test_get_processing_code_hash_with_source_dir(self): """Test get_processing_code_hash with source_dir""" with tempfile.TemporaryDirectory() as temp_dir: @@ -264,6 +293,34 @@ def test_get_processing_code_hash_with_dependencies(self): assert result is not None + def test_get_training_code_hash_with_none_dependencies_and_source_dir(self): + """Test get_training_code_hash with source_dir and None dependencies does not raise""" + with tempfile.TemporaryDirectory() as temp_dir: + entry_file = Path(temp_dir, "train.py") + entry_file.write_text("print('training')") + + # Should not raise TypeError + result = get_training_code_hash( + entry_point=str(entry_file), source_dir=temp_dir, dependencies=None + ) + + assert result is not None + assert len(result) == 64 + + def test_get_training_code_hash_with_none_dependencies_and_entry_point(self): + """Test get_training_code_hash with entry_point only and None dependencies does not raise""" + with tempfile.TemporaryDirectory() as temp_dir: + entry_file = Path(temp_dir, "train.py") + entry_file.write_text("print('training')") + + # Should not raise TypeError + result = get_training_code_hash( + entry_point=str(entry_file), source_dir=None, dependencies=None + ) + + assert result is not None + assert len(result) == 64 + def test_get_training_code_hash_with_source_dir(self): """Test get_training_code_hash with source_dir""" with tempfile.TemporaryDirectory() as temp_dir: From 7a36f43219fc4eb502925c112a8d3012c019e18f Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Mon, 23 Mar 2026 12:31:34 -0400 Subject: [PATCH 2/2] fix: address review comments (iteration #1) --- .../src/sagemaker/core/workflow/utilities.py | 9 ++++--- .../tests/unit/workflow/test_utilities.py | 25 +++++++++---------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/sagemaker-core/src/sagemaker/core/workflow/utilities.py b/sagemaker-core/src/sagemaker/core/workflow/utilities.py index 1191e4d181..88ac23a533 100644 --- a/sagemaker-core/src/sagemaker/core/workflow/utilities.py +++ b/sagemaker-core/src/sagemaker/core/workflow/utilities.py @@ -173,8 +173,6 @@ def get_code_hash(step: Entity) -> str: source_code = model_trainer.source_code if source_code: source_dir = source_code.source_dir - # requirements may be None when SourceCode.requirements is not set; - # get_training_code_hash handles None dependencies gracefully requirements = source_code.requirements entry_point = source_code.entry_script return get_training_code_hash(entry_point, source_dir, requirements) @@ -199,7 +197,6 @@ def get_processing_dependencies(dependency_args: List[List[str]]) -> List[str]: def get_processing_code_hash(code: str, source_dir: str, dependencies: List[str]) -> str: - dependencies = dependencies or [] """Get the hash of a processing step's code artifact(s). Args: @@ -213,6 +210,12 @@ def get_processing_code_hash(code: str, source_dir: str, dependencies: List[str] str: A hash string representing the unique code artifact(s) for the step """ + # SourceCode.requirements and other upstream dependency fields default to None + # when not explicitly set. Since this function concatenates dependencies via list + # addition (e.g. [source_dir] + dependencies), we default None to an empty list + # to prevent TypeError. + dependencies = dependencies or [] + # FrameworkProcessor if source_dir: source_dir_url = urlparse(source_dir) diff --git a/sagemaker-core/tests/unit/workflow/test_utilities.py b/sagemaker-core/tests/unit/workflow/test_utilities.py index 826c71cfc5..119379983d 100644 --- a/sagemaker-core/tests/unit/workflow/test_utilities.py +++ b/sagemaker-core/tests/unit/workflow/test_utilities.py @@ -214,14 +214,13 @@ def test_get_processing_dependencies_multiple_lists(self): assert result == ["dep1", "dep2", "dep3", "dep4", "dep5"] - def test_get_processing_code_hash_with_none_dependencies(self): - """Test get_processing_code_hash does not raise TypeError when dependencies is None""" + def test_get_processing_code_hash_with_none_dependencies_and_code_only(self): + """Test get_processing_code_hash with None dependencies and code only""" with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: f.write("print('hello')") temp_file = f.name try: - # Should not raise TypeError result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None) assert result is not None @@ -230,12 +229,11 @@ def test_get_processing_code_hash_with_none_dependencies(self): os.unlink(temp_file) def test_get_processing_code_hash_with_none_dependencies_and_source_dir(self): - """Test get_processing_code_hash with source_dir and None dependencies""" + """Test get_processing_code_hash with None dependencies and source_dir""" with tempfile.TemporaryDirectory() as temp_dir: code_file = Path(temp_dir, "script.py") code_file.write_text("print('hello')") - # Should not raise TypeError result = get_processing_code_hash( code=str(code_file), source_dir=temp_dir, dependencies=None ) @@ -294,12 +292,11 @@ def test_get_processing_code_hash_with_dependencies(self): assert result is not None def test_get_training_code_hash_with_none_dependencies_and_source_dir(self): - """Test get_training_code_hash with source_dir and None dependencies does not raise""" + """Test get_training_code_hash with None dependencies and source_dir""" with tempfile.TemporaryDirectory() as temp_dir: entry_file = Path(temp_dir, "train.py") entry_file.write_text("print('training')") - # Should not raise TypeError result = get_training_code_hash( entry_point=str(entry_file), source_dir=temp_dir, dependencies=None ) @@ -308,18 +305,20 @@ def test_get_training_code_hash_with_none_dependencies_and_source_dir(self): assert len(result) == 64 def test_get_training_code_hash_with_none_dependencies_and_entry_point(self): - """Test get_training_code_hash with entry_point only and None dependencies does not raise""" - with tempfile.TemporaryDirectory() as temp_dir: - entry_file = Path(temp_dir, "train.py") - entry_file.write_text("print('training')") + """Test get_training_code_hash with None dependencies and entry_point only""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("print('training')") + temp_file = f.name - # Should not raise TypeError + try: result = get_training_code_hash( - entry_point=str(entry_file), source_dir=None, dependencies=None + entry_point=temp_file, source_dir=None, dependencies=None ) assert result is not None assert len(result) == 64 + finally: + os.unlink(temp_file) def test_get_training_code_hash_with_source_dir(self): """Test get_training_code_hash with source_dir"""