Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sagemaker-core/src/sagemaker/core/workflow/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def get_code_hash(step: Entity) -> str:
Returns:
str: A hash string representing the unique code artifact(s) for the step
"""

Comment thread
aviruthen marked this conversation as resolved.
Outdated
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Fix is applied to the wrong function. This line is inside get_code_hash(step: Entity), which does not have a dependencies parameter. The variable dependencies is not defined in this scope, so this will raise a NameError at runtime.

The fix should be applied inside get_processing_code_hash() instead. Looking at the function signature of get_processing_code_hash, it likely accepts code, source_dir, and dependencies parameters. The defensive dependencies = dependencies or [] should be added at the top of that function body.

Additionally, you should also consider fixing get_code_hash where it calls get_training_code_hash or get_processing_code_hash — if source_code.requirements can be None, ensure it's passed as [] rather than None. For example, where get_code_hash calls these functions, you could do:

dependencies = source_code.requirements or []

before passing it to the downstream hash functions.

dependencies = dependencies or []
from sagemaker.mlops.workflow.steps import ProcessingStep, TrainingStep

if isinstance(step, ProcessingStep) and step.step_args:
Expand Down
56 changes: 56 additions & 0 deletions sagemaker-core/tests/unit/workflow/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
from sagemaker.core.workflow.utilities import (
get_code_hash,
list_to_request,
hash_file,
hash_files_or_dirs,
Expand Down Expand Up @@ -225,6 +226,61 @@ def test_get_processing_code_hash_with_source_dir(self):
)

assert result is not None
Comment thread
aviruthen marked this conversation as resolved.

def test_get_processing_code_hash_with_source_dir_and_none_dependencies(self):
"""Test get_processing_code_hash with source_dir and dependencies=None"""
with tempfile.TemporaryDirectory() as temp_dir:
code_file = Path(temp_dir, "script.py")
code_file.write_text("print('hello')")

result = get_processing_code_hash(
code=str(code_file), source_dir=temp_dir, dependencies=None
)

assert result is not None
assert len(result) == 64

def test_get_processing_code_hash_with_code_only_and_none_dependencies(self):
"""Test get_processing_code_hash with code only and dependencies=None"""
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
f.write("print('hello')")
temp_file = f.name

try:
result = get_processing_code_hash(code=temp_file, source_dir=None, dependencies=None)

assert result is not None
assert len(result) == 64
Comment thread
aviruthen marked this conversation as resolved.
finally:
os.unlink(temp_file)

Comment thread
aviruthen marked this conversation as resolved.
@pytest.mark.skip(reason="Requires sagemaker-mlops module which is not installed in sagemaker-core tests")
def test_get_code_hash_with_training_step_none_requirements(self):
"""Test get_code_hash with a TrainingStep whose source_code.requirements is None"""
from sagemaker.mlops.workflow.steps import TrainingStep

with tempfile.TemporaryDirectory() as temp_dir:
entry_file = Path(temp_dir, "train.py")
entry_file.write_text("print('training')")

mock_source_code = Mock()
mock_source_code.source_dir = temp_dir
mock_source_code.requirements = None
mock_source_code.entry_script = str(entry_file)

mock_model_trainer = Mock()
mock_model_trainer.source_code = mock_source_code

mock_step_args = Mock()
mock_step_args.func_args = [mock_model_trainer]

step = Mock(spec=TrainingStep)
step.step_args = mock_step_args

result = get_code_hash(step)

Comment thread
aviruthen marked this conversation as resolved.
assert result is not None
assert len(result) == 64
assert len(result) == 64

Comment thread
aviruthen marked this conversation as resolved.
def test_get_processing_code_hash_code_only(self):
Expand Down
Loading