Skip to content

Commit 1001bf9

Browse files
committed
Bug fix for GetRecordResponse not printable and error message for ingest_dataframe
1 parent 44764ae commit 1001bf9

4 files changed

Lines changed: 153 additions & 0 deletions

File tree

sagemaker-core/src/sagemaker/core/utils/utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,22 @@ def __new__(cls):
298298
cls._instance = super().__new__(cls)
299299
return cls._instance
300300

301+
def __repr__(self):
302+
"""Return clean representation for debugging."""
303+
return "Unassigned()"
304+
305+
def __str__(self):
306+
"""Return empty string for clean printing."""
307+
return ""
308+
309+
def __iter__(self):
310+
"""Return empty iterator to prevent iteration errors."""
311+
return iter([])
312+
313+
def __bool__(self):
314+
"""Return False for truthiness checks."""
315+
return False
316+
301317

302318
class SingletonMeta(type):
303319
"""

sagemaker-core/tests/unit/generated/test_utils.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,3 +371,50 @@ def test_serialize_method_nested_shape():
371371
"StringValue": "string",
372372
},
373373
}
374+
375+
376+
class TestUnassignedBehavior:
377+
"""Test Unassigned class methods for proper behavior.
378+
379+
Bug fix: GetRecordResponse is not printable and cannot be parsed via iterator.
380+
Error: TypeError: 'Unassigned' object is not iterable
381+
"""
382+
383+
def test_unassigned_repr(self):
384+
"""Test that Unassigned has clean repr."""
385+
u = Unassigned()
386+
assert repr(u) == "Unassigned()"
387+
388+
def test_unassigned_str(self):
389+
"""Test that Unassigned converts to empty string."""
390+
u = Unassigned()
391+
assert str(u) == ""
392+
393+
def test_unassigned_bool(self):
394+
"""Test that Unassigned is falsy."""
395+
u = Unassigned()
396+
assert not u
397+
assert bool(u) is False
398+
399+
def test_unassigned_iter(self):
400+
"""Test that Unassigned is iterable and returns empty list."""
401+
u = Unassigned()
402+
result = list(u)
403+
assert result == []
404+
405+
def test_unassigned_singleton(self):
406+
"""Test that Unassigned is a singleton."""
407+
u1 = Unassigned()
408+
u2 = Unassigned()
409+
assert u1 is u2
410+
411+
def test_unassigned_in_conditional(self):
412+
"""Test that Unassigned works correctly in conditionals."""
413+
u = Unassigned()
414+
415+
# Should evaluate to False
416+
if u:
417+
pytest.fail("Unassigned should be falsy")
418+
419+
# Should work with not
420+
assert not u

sagemaker-mlops/src/sagemaker/mlops/feature_store/ingestion_manager_pandas.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,17 @@ def run(
8585
wait (bool): whether to wait for the ingestion to finish or not.
8686
timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
8787
if timeout is reached.
88+
89+
Raises:
90+
ValueError: If wait=False with max_workers=1 and max_processes=1.
8891
"""
92+
# Validate async ingestion requirements
93+
if not wait and self.max_workers == 1 and self.max_processes == 1:
94+
raise ValueError(
95+
"Async ingestion (wait=False) requires max_processes > 1 or max_workers > 1. "
96+
"Single-threaded ingestion only supports synchronous mode (wait=True)."
97+
)
98+
8999
if self.max_workers == 1 and self.max_processes == 1:
90100
self._run_single_process_single_thread(data_frame=data_frame, target_stores=target_stores)
91101
else:

sagemaker-mlops/tests/unit/sagemaker/mlops/feature_store/test_ingestion_manager_pandas.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,3 +254,83 @@ def test_ingest_row_skips_none_values(self, feature_definitions):
254254
# Only id should be in record, name is None
255255
assert len(record) == 1
256256
assert record[0].feature_name == "id"
257+
258+
259+
class TestAsyncIngestionValidation:
260+
"""Test async ingestion validation with max_processes=1.
261+
262+
Bug fix: Error message unclear when trying to use async ingestion with 1 process.
263+
"""
264+
265+
def test_async_with_single_process_single_worker_raises_clear_error(self):
266+
"""Test that wait=False with max_processes=1 and max_workers=1 raises clear error."""
267+
manager = IngestionManagerPandas(
268+
feature_group_name="test-fg",
269+
feature_definitions={"id": {"FeatureType": "String", "CollectionType": None}},
270+
max_workers=1,
271+
max_processes=1,
272+
)
273+
274+
df = pd.DataFrame({"id": ["1", "2", "3"]})
275+
276+
with pytest.raises(ValueError) as exc_info:
277+
manager.run(data_frame=df, wait=False)
278+
279+
error_message = str(exc_info.value)
280+
assert "Async ingestion (wait=False)" in error_message
281+
assert "max_processes > 1 or max_workers > 1" in error_message
282+
assert "wait=True" in error_message
283+
284+
@patch("sagemaker.mlops.feature_store.ingestion_manager_pandas.CoreFeatureGroup")
285+
def test_sync_with_single_process_single_worker_works(self, mock_fg_class):
286+
"""Test that wait=True with max_processes=1 and max_workers=1 works."""
287+
mock_fg = Mock()
288+
mock_fg_class.return_value = mock_fg
289+
290+
manager = IngestionManagerPandas(
291+
feature_group_name="test-fg",
292+
feature_definitions={"id": {"FeatureType": "String", "CollectionType": None}},
293+
max_workers=1,
294+
max_processes=1,
295+
)
296+
297+
df = pd.DataFrame({"id": ["1", "2", "3"]})
298+
299+
# Should not raise validation error
300+
manager.run(data_frame=df, wait=True)
301+
302+
def test_async_with_multiple_workers_no_validation_error(self):
303+
"""Test that wait=False with max_workers > 1 doesn't raise validation error."""
304+
manager = IngestionManagerPandas(
305+
feature_group_name="test-fg",
306+
feature_definitions={"id": {"FeatureType": "String", "CollectionType": None}},
307+
max_workers=2,
308+
max_processes=1,
309+
)
310+
311+
df = pd.DataFrame({"id": ["1", "2", "3"]})
312+
313+
# Should not raise validation error (will fail on actual ingestion, but that's expected)
314+
with pytest.raises(Exception) as exc_info:
315+
manager.run(data_frame=df, wait=False)
316+
317+
# Make sure it's not the validation error
318+
assert "Async ingestion (wait=False)" not in str(exc_info.value)
319+
320+
def test_async_with_multiple_processes_no_validation_error(self):
321+
"""Test that wait=False with max_processes > 1 doesn't raise validation error."""
322+
manager = IngestionManagerPandas(
323+
feature_group_name="test-fg",
324+
feature_definitions={"id": {"FeatureType": "String", "CollectionType": None}},
325+
max_workers=1,
326+
max_processes=2,
327+
)
328+
329+
df = pd.DataFrame({"id": ["1", "2", "3"]})
330+
331+
# Should not raise validation error (will fail on actual ingestion, but that's expected)
332+
with pytest.raises(Exception) as exc_info:
333+
manager.run(data_frame=df, wait=False)
334+
335+
# Make sure it's not the validation error
336+
assert "Async ingestion (wait=False)" not in str(exc_info.value)

0 commit comments

Comments
 (0)