Skip to content

Commit 37a1cff

Browse files
committed
Fix
1 parent 3643eb7 commit 37a1cff

3 files changed

Lines changed: 15 additions & 21 deletions

File tree

sagemaker-serve/src/sagemaker/serve/model_builder.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,9 @@ def _resolve_compute_requirements_from_config(
791791
# Extract default compute requirements from metadata
792792
compute_resource_requirements = config.get("ComputeResourceRequirements", {})
793793
default_cpus = compute_resource_requirements.get("NumberOfCpuCoresRequired", 1)
794-
default_memory_mb = compute_resource_requirements.get("MinMemoryRequiredInMb", 1024)
794+
# Use 1024 MB as safe default for min_memory - metadata values can exceed
795+
# SageMaker inference component limits (which are lower than raw EC2 memory)
796+
default_memory_mb = 1024
795797
default_accelerators = compute_resource_requirements.get("NumberOfAcceleratorDevicesRequired")
796798

797799
# Get actual instance resources for validation
@@ -805,14 +807,6 @@ def _resolve_compute_requirements_from_config(
805807
)
806808
default_cpus = actual_cpus
807809

808-
# Adjust memory if it exceeds instance capacity
809-
if actual_memory_mb and default_memory_mb > actual_memory_mb:
810-
logger.warning(
811-
f"Default requirements request {default_memory_mb} MB memory but {instance_type} has {actual_memory_mb} MB. "
812-
f"Adjusting to {actual_memory_mb} MB."
813-
)
814-
default_memory_mb = actual_memory_mb
815-
816810
# Initialize with defaults
817811
final_cpus = default_cpus
818812
final_min_memory = default_memory_mb

sagemaker-serve/tests/unit/test_compute_requirements_resolution.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ def test_resolve_with_defaults_only(self, mock_get_resources, mock_fetch_hub):
7171
user_resource_requirements=None
7272
)
7373

74-
# Verify: Should use defaults from JumpStart
74+
# Verify: Should use safe default memory (1024), CPUs from metadata
7575
assert requirements.number_of_cpu_cores_required == 4
76-
assert requirements.min_memory_required_in_mb == 8192
76+
assert requirements.min_memory_required_in_mb == 1024
7777
# Check that accelerator count is not set (should be Unassigned)
7878
from sagemaker.core.utils.utils import Unassigned
7979
assert isinstance(requirements.number_of_accelerator_devices_required, Unassigned)
@@ -405,21 +405,21 @@ def test_adjust_cpu_count_when_default_exceeds_capacity(self, mock_get_resources
405405
@patch('sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model')
406406
@patch('sagemaker.serve.model_builder.ModelBuilder._get_instance_resources')
407407
def test_adjust_memory_when_default_exceeds_capacity(self, mock_get_resources, mock_fetch_hub):
408-
"""Test automatic memory adjustment when default exceeds instance capacity."""
409-
# Setup: Default requests 32GB but instance only has 8GB
408+
"""Test that default memory is 1024 MB regardless of metadata value."""
409+
# Setup: Metadata requests 32GB but we use safe default of 1024
410410
mock_fetch_hub.return_value = {
411411
"HostingConfigs": [
412412
{
413413
"Profile": "Default",
414414
"ComputeResourceRequirements": {
415415
"NumberOfCpuCoresRequired": 4,
416-
"MinMemoryRequiredInMb": 32768 # 32GB requested
416+
"MinMemoryRequiredInMb": 32768 # This is ignored for defaults
417417
}
418418
}
419419
]
420420
}
421421
mock_get_resources.return_value = (8, 8192) # Only 8GB RAM
422-
422+
423423
builder = ModelBuilder(
424424
model="huggingface-llm-mistral-7b",
425425
model_metadata={
@@ -429,17 +429,17 @@ def test_adjust_memory_when_default_exceeds_capacity(self, mock_get_resources, m
429429
mode=Mode.SAGEMAKER_ENDPOINT,
430430
role_arn="arn:aws:iam::123456789012:role/TestRole",
431431
sagemaker_session=self.mock_session,
432-
instance_type="ml.m5.large" # Provide instance_type to avoid auto-detection
432+
instance_type="ml.m5.large"
433433
)
434-
434+
435435
# Execute
436436
requirements = builder._resolve_compute_requirements(
437437
instance_type="ml.m5.large",
438438
user_resource_requirements=None
439439
)
440-
441-
# Verify: Should adjust to instance capacity
442-
assert requirements.min_memory_required_in_mb == 8192
440+
441+
# Verify: Uses safe default of 1024, not metadata value
442+
assert requirements.min_memory_required_in_mb == 1024
443443

444444
@patch('sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model')
445445
@patch('sagemaker.serve.model_builder.ModelBuilder._get_instance_resources')

sagemaker-serve/tests/unit/test_inference_config_parameter_handling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ def test_inference_config_not_provided_uses_cached_requirements(
345345

346346
# Verify cached requirements were used
347347
assert compute_reqs.number_of_cpu_cores_required == 4
348-
assert compute_reqs.min_memory_required_in_mb == 8192
348+
assert compute_reqs.min_memory_required_in_mb == 1024
349349
assert compute_reqs.number_of_accelerator_devices_required == 4
350350

351351
@patch('sagemaker.core.resources.InferenceComponent.get')

0 commit comments

Comments
 (0)