Skip to content

Commit 3643eb7

Browse files
committed
Fix
1 parent d664b18 commit 3643eb7

3 files changed

Lines changed: 38 additions & 136 deletions

File tree

sagemaker-mlops/tests/unit/sagemaker/mlops/feature_store/test_telemetry_integration.py

Lines changed: 0 additions & 104 deletions
This file was deleted.

sagemaker-serve/src/sagemaker/serve/model_builder.py

Lines changed: 33 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -831,34 +831,39 @@ def _resolve_compute_requirements_from_config(
831831
final_accelerators = user_resource_requirements.num_accelerators
832832

833833
# Determine accelerator count for GPU instances if not provided
834-
if final_accelerators is None:
835-
# Check if this is a GPU instance type
836-
# GPU families: g5, g4dn, g6, p3, p4d, p4de, p5
837-
gpu_patterns = ['.g5.', '.g4dn.', '.g6.', '.p3.', '.p4d.', '.p4de.', '.p5.']
838-
is_gpu_instance = any(pattern in instance_type for pattern in gpu_patterns)
839-
840-
if is_gpu_instance:
841-
# Try to infer accelerator count from instance type
842-
accelerator_count = self._infer_accelerator_count_from_instance_type(instance_type)
843-
if accelerator_count is not None:
844-
final_accelerators = accelerator_count
845-
logger.info(
846-
f"Inferred {final_accelerators} accelerator device(s) for instance type {instance_type}"
847-
)
848-
else:
849-
# Cannot determine accelerator count - raise descriptive error
850-
raise ValueError(
851-
f"Instance type '{instance_type}' requires accelerator device count specification.\n"
852-
f"Please provide ResourceRequirements with number of accelerators:\n\n"
853-
f" from sagemaker.core.inference_config import ResourceRequirements\n\n"
854-
f" resource_requirements = ResourceRequirements(\n"
855-
f" requests={{\n"
856-
f" 'num_accelerators': <number_of_gpus>,\n"
857-
f" 'memory': {final_min_memory}\n"
858-
f" }}\n"
859-
f" )\n\n"
860-
f"For {instance_type}, check AWS documentation for the number of GPUs available."
861-
)
834+
# Also strip accelerator count for CPU instances (AWS rejects it)
835+
gpu_patterns = ['.g5.', '.g4dn.', '.g6.', '.p3.', '.p4d.', '.p4de.', '.p5.', '.trn', '.inf']
836+
is_gpu_instance = any(pattern in instance_type for pattern in gpu_patterns)
837+
838+
if not is_gpu_instance:
839+
# CPU instance - must NOT include accelerator count
840+
if final_accelerators is not None:
841+
logger.info(
842+
f"Removing accelerator count ({final_accelerators}) for CPU instance type {instance_type}"
843+
)
844+
final_accelerators = None
845+
elif final_accelerators is None:
846+
# GPU instance without accelerator count - try to infer
847+
accelerator_count = self._infer_accelerator_count_from_instance_type(instance_type)
848+
if accelerator_count is not None:
849+
final_accelerators = accelerator_count
850+
logger.info(
851+
f"Inferred {final_accelerators} accelerator device(s) for instance type {instance_type}"
852+
)
853+
else:
854+
# Cannot determine accelerator count - raise descriptive error
855+
raise ValueError(
856+
f"Instance type '{instance_type}' requires accelerator device count specification.\n"
857+
f"Please provide ResourceRequirements with number of accelerators:\n\n"
858+
f" from sagemaker.core.inference_config import ResourceRequirements\n\n"
859+
f" resource_requirements = ResourceRequirements(\n"
860+
f" requests={{\n"
861+
f" 'num_accelerators': <number_of_gpus>,\n"
862+
f" 'memory': {final_min_memory}\n"
863+
f" }}\n"
864+
f" )\n\n"
865+
f"For {instance_type}, check AWS documentation for the number of GPUs available."
866+
)
862867

863868
# Validate requirements are compatible with instance type
864869
# Only validate user-provided requirements (defaults are already adjusted above)

sagemaker-serve/tests/unit/test_compute_requirements_resolution.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -900,7 +900,7 @@ def test_both_cpu_and_memory_incompatible(self, mock_get_resources, mock_fetch_h
900900
@patch('sagemaker.serve.model_builder.ModelBuilder._fetch_hub_document_for_custom_model')
901901
@patch('sagemaker.serve.model_builder.ModelBuilder._get_instance_resources')
902902
def test_zero_accelerator_count_explicit(self, mock_get_resources, mock_fetch_hub):
903-
"""Test that explicitly setting 0 accelerators works for CPU instances."""
903+
"""Test that explicitly setting 0 accelerators on CPU instance is stripped."""
904904
# Setup
905905
mock_fetch_hub.return_value = {
906906
"HostingConfigs": [
@@ -927,7 +927,7 @@ def test_zero_accelerator_count_explicit(self, mock_get_resources, mock_fetch_hu
927927
instance_type="ml.m5.xlarge"
928928
)
929929

930-
# User explicitly sets 0 accelerators
930+
# User explicitly sets 0 accelerators on a CPU instance
931931
user_requirements = ResourceRequirements(
932932
requests={
933933
"num_accelerators": 0,
@@ -942,8 +942,9 @@ def test_zero_accelerator_count_explicit(self, mock_get_resources, mock_fetch_hu
942942
user_resource_requirements=user_requirements
943943
)
944944

945-
# Verify: Should accept 0 accelerators
946-
assert requirements.number_of_accelerator_devices_required == 0
945+
# Verify: Accelerator count is stripped for CPU instances
946+
from sagemaker.core.utils.utils import Unassigned
947+
assert isinstance(requirements.number_of_accelerator_devices_required, Unassigned)
947948

948949

949950
if __name__ == "__main__":

0 commit comments

Comments
 (0)