@@ -831,34 +831,39 @@ def _resolve_compute_requirements_from_config(
831831 final_accelerators = user_resource_requirements .num_accelerators
832832
833833 # Determine accelerator count for GPU instances if not provided
834- if final_accelerators is None :
835- # Check if this is a GPU instance type
836- # GPU families: g5, g4dn, g6, p3, p4d, p4de, p5
837- gpu_patterns = ['.g5.' , '.g4dn.' , '.g6.' , '.p3.' , '.p4d.' , '.p4de.' , '.p5.' ]
838- is_gpu_instance = any (pattern in instance_type for pattern in gpu_patterns )
839-
840- if is_gpu_instance :
841- # Try to infer accelerator count from instance type
842- accelerator_count = self ._infer_accelerator_count_from_instance_type (instance_type )
843- if accelerator_count is not None :
844- final_accelerators = accelerator_count
845- logger .info (
846- f"Inferred { final_accelerators } accelerator device(s) for instance type { instance_type } "
847- )
848- else :
849- # Cannot determine accelerator count - raise descriptive error
850- raise ValueError (
851- f"Instance type '{ instance_type } ' requires accelerator device count specification.\n "
852- f"Please provide ResourceRequirements with number of accelerators:\n \n "
853- f" from sagemaker.core.inference_config import ResourceRequirements\n \n "
854- f" resource_requirements = ResourceRequirements(\n "
855- f" requests={{\n "
856- f" 'num_accelerators': <number_of_gpus>,\n "
857- f" 'memory': { final_min_memory } \n "
858- f" }}\n "
859- f" )\n \n "
860- f"For { instance_type } , check AWS documentation for the number of GPUs available."
861- )
834+ # Also strip accelerator count for CPU instances (AWS rejects it)
835+ gpu_patterns = ['.g5.' , '.g4dn.' , '.g6.' , '.p3.' , '.p4d.' , '.p4de.' , '.p5.' , '.trn' , '.inf' ]
836+ is_gpu_instance = any (pattern in instance_type for pattern in gpu_patterns )
837+
838+ if not is_gpu_instance :
839+ # CPU instance - must NOT include accelerator count
840+ if final_accelerators is not None :
841+ logger .info (
842+ f"Removing accelerator count ({ final_accelerators } ) for CPU instance type { instance_type } "
843+ )
844+ final_accelerators = None
845+ elif final_accelerators is None :
846+ # GPU instance without accelerator count - try to infer
847+ accelerator_count = self ._infer_accelerator_count_from_instance_type (instance_type )
848+ if accelerator_count is not None :
849+ final_accelerators = accelerator_count
850+ logger .info (
851+ f"Inferred { final_accelerators } accelerator device(s) for instance type { instance_type } "
852+ )
853+ else :
854+ # Cannot determine accelerator count - raise descriptive error
855+ raise ValueError (
856+ f"Instance type '{ instance_type } ' requires accelerator device count specification.\n "
857+ f"Please provide ResourceRequirements with number of accelerators:\n \n "
858+ f" from sagemaker.core.inference_config import ResourceRequirements\n \n "
859+ f" resource_requirements = ResourceRequirements(\n "
860+ f" requests={{\n "
861+ f" 'num_accelerators': <number_of_gpus>,\n "
862+ f" 'memory': { final_min_memory } \n "
863+ f" }}\n "
864+ f" )\n \n "
865+ f"For { instance_type } , check AWS documentation for the number of GPUs available."
866+ )
862867
863868 # Validate requirements are compatible with instance type
864869 # Only validate user-provided requirements (defaults are already adjusted above)
0 commit comments