65 changes: 64 additions & 1 deletion sagemaker-serve/src/sagemaker/serve/model_builder.py
Collaborator
Two fixes needed:

  1. Bug: variant_name always overrides model customization default. In deploy(), kwargs["variant_name"] = variant_name or "AllTraffic" always sets the key, so _deploy_model_customization never sees None and its backward-compat default of endpoint_name is dead code. Fix: only forward variant_name when explicitly provided:
# Replace:
kwargs["variant_name"] = variant_name or "AllTraffic"
# With:
if variant_name is not None:
    kwargs["variant_name"] = variant_name

Each downstream path already has its own default — _deploy_core_endpoint defaults to "AllTraffic" via kwargs.get("variant_name", "AllTraffic"), and _deploy_model_customization defaults to endpoint_name via variant_name or endpoint_name or "AllTraffic".

  2. Drop the second integ test (test_deploy_with_data_cache_config_via_model_customization_path). The model customization path requires ml.g5.4xlarge, which has a non-adjustable account quota of 2 instances. When CI runs tests in parallel, this test competes with the existing test_model_customization_deployment.py for the same quota, causing flaky InsufficientInstanceCapacity failures. The model customization path's data_cache_config and variant_name wiring is already covered by unit tests. Keep only the first integ test (test_deploy_with_data_cache_config_and_variant_name_via_ic_path), which uses ml.g5.2xlarge.
    Also remove the TRAINING_JOB_NAME constant since it's no longer needed.
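The precedence behavior behind fix 1 can be sketched with stand-in functions (hypothetical names, not the real SDK code) to show why the unconditional assignment makes the downstream fallback dead code:

```python
# Sketch of the variant_name precedence bug; stand-in functions only.

def customization_default(kwargs, endpoint_name):
    # Mirrors _deploy_model_customization's backward-compat fallback chain:
    # explicit variant_name, else endpoint_name, else "AllTraffic".
    return kwargs.get("variant_name") or endpoint_name or "AllTraffic"

def deploy_buggy(variant_name=None, endpoint_name="my-endpoint"):
    kwargs = {}
    kwargs["variant_name"] = variant_name or "AllTraffic"  # always sets the key
    return customization_default(kwargs, endpoint_name)

def deploy_fixed(variant_name=None, endpoint_name="my-endpoint"):
    kwargs = {}
    if variant_name is not None:  # forward only when explicitly provided
        kwargs["variant_name"] = variant_name
    return customization_default(kwargs, endpoint_name)

# Buggy: the endpoint_name fallback can never fire.
assert deploy_buggy() == "AllTraffic"
# Fixed: the customization path falls back to endpoint_name.
assert deploy_fixed() == "my-endpoint"
# An explicit value still wins in both versions.
assert deploy_fixed(variant_name="v1") == "v1"
```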

Collaborator

Do not worry about the CI failures! Removing the second integ test will fix one of them; the remaining failures are due to flakiness.

@@ -45,6 +45,8 @@
ModelLifeCycle,
DriftCheckBaselines,
InferenceComponentComputeResourceRequirements,
InferenceComponentDataCacheConfig,
InferenceComponentContainerSpecification,
)
from sagemaker.core.resources import (
ModelPackage,
@@ -2978,18 +2980,49 @@ def _deploy_core_endpoint(self, **kwargs):
"StartupParameters": startup_parameters,
"ComputeResourceRequirements": resources.get_compute_resource_requirements(),
}

# Wire optional IC-level parameters into the specification
ic_data_cache_config = kwargs.get("data_cache_config")
if ic_data_cache_config is not None:
resolved_cache_config = self._resolve_data_cache_config(ic_data_cache_config)
if resolved_cache_config is not None:
cache_dict = {"EnableCaching": resolved_cache_config.enable_caching}
# Forward any additional fields from the shape as they become available
Collaborator Author

The DataCacheConfig is being manually serialized to a dict ({"EnableCaching": ...}), but the spec dict already uses PascalCase API keys. Consider whether create_inference_component expects the Pydantic shape object directly (as sagemaker-core typically handles serialization) rather than a manually constructed dict. If the session method handles serialization, passing the resolved InferenceComponentDataCacheConfig object directly would be more robust and future-proof as new fields are added to the shape.
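To illustrate the concern with stand-in classes (not the actual sagemaker-core shapes or session API): if the session layer serializes the shape object itself, new fields propagate to the API dict without touching this call site, whereas a hand-built dict silently drops them:

```python
from dataclasses import dataclass, asdict
from typing import Optional

# Stand-in for a sagemaker-core shape; not the real class.
@dataclass
class DataCacheConfig:
    enable_caching: bool
    cache_ttl_seconds: Optional[int] = None  # hypothetical future field

def to_api_dict(shape) -> dict:
    # Generic snake_case -> PascalCase serialization, as a shape-aware
    # session layer might perform; drops unset fields.
    def pascal(name: str) -> str:
        return "".join(part.capitalize() for part in name.split("_"))
    return {pascal(k): v for k, v in asdict(shape).items() if v is not None}

cfg = DataCacheConfig(enable_caching=True, cache_ttl_seconds=300)

# Hand-built dict: the new field is silently dropped.
manual = {"EnableCaching": cfg.enable_caching}
# Shape-aware serialization: all populated fields flow through.
auto = to_api_dict(cfg)

assert manual == {"EnableCaching": True}
assert auto == {"EnableCaching": True, "CacheTtlSeconds": 300}
```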

inference_component_spec["DataCacheConfig"] = cache_dict

ic_base_component_name = kwargs.get("base_inference_component_name")
if ic_base_component_name is not None:
inference_component_spec["BaseInferenceComponentName"] = ic_base_component_name

ic_container = kwargs.get("container")
if ic_container is not None:
resolved_container = self._resolve_container_spec(ic_container)
if resolved_container is not None:
container_dict = {}
if resolved_container.image:
container_dict["Image"] = resolved_container.image
if resolved_container.artifact_url:
container_dict["ArtifactUrl"] = resolved_container.artifact_url
if resolved_container.environment:
container_dict["Environment"] = resolved_container.environment
if container_dict:
inference_component_spec["Container"] = container_dict

runtime_config = {"CopyCount": resources.copy_count}
self.inference_component_name = (
inference_component_name
or self.inference_component_name
or unique_name_from_base(self.model_name)
)

# Use user-provided variant_name or default to "AllTraffic"
ic_variant_name = kwargs.get("variant_name", "AllTraffic")

# [TODO]: Add endpoint_logging support
self.sagemaker_session.create_inference_component(
inference_component_name=self.inference_component_name,
endpoint_name=self.endpoint_name,
variant_name="AllTraffic", # default variant name
variant_name=ic_variant_name,
specification=inference_component_spec,
runtime_config=runtime_config,
tags=tags,
@@ -4127,6 +4160,10 @@ def deploy(
] = None,
custom_orchestrator_instance_type: str = None,
custom_orchestrator_initial_instance_count: int = None,
data_cache_config: Optional[Union["InferenceComponentDataCacheConfig", Dict[str, Any]]] = None,
base_inference_component_name: Optional[str] = None,
container: Optional[Union["InferenceComponentContainerSpecification", Dict[str, Any]]] = None,
variant_name: Optional[str] = None,
**kwargs,
) -> Union[Endpoint, LocalEndpoint, Transformer]:
"""Deploy the built model to an ``Endpoint``.
@@ -4160,6 +4197,22 @@
orchestrator deployment. (Default: None).
custom_orchestrator_initial_instance_count (int, optional): Initial instance count
for custom orchestrator deployment. (Default: None).
data_cache_config (Union[InferenceComponentDataCacheConfig, dict], optional):
Data cache configuration for the inference component. Enables caching of model
artifacts and container images on instances for faster auto-scaling cold starts.
Can be a dict with 'enable_caching' key (e.g., {'enable_caching': True}) or an
InferenceComponentDataCacheConfig instance. (Default: None).
base_inference_component_name (str, optional): Name of the base inference component
for adapter deployments (e.g., LoRA adapters attached to a base model).
(Default: None).
container (Union[InferenceComponentContainerSpecification, dict], optional):
Custom container specification for the inference component, including image URI,
artifact URL, and environment variables. Can be a dict with keys 'image',
'artifact_url', 'environment' or an InferenceComponentContainerSpecification
instance. (Default: None).
variant_name (str, optional): The name of the production variant to deploy to.
If not provided (or explicitly ``None``), defaults to ``'AllTraffic'``.
(Default: None).
Returns:
Union[Endpoint, LocalEndpoint, Transformer]: A ``sagemaker.core.resources.Endpoint``
resource representing the deployed endpoint, a ``LocalEndpoint`` for local mode,
@@ -4182,6 +4235,16 @@
if not hasattr(self, "built_model") and not hasattr(self, "_deployables"):
raise ValueError("Model needs to be built before deploying")

# Store IC-level parameters for use in _deploy_core_endpoint
if data_cache_config is not None:
kwargs["data_cache_config"] = data_cache_config
if base_inference_component_name is not None:
kwargs["base_inference_component_name"] = base_inference_component_name
if container is not None:
kwargs["container"] = container
if variant_name is not None:
kwargs["variant_name"] = variant_name

# Handle model customization deployment
if self._is_model_customization():
logger.info("Deploying Model Customization model")
74 changes: 74 additions & 0 deletions sagemaker-serve/src/sagemaker/serve/model_builder_utils.py
@@ -80,6 +80,10 @@ def build(self):
from sagemaker.core.resources import Model

# MLflow imports
from sagemaker.core.shapes import (
InferenceComponentDataCacheConfig,
InferenceComponentContainerSpecification,
)
from sagemaker.serve.model_format.mlflow.constants import (
MLFLOW_METADATA_FILE,
MLFLOW_MODEL_PATH,
@@ -3369,6 +3373,76 @@ def _extract_speculative_draft_model_provider(

return "auto"

def _resolve_data_cache_config(
self,
data_cache_config: Union[InferenceComponentDataCacheConfig, Dict[str, Any], None],
) -> Optional[InferenceComponentDataCacheConfig]:
"""Resolve data_cache_config to InferenceComponentDataCacheConfig.

Args:
data_cache_config: Either a dict with 'enable_caching' key,
an InferenceComponentDataCacheConfig instance, or None.

Returns:
InferenceComponentDataCacheConfig or None.

Raises:
ValueError: If data_cache_config is an unsupported type or dict
is missing the required 'enable_caching' key.
"""
if data_cache_config is None:
return None

if isinstance(data_cache_config, InferenceComponentDataCacheConfig):
return data_cache_config
elif isinstance(data_cache_config, dict):
if "enable_caching" not in data_cache_config:
raise ValueError(
"data_cache_config dict must contain the required 'enable_caching' key. "
"Example: {'enable_caching': True}"
)
return InferenceComponentDataCacheConfig(
enable_caching=data_cache_config["enable_caching"]
)
else:
raise ValueError(
f"data_cache_config must be a dict with 'enable_caching' key or an "
f"InferenceComponentDataCacheConfig instance, got {type(data_cache_config)}"
)
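The resolver's contract can be exercised with a self-contained stand-in (a minimal dataclass in place of the sagemaker-core shape; not the real class):

```python
from dataclasses import dataclass

# Stand-in for sagemaker.core.shapes.InferenceComponentDataCacheConfig.
@dataclass
class CacheConfig:
    enable_caching: bool

def resolve_data_cache_config(data_cache_config):
    # Mirrors the resolution logic above: None passes through, shape
    # instances are returned as-is, dicts must carry 'enable_caching'.
    if data_cache_config is None:
        return None
    if isinstance(data_cache_config, CacheConfig):
        return data_cache_config
    if isinstance(data_cache_config, dict):
        if "enable_caching" not in data_cache_config:
            raise ValueError("data_cache_config dict must contain 'enable_caching'")
        return CacheConfig(enable_caching=data_cache_config["enable_caching"])
    raise ValueError(f"unsupported type: {type(data_cache_config)}")

assert resolve_data_cache_config(None) is None
assert resolve_data_cache_config({"enable_caching": True}).enable_caching is True
try:
    resolve_data_cache_config({"ttl": 300})
except ValueError:
    pass
else:
    raise AssertionError("dict without 'enable_caching' should raise")
```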

def _resolve_container_spec(
self,
container: Union[InferenceComponentContainerSpecification, Dict[str, Any], None],
) -> Optional[InferenceComponentContainerSpecification]:
"""Resolve container to InferenceComponentContainerSpecification.

Args:
container: Either a dict with container config keys (image, artifact_url,
environment), an InferenceComponentContainerSpecification instance, or None.

Returns:
InferenceComponentContainerSpecification or None.

Raises:
ValueError: If container is an unsupported type.
"""
if container is None:
return None

if isinstance(container, InferenceComponentContainerSpecification):
return container
elif isinstance(container, dict):
# Only pass known keys to avoid Pydantic validation errors
# if the model has extra='forbid' configured
known_keys = {"image", "artifact_url", "environment"}
filtered = {k: v for k, v in container.items() if k in known_keys}
return InferenceComponentContainerSpecification(**filtered)
else:
raise ValueError(
f"container must be a dict or an InferenceComponentContainerSpecification "
f"instance, got {type(container)}"
)
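The known-keys filtering can be checked with a stand-in spec class (again, not the real sagemaker-core shape): unknown dict keys are dropped rather than passed through, so a strict (extra='forbid') model would not reject the input.

```python
from dataclasses import dataclass
from typing import Optional

# Stand-in for InferenceComponentContainerSpecification.
@dataclass
class ContainerSpec:
    image: Optional[str] = None
    artifact_url: Optional[str] = None
    environment: Optional[dict] = None

def resolve_container_spec(container):
    # Mirrors the logic above: only known keys reach the constructor.
    if container is None:
        return None
    if isinstance(container, ContainerSpec):
        return container
    if isinstance(container, dict):
        known_keys = {"image", "artifact_url", "environment"}
        filtered = {k: v for k, v in container.items() if k in known_keys}
        return ContainerSpec(**filtered)
    raise ValueError(f"unsupported type: {type(container)}")

spec = resolve_container_spec(
    {"image": "my-image:latest", "typo_key": "ignored"}  # unknown key dropped
)
assert spec.image == "my-image:latest"
assert spec.environment is None
```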

def get_huggingface_model_metadata(
self, model_id: str, hf_hub_token: Optional[str] = None
) -> dict: