Skip to content

Commit 05e59de

Browse files
committed
fix: ModelBuilder.deploy() should expose DataCacheConfig and other CreateInferenceComponent parameters (5750)
1 parent daf19b0 commit 05e59de

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

sagemaker-serve/src/sagemaker/serve/model_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
ModelLifeCycle,
4646
DriftCheckBaselines,
4747
InferenceComponentComputeResourceRequirements,
48+
InferenceComponentDataCacheConfig,
49+
InferenceComponentContainerSpecification,
4850
)
4951
from sagemaker.core.resources import (
5052
ModelPackage,

sagemaker-serve/src/sagemaker/serve/model_builder_utils.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3369,6 +3369,71 @@ def _extract_speculative_draft_model_provider(
33693369

33703370
return "auto"
33713371

3372+
def _resolve_data_cache_config(self, data_cache_config):
    """Normalize a data cache setting into an InferenceComponentDataCacheConfig.

    Args:
        data_cache_config: None, an InferenceComponentDataCacheConfig instance,
            or a dict whose 'enable_caching' entry supplies the flag (the flag
            is taken as False when the key is absent).

    Returns:
        None when no config was given, the instance itself when one was passed
        in, otherwise a new InferenceComponentDataCacheConfig built from the dict.

    Raises:
        ValueError: If data_cache_config is not None, a dict, or an
            InferenceComponentDataCacheConfig instance.
    """
    if data_cache_config is None:
        return None

    # Imported lazily: only needed once a non-None value has to be resolved.
    from sagemaker.core.shapes import InferenceComponentDataCacheConfig

    # Already the target type: hand it back untouched.
    if isinstance(data_cache_config, InferenceComponentDataCacheConfig):
        return data_cache_config

    # Anything that is not a dict at this point is unsupported.
    if not isinstance(data_cache_config, dict):
        raise ValueError(
            f"data_cache_config must be a dict with 'enable_caching' key or an "
            f"InferenceComponentDataCacheConfig instance, got {type(data_cache_config)}"
        )

    caching_enabled = data_cache_config.get("enable_caching", False)
    return InferenceComponentDataCacheConfig(enable_caching=caching_enabled)
3401+
3402+
def _resolve_container_spec(self, container):
    """Normalize a container setting into an InferenceComponentContainerSpecification.

    Args:
        container: None, an InferenceComponentContainerSpecification instance,
            or a dict. From a dict, only the 'image', 'artifact_url' and
            'environment' entries are forwarded; other keys are not passed on.

    Returns:
        None when no container was given, the instance itself when one was
        passed in, otherwise a new InferenceComponentContainerSpecification
        built from the recognized dict entries.

    Raises:
        ValueError: If container is not None, a dict, or an
            InferenceComponentContainerSpecification instance.
    """
    if container is None:
        return None

    # Imported lazily: only needed once a non-None value has to be resolved.
    from sagemaker.core.shapes import InferenceComponentContainerSpecification

    # Already the target type: hand it back untouched.
    if isinstance(container, InferenceComponentContainerSpecification):
        return container

    # Anything that is not a dict at this point is unsupported.
    if not isinstance(container, dict):
        raise ValueError(
            f"container must be a dict or an InferenceComponentContainerSpecification "
            f"instance, got {type(container)}"
        )

    # Copy across only the keys that are present, in a fixed order.
    recognized_keys = ("image", "artifact_url", "environment")
    spec_fields = {key: container[key] for key in recognized_keys if key in container}
    return InferenceComponentContainerSpecification(**spec_fields)
3436+
33723437
def get_huggingface_model_metadata(
33733438
self, model_id: str, hf_hub_token: Optional[str] = None
33743439
) -> dict:

0 commit comments

Comments
 (0)