Skip to content

Commit e0c912b

Browse files
committed
add nova model support
1 parent 6004e8b commit e0c912b

1 file changed

Lines changed: 223 additions & 0 deletions

File tree

sagemaker-serve/src/sagemaker/serve/model_builder.py

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -985,12 +985,112 @@ def _fetch_and_cache_recipe_config(self):
985985
)
986986
return
987987

988+
# Fallback: Nova recipes don't have hosting configs in the hub document
989+
if self._is_nova_model():
990+
nova_config = self._get_nova_hosting_config(instance_type=self.instance_type)
991+
if not self.image_uri:
992+
self.image_uri = nova_config["image_uri"]
993+
if not self.env_vars:
994+
self.env_vars = nova_config["env_vars"]
995+
if not self.instance_type:
996+
self.instance_type = nova_config["instance_type"]
997+
return
998+
988999
raise ValueError(
9891000
f"Model with recipe '{recipe_name}' is not supported for deployment. "
9901001
f"The recipe does not have hosting configuration. "
9911002
f"Please use a model that supports deployment or contact AWS support for assistance."
9921003
)
9931004

1005+
# Nova escrow ECR accounts, keyed by AWS region.
# Each value is the AWS account ID that owns the escrow ECR registry holding
# the Nova inference container image; _get_nova_hosting_config uses it to
# assemble the image URI. Regions absent from this map cannot deploy Nova.
_NOVA_ESCROW_ACCOUNTS = {
    "us-east-1": "708977205387",
    "us-west-2": "176779409107",
    "eu-west-2": "470633809225",
    "ap-northeast-1": "878185805882",
}
1012+
1013+
# Nova hosting configs per model (from Rhinestone modelDeployment.ts).
# Keyed by hub content name; each entry lists the supported instance types
# with the container environment to use for that size. The entry carrying
# Profile == "Default" is picked when the caller does not request a specific
# instance type (falling back to the first entry if none is marked Default).
_NOVA_HOSTING_CONFIGS = {
    "nova-textgeneration-micro": [
        {"InstanceType": "ml.g5.12xlarge", "Environment": {"CONTEXT_LENGTH": "4096", "MAX_CONCURRENCY": "16"}},
        {"InstanceType": "ml.g5.24xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "8192", "MAX_CONCURRENCY": "16"}},
        {"InstanceType": "ml.g6.12xlarge", "Environment": {"CONTEXT_LENGTH": "10000", "MAX_CONCURRENCY": "16"}},
        {"InstanceType": "ml.g6.24xlarge", "Environment": {"CONTEXT_LENGTH": "10000", "MAX_CONCURRENCY": "16"}},
        {"InstanceType": "ml.g6.48xlarge", "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"}},
        {"InstanceType": "ml.p5.48xlarge", "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"}},
    ],
    "nova-textgeneration-lite": [
        {"InstanceType": "ml.g6.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "20000", "MAX_CONCURRENCY": "16"}},
        {"InstanceType": "ml.p5.48xlarge", "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"}},
    ],
    "nova-textgeneration-pro": [
        {"InstanceType": "ml.g6.48xlarge", "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"}},
        {"InstanceType": "ml.p5.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "50000", "MAX_CONCURRENCY": "16"}},
    ],
    "nova-textgeneration-lite-v2": [
        {"InstanceType": "ml.p5.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "50000", "MAX_CONCURRENCY": "16"}},
    ],
}
1035+
1036+
def _is_nova_model(self) -> bool:
1037+
"""Check if the model is a Nova model based on recipe name or hub content name."""
1038+
model_package = self._fetch_model_package()
1039+
if not model_package:
1040+
return False
1041+
containers = getattr(model_package.inference_specification, "containers", None)
1042+
if not containers:
1043+
return False
1044+
base_model = getattr(containers[0], "base_model", None)
1045+
if not base_model:
1046+
return False
1047+
recipe_name = getattr(base_model, "recipe_name", "") or ""
1048+
hub_content_name = getattr(base_model, "hub_content_name", "") or ""
1049+
return "nova" in recipe_name.lower() or "nova" in hub_content_name.lower()
1050+
1051+
def _get_nova_hosting_config(self, instance_type=None):
1052+
"""Get Nova hosting config (image URI, env vars, instance type).
1053+
1054+
Nova training recipes don't have hosting configs in the JumpStart hub document.
1055+
This provides the hardcoded fallback, matching Rhinestone's getNovaHostingConfigs().
1056+
"""
1057+
model_package = self._fetch_model_package()
1058+
hub_content_name = model_package.inference_specification.containers[0].base_model.hub_content_name
1059+
1060+
configs = self._NOVA_HOSTING_CONFIGS.get(hub_content_name)
1061+
if not configs:
1062+
raise ValueError(
1063+
f"Nova model '{hub_content_name}' is not supported for deployment. "
1064+
f"Supported: {list(self._NOVA_HOSTING_CONFIGS.keys())}"
1065+
)
1066+
1067+
region = self.sagemaker_session.boto_region_name
1068+
escrow_account = self._NOVA_ESCROW_ACCOUNTS.get(region)
1069+
if not escrow_account:
1070+
raise ValueError(
1071+
f"Nova deployment is not supported in region '{region}'. "
1072+
f"Supported: {list(self._NOVA_ESCROW_ACCOUNTS.keys())}"
1073+
)
1074+
1075+
image_uri = f"{escrow_account}.dkr.ecr.{region}.amazonaws.com/nova-inference-repo:SM-Inference-latest"
1076+
1077+
if instance_type:
1078+
config = next((c for c in configs if c["InstanceType"] == instance_type), None)
1079+
if not config:
1080+
supported = [c["InstanceType"] for c in configs]
1081+
raise ValueError(
1082+
f"Instance type '{instance_type}' not supported for '{hub_content_name}'. "
1083+
f"Supported: {supported}"
1084+
)
1085+
else:
1086+
config = next((c for c in configs if c.get("Profile") == "Default"), configs[0])
1087+
1088+
return {
1089+
"image_uri": image_uri,
1090+
"env_vars": config["Environment"],
1091+
"instance_type": config["InstanceType"],
1092+
}
1093+
9941094
def _initialize_jumpstart_config(self) -> None:
9951095
"""Initialize JumpStart-specific configuration."""
9961096
if hasattr(self, "hub_name") and self.hub_name and not self.hub_arn:
@@ -2217,6 +2317,36 @@ def _build_single_modelbuilder(
22172317
model_package = self._fetch_model_package()
22182318
# Fetch recipe config first to set image_uri, instance_type, env_vars, and s3_upload_path
22192319
self._fetch_and_cache_recipe_config()
2320+
2321+
# Nova models use a completely different deployment architecture
2322+
if self._is_nova_model():
2323+
escrow_uri = self._resolve_nova_escrow_uri()
2324+
base_model = model_package.inference_specification.containers[0].base_model
2325+
2326+
container_def = ContainerDefinition(
2327+
image=self.image_uri,
2328+
environment=self.env_vars,
2329+
model_data_source={
2330+
"s3_data_source": {
2331+
"s3_uri": escrow_uri.rstrip("/") + "/",
2332+
"s3_data_type": "S3Prefix",
2333+
"compression_type": "None",
2334+
}
2335+
},
2336+
)
2337+
model_name = self.model_name or f"model-{uuid.uuid4().hex[:10]}"
2338+
self.built_model = Model.create(
2339+
execution_role_arn=self.role_arn,
2340+
model_name=model_name,
2341+
containers=[container_def],
2342+
enable_network_isolation=True,
2343+
tags=[
2344+
{"key": "sagemaker-studio:jumpstart-model-id",
2345+
"value": base_model.hub_content_name},
2346+
],
2347+
)
2348+
return self.built_model
2349+
22202350
peft_type = self._fetch_peft()
22212351

22222352
if peft_type == "LORA":
@@ -4207,6 +4337,14 @@ def _deploy_model_customization(
42074337
from sagemaker.core.resources import InferenceComponent
42084338
from sagemaker.core.resources import Tag as CoreTag
42094339

4340+
# Nova models use direct model-on-variant, no InferenceComponents
4341+
if self._is_nova_model():
4342+
return self._deploy_nova_model(
4343+
endpoint_name=endpoint_name,
4344+
initial_instance_count=initial_instance_count,
4345+
wait=kwargs.get("wait", True),
4346+
)
4347+
42104348
# Fetch model package
42114349
model_package = self._fetch_model_package()
42124350

@@ -4403,6 +4541,91 @@ def _does_endpoint_exist(self, endpoint_name: str) -> bool:
44034541
return False
44044542
raise
44054543

4544+
def _resolve_nova_escrow_uri(self) -> str:
    """Resolve the escrow S3 URI for Nova model artifacts from manifest.json.

    Nova training jobs write artifacts to an escrow S3 bucket. The location
    is recorded in manifest.json in the training job output directory.

    Returns:
        str: the manifest's ``checkpoint_s3_bucket`` value — the S3 URI of
        the escrow location holding the trained model artifacts.

    Raises:
        ValueError: if ``self.model`` is neither a ``TrainingJob`` nor a
            ``ModelTrainer``, or if the manifest has no
            ``checkpoint_s3_bucket`` key.
    """
    import json
    from urllib.parse import urlparse

    # Locate the underlying training job from whichever handle was given.
    if isinstance(self.model, TrainingJob):
        training_job = self.model
    elif isinstance(self.model, ModelTrainer):
        # ModelTrainer tracks its most recent run; use that job's output.
        training_job = self.model._latest_training_job
    else:
        raise ValueError("Nova escrow URI resolution requires a TrainingJob or ModelTrainer")

    # NOTE(review): the path deliberately contains "output/output" — the
    # job's output prefix plus a nested output/ folder written by the
    # training container; confirm against an actual Nova job's S3 layout.
    output_path = training_job.output_data_config.s3_output_path.rstrip("/")
    manifest_s3 = f"{output_path}/{training_job.training_job_name}/output/output/manifest.json"

    # Split the s3:// URI into bucket (netloc) and object key (path).
    parsed = urlparse(manifest_s3)
    bucket = parsed.netloc
    key = parsed.path.lstrip("/")

    s3_client = self.sagemaker_session.boto_session.client("s3")
    resp = s3_client.get_object(Bucket=bucket, Key=key)
    manifest = json.loads(resp["Body"].read().decode())

    escrow_uri = manifest.get("checkpoint_s3_bucket")
    if not escrow_uri:
        raise ValueError(
            f"'checkpoint_s3_bucket' not found in manifest.json. "
            f"Available keys: {list(manifest.keys())}"
        )
    return escrow_uri
4578+
4579+
def _deploy_nova_model(
    self,
    endpoint_name: str,
    initial_instance_count: int = 1,
    wait: bool = True,
) -> Endpoint:
    """Deploy a Nova model directly to an endpoint without inference components.

    Nova models use a model-on-variant architecture:
    - ModelName is embedded in the ProductionVariant
    - No InferenceComponents are created
    - EnableNetworkIsolation is set on the Model (during build)

    Args:
        endpoint_name: Name for both the endpoint and its endpoint config;
            a random ``endpoint-<hex>`` name is generated when falsy.
        initial_instance_count: Instance count for the single AllTraffic
            variant. Defaults to 1.
        wait: When True (default), block until the endpoint reaches
            ``InService``.

    Returns:
        Endpoint: the created endpoint resource.
    """
    from sagemaker.core.shapes import ProductionVariant

    model_package = self._fetch_model_package()
    base_model = model_package.inference_specification.containers[0].base_model

    if not endpoint_name:
        endpoint_name = f"endpoint-{uuid.uuid4().hex[:8]}"

    # The endpoint config reuses the endpoint's name. The variant points
    # straight at the previously built model (self.built_model) — this is
    # the model-on-variant shape; no inference component is involved.
    EndpointConfig.create(
        endpoint_config_name=endpoint_name,
        production_variants=[
            ProductionVariant(
                variant_name="AllTraffic",
                model_name=self.built_model.model_name,
                instance_type=self.instance_type,
                initial_instance_count=initial_instance_count,
            )
        ],
    )

    # Tag with the JumpStart model id (and recipe name when present) so the
    # endpoint can be attributed back to its Nova model in Studio.
    tags = [
        {"key": "sagemaker-studio:jumpstart-model-id", "value": base_model.hub_content_name},
    ]
    if base_model.recipe_name:
        tags.append({"key": "sagemaker-studio:recipe-name", "value": base_model.recipe_name})

    endpoint = Endpoint.create(
        endpoint_name=endpoint_name,
        endpoint_config_name=endpoint_name,
        tags=tags,
    )

    if wait:
        # Synchronous deploy: block until the endpoint is usable.
        endpoint.wait_for_status("InService")

    return endpoint
4628+
44064629
@_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="model_builder.deploy_local")
44074630
def deploy_local(
44084631
self, endpoint_name: str = "endpoint", container_timeout_in_seconds: int = 300, **kwargs

0 commit comments

Comments
 (0)