@@ -985,12 +985,112 @@ def _fetch_and_cache_recipe_config(self):
985985 )
986986 return
987987
988+ # Fallback: Nova recipes don't have hosting configs in the hub document
989+ if self ._is_nova_model ():
990+ nova_config = self ._get_nova_hosting_config (instance_type = self .instance_type )
991+ if not self .image_uri :
992+ self .image_uri = nova_config ["image_uri" ]
993+ if not self .env_vars :
994+ self .env_vars = nova_config ["env_vars" ]
995+ if not self .instance_type :
996+ self .instance_type = nova_config ["instance_type" ]
997+ return
998+
988999 raise ValueError (
9891000 f"Model with recipe '{ recipe_name } ' is not supported for deployment. "
9901001 f"The recipe does not have hosting configuration. "
9911002 f"Please use a model that supports deployment or contact AWS support for assistance."
9921003 )
9931004
# Region -> AWS account ID of the Nova escrow ECR registry. The account ID is
# interpolated into the inference image URI, so a region missing from this
# table cannot be deployed to.
_NOVA_ESCROW_ACCOUNTS = {
    "us-east-1": "708977205387",
    "us-west-2": "176779409107",
    "eu-west-2": "470633809225",
    "ap-northeast-1": "878185805882",
}

# Hub content name -> supported hosting configurations
# (from Rhinestone modelDeployment.ts).
# Each entry names an instance type and the container environment to use on
# it. The entry carrying "Profile": "Default" is the one selected when the
# caller does not ask for a specific instance type.
_NOVA_HOSTING_CONFIGS = {
    "nova-textgeneration-micro": [
        {
            "InstanceType": "ml.g5.12xlarge",
            "Environment": {"CONTEXT_LENGTH": "4096", "MAX_CONCURRENCY": "16"},
        },
        {
            "InstanceType": "ml.g5.24xlarge",
            "Profile": "Default",
            "Environment": {"CONTEXT_LENGTH": "8192", "MAX_CONCURRENCY": "16"},
        },
        {
            "InstanceType": "ml.g6.12xlarge",
            "Environment": {"CONTEXT_LENGTH": "10000", "MAX_CONCURRENCY": "16"},
        },
        {
            "InstanceType": "ml.g6.24xlarge",
            "Environment": {"CONTEXT_LENGTH": "10000", "MAX_CONCURRENCY": "16"},
        },
        {
            "InstanceType": "ml.g6.48xlarge",
            "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"},
        },
        {
            "InstanceType": "ml.p5.48xlarge",
            "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"},
        },
    ],
    "nova-textgeneration-lite": [
        {
            "InstanceType": "ml.g6.48xlarge",
            "Profile": "Default",
            "Environment": {"CONTEXT_LENGTH": "20000", "MAX_CONCURRENCY": "16"},
        },
        {
            "InstanceType": "ml.p5.48xlarge",
            "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"},
        },
    ],
    "nova-textgeneration-pro": [
        {
            "InstanceType": "ml.g6.48xlarge",
            "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"},
        },
        {
            "InstanceType": "ml.p5.48xlarge",
            "Profile": "Default",
            "Environment": {"CONTEXT_LENGTH": "50000", "MAX_CONCURRENCY": "16"},
        },
    ],
    "nova-textgeneration-lite-v2": [
        {
            "InstanceType": "ml.p5.48xlarge",
            "Profile": "Default",
            "Environment": {"CONTEXT_LENGTH": "50000", "MAX_CONCURRENCY": "16"},
        },
    ],
}
1035+
def _is_nova_model(self) -> bool:
    """Report whether the fetched model package describes a Nova model.

    The first inference container's ``base_model`` is inspected; the model
    counts as Nova when "nova" appears (case-insensitively) in either its
    ``recipe_name`` or its ``hub_content_name``.

    Returns:
        bool: ``True`` for a Nova model. ``False`` when no model package is
        available, when it has no containers or no base model, or when
        neither name mentions "nova".
    """
    package = self._fetch_model_package()
    container_list = (
        getattr(package.inference_specification, "containers", None) if package else None
    )
    base = getattr(container_list[0], "base_model", None) if container_list else None
    if not base:
        return False

    # Either attribute may be missing or None; normalise both to a string
    # before the case-insensitive substring check.
    candidate_names = (
        getattr(base, "recipe_name", "") or "",
        getattr(base, "hub_content_name", "") or "",
    )
    return any("nova" in candidate.lower() for candidate in candidate_names)
1050+
def _get_nova_hosting_config(self, instance_type=None):
    """Get Nova hosting config (image URI, env vars, instance type).

    Nova training recipes don't have hosting configs in the JumpStart hub document.
    This provides the hardcoded fallback, matching Rhinestone's getNovaHostingConfigs().

    Args:
        instance_type: Optional instance type to deploy on. When given, it must
            match one of the entries listed for the model in
            ``_NOVA_HOSTING_CONFIGS``. When omitted, the entry marked
            ``"Profile": "Default"`` is used, falling back to the first entry
            if none is marked.

    Returns:
        dict: A mapping with three keys: ``image_uri`` (the ECR image in the
        region's escrow account), ``env_vars`` (the container environment for
        the selected entry) and ``instance_type``. ``env_vars`` is a fresh
        copy, so callers may modify it freely.

    Raises:
        ValueError: If the model's hub content name has no hosting config, the
            session's region has no escrow account, or ``instance_type`` is not
            supported for the model.
    """
    model_package = self._fetch_model_package()
    hub_content_name = (
        model_package.inference_specification.containers[0].base_model.hub_content_name
    )

    configs = self._NOVA_HOSTING_CONFIGS.get(hub_content_name)
    if not configs:
        raise ValueError(
            f"Nova model '{hub_content_name}' is not supported for deployment. "
            f"Supported: {list(self._NOVA_HOSTING_CONFIGS)}"
        )

    region = self.sagemaker_session.boto_region_name
    escrow_account = self._NOVA_ESCROW_ACCOUNTS.get(region)
    if not escrow_account:
        raise ValueError(
            f"Nova deployment is not supported in region '{region}'. "
            f"Supported: {list(self._NOVA_ESCROW_ACCOUNTS)}"
        )

    image_uri = (
        f"{escrow_account}.dkr.ecr.{region}.amazonaws.com"
        "/nova-inference-repo:SM-Inference-latest"
    )

    if instance_type:
        config = next((c for c in configs if c["InstanceType"] == instance_type), None)
        if not config:
            supported = [c["InstanceType"] for c in configs]
            raise ValueError(
                f"Instance type '{instance_type}' not supported for '{hub_content_name}'. "
                f"Supported: {supported}"
            )
    else:
        config = next((c for c in configs if c.get("Profile") == "Default"), configs[0])

    return {
        "image_uri": image_uri,
        # Copy: the table is shared class-level state, and the caller stores
        # this dict as its own env_vars. Returning the table's dict itself
        # would let one builder's edits leak into every later lookup.
        "env_vars": dict(config["Environment"]),
        "instance_type": config["InstanceType"],
    }
1093+
9941094 def _initialize_jumpstart_config (self ) -> None :
9951095 """Initialize JumpStart-specific configuration."""
9961096 if hasattr (self , "hub_name" ) and self .hub_name and not self .hub_arn :
@@ -2217,6 +2317,36 @@ def _build_single_modelbuilder(
22172317 model_package = self ._fetch_model_package ()
22182318 # Fetch recipe config first to set image_uri, instance_type, env_vars, and s3_upload_path
22192319 self ._fetch_and_cache_recipe_config ()
2320+
2321+ # Nova models use a completely different deployment architecture
2322+ if self ._is_nova_model ():
2323+ escrow_uri = self ._resolve_nova_escrow_uri ()
2324+ base_model = model_package .inference_specification .containers [0 ].base_model
2325+
2326+ container_def = ContainerDefinition (
2327+ image = self .image_uri ,
2328+ environment = self .env_vars ,
2329+ model_data_source = {
2330+ "s3_data_source" : {
2331+ "s3_uri" : escrow_uri .rstrip ("/" ) + "/" ,
2332+ "s3_data_type" : "S3Prefix" ,
2333+ "compression_type" : "None" ,
2334+ }
2335+ },
2336+ )
2337+ model_name = self .model_name or f"model-{ uuid .uuid4 ().hex [:10 ]} "
2338+ self .built_model = Model .create (
2339+ execution_role_arn = self .role_arn ,
2340+ model_name = model_name ,
2341+ containers = [container_def ],
2342+ enable_network_isolation = True ,
2343+ tags = [
2344+ {"key" : "sagemaker-studio:jumpstart-model-id" ,
2345+ "value" : base_model .hub_content_name },
2346+ ],
2347+ )
2348+ return self .built_model
2349+
22202350 peft_type = self ._fetch_peft ()
22212351
22222352 if peft_type == "LORA" :
@@ -4207,6 +4337,14 @@ def _deploy_model_customization(
42074337 from sagemaker .core .resources import InferenceComponent
42084338 from sagemaker .core .resources import Tag as CoreTag
42094339
4340+ # Nova models use direct model-on-variant, no InferenceComponents
4341+ if self ._is_nova_model ():
4342+ return self ._deploy_nova_model (
4343+ endpoint_name = endpoint_name ,
4344+ initial_instance_count = initial_instance_count ,
4345+ wait = kwargs .get ("wait" , True ),
4346+ )
4347+
42104348 # Fetch model package
42114349 model_package = self ._fetch_model_package ()
42124350
@@ -4403,6 +4541,91 @@ def _does_endpoint_exist(self, endpoint_name: str) -> bool:
44034541 return False
44044542 raise
44054543
def _resolve_nova_escrow_uri(self) -> str:
    """Resolve the escrow S3 URI for Nova model artifacts from manifest.json.

    Nova training jobs write artifacts to an escrow S3 bucket. The location
    is recorded in manifest.json in the training job output directory, at
    ``<s3_output_path>/<training_job_name>/output/output/manifest.json``.

    Returns:
        str: The value stored under ``checkpoint_s3_bucket`` in the manifest.

    Raises:
        ValueError: If ``self.model`` is neither a ``TrainingJob`` nor a
            ``ModelTrainer``, if the ``ModelTrainer`` has no training job yet,
            if the manifest is not a JSON object, or if it has no
            ``checkpoint_s3_bucket`` entry.
    """
    import json
    from urllib.parse import urlparse

    if isinstance(self.model, TrainingJob):
        training_job = self.model
    elif isinstance(self.model, ModelTrainer):
        training_job = self.model._latest_training_job
    else:
        raise ValueError("Nova escrow URI resolution requires a TrainingJob or ModelTrainer")

    # A ModelTrainer that has not produced a training job would otherwise
    # fail below with an AttributeError on None.
    if training_job is None:
        raise ValueError(
            "Nova escrow URI resolution requires a training job, but the ModelTrainer "
            "has none recorded. Train the model before building it for deployment."
        )

    output_path = training_job.output_data_config.s3_output_path.rstrip("/")
    manifest_s3 = f"{output_path}/{training_job.training_job_name}/output/output/manifest.json"

    # Split s3://bucket/key into the pieces the S3 client expects.
    parsed = urlparse(manifest_s3)
    bucket = parsed.netloc
    key = parsed.path.lstrip("/")

    s3_client = self.sagemaker_session.boto_session.client("s3")
    resp = s3_client.get_object(Bucket=bucket, Key=key)
    manifest = json.loads(resp["Body"].read().decode())

    # Valid JSON that is not an object (e.g. a list) has no .get().
    if not isinstance(manifest, dict):
        raise ValueError(
            f"Expected a JSON object in '{manifest_s3}', "
            f"found {type(manifest).__name__}."
        )

    escrow_uri = manifest.get("checkpoint_s3_bucket")
    if not escrow_uri:
        raise ValueError(
            f"'checkpoint_s3_bucket' not found in manifest.json. "
            f"Available keys: {list(manifest.keys())}"
        )
    return escrow_uri
4578+
def _deploy_nova_model(
    self,
    endpoint_name: str,
    initial_instance_count: int = 1,
    wait: bool = True,
) -> Endpoint:
    """Create an endpoint that serves the built Nova model directly.

    Nova models use a model-on-variant architecture:
    - ModelName is embedded in the ProductionVariant
    - No InferenceComponents are created
    - EnableNetworkIsolation is set on the Model (during build)

    Args:
        endpoint_name: Name for the endpoint; the endpoint config is created
            under the same name. A falsy value is replaced by a generated
            ``endpoint-<8 hex chars>`` name.
        initial_instance_count: Instance count for the single ``AllTraffic``
            production variant.
        wait: When true, block until the endpoint reports ``InService``.

    Returns:
        Endpoint: The created endpoint resource.
    """
    from sagemaker.core.shapes import ProductionVariant

    base_model = (
        self._fetch_model_package().inference_specification.containers[0].base_model
    )

    endpoint_name = endpoint_name or f"endpoint-{uuid.uuid4().hex[:8]}"

    # One variant takes all traffic and points straight at the built model.
    all_traffic_variant = ProductionVariant(
        variant_name="AllTraffic",
        model_name=self.built_model.model_name,
        instance_type=self.instance_type,
        initial_instance_count=initial_instance_count,
    )
    EndpointConfig.create(
        endpoint_config_name=endpoint_name,
        production_variants=[all_traffic_variant],
    )

    endpoint_tags = [
        {"key": "sagemaker-studio:jumpstart-model-id", "value": base_model.hub_content_name},
    ]
    # The recipe tag is only attached when the base model carries a recipe name.
    if base_model.recipe_name:
        endpoint_tags.append(
            {"key": "sagemaker-studio:recipe-name", "value": base_model.recipe_name}
        )

    created_endpoint = Endpoint.create(
        endpoint_name=endpoint_name,
        endpoint_config_name=endpoint_name,
        tags=endpoint_tags,
    )

    if wait:
        created_endpoint.wait_for_status("InService")

    return created_endpoint
4628+
44064629 @_telemetry_emitter (feature = Feature .MODEL_CUSTOMIZATION , func_name = "model_builder.deploy_local" )
44074630 def deploy_local (
44084631 self , endpoint_name : str = "endpoint" , container_timeout_in_seconds : int = 300 , ** kwargs
0 commit comments