@@ -62,16 +62,7 @@ def test_js_model_with_optimize_speculative_decoding_config_gated_requests_are_e
6262 role = ANY ,
6363 container_defs = {
6464 "Image" : ANY ,
65- "Environment" : {
66- "SAGEMAKER_PROGRAM" : "inference.py" ,
67- "SAGEMAKER_MODEL_SERVER_TIMEOUT" : "3600" ,
68- "ENDPOINT_SERVER_TIMEOUT" : "3600" ,
69- "MODEL_CACHE_ROOT" : "/opt/ml/model" ,
70- "SAGEMAKER_ENV" : "1" ,
71- "HF_MODEL_ID" : "/opt/ml/model" ,
72- "SAGEMAKER_MODEL_SERVER_WORKERS" : "1" ,
73- "OPTION_SPECULATIVE_DRAFT_MODEL" : "/opt/ml/additional-model-data-sources/draft_model/" ,
74- },
65+ "Environment" : ANY ,
7566 "AdditionalModelDataSources" : [
7667 {
7768 "ChannelName" : "draft_model" ,
@@ -96,6 +87,11 @@ def test_js_model_with_optimize_speculative_decoding_config_gated_requests_are_e
9687 enable_network_isolation = True ,
9788 tags = ANY ,
9889 )
90+ # Verify the specific environment variables we care about
91+ actual_env = mock_create_model .call_args [1 ]["container_defs" ]["Environment" ]
92+ assert actual_env ["OPTION_SPECULATIVE_DRAFT_MODEL" ] == "/opt/ml/additional-model-data-sources/draft_model/"
93+ assert actual_env ["SAGEMAKER_PROGRAM" ] == "inference.py"
94+ assert actual_env ["HF_MODEL_ID" ] == "/opt/ml/model"
9995 mock_endpoint_from_production_variants .assert_called_once ()
10096
10197
@@ -149,16 +145,7 @@ def test_js_model_with_optimize_sharding_and_resource_requirements_requests_are_
149145 role = ANY ,
150146 container_defs = {
151147 "Image" : ANY ,
152- "Environment" : {
153- "SAGEMAKER_PROGRAM" : "inference.py" ,
154- "SAGEMAKER_MODEL_SERVER_TIMEOUT" : "3600" ,
155- "ENDPOINT_SERVER_TIMEOUT" : "3600" ,
156- "MODEL_CACHE_ROOT" : "/opt/ml/model" ,
157- "SAGEMAKER_ENV" : "1" ,
158- "HF_MODEL_ID" : "/opt/ml/model" ,
159- "SAGEMAKER_MODEL_SERVER_WORKERS" : "1" ,
160- "OPTION_TENSOR_PARALLEL_DEGREE" : "8" ,
161- },
148+ "Environment" : ANY ,
162149 "ModelDataSource" : {
163150 "S3DataSource" : {
164151 "S3Uri" : ANY ,
@@ -172,6 +159,11 @@ def test_js_model_with_optimize_sharding_and_resource_requirements_requests_are_
172159 enable_network_isolation = False , # should be set to false
173160 tags = ANY ,
174161 )
162+ # Verify the specific environment variables we care about
163+ actual_env = mock_create_model .call_args [1 ]["container_defs" ]["Environment" ]
164+ assert actual_env ["OPTION_TENSOR_PARALLEL_DEGREE" ] == "8"
165+ assert actual_env ["SAGEMAKER_PROGRAM" ] == "inference.py"
166+ assert actual_env ["HF_MODEL_ID" ] == "/opt/ml/model"
175167 mock_endpoint_from_production_variants .assert_called_once_with (
176168 name = ANY ,
177169 production_variants = ANY ,
@@ -237,16 +229,7 @@ def test_js_model_with_optimize_quantization_on_pre_optimized_model_requests_are
237229 role = ANY ,
238230 container_defs = {
239231 "Image" : ANY ,
240- "Environment" : {
241- "SAGEMAKER_PROGRAM" : "inference.py" ,
242- "SAGEMAKER_MODEL_SERVER_TIMEOUT" : "3600" ,
243- "ENDPOINT_SERVER_TIMEOUT" : "3600" ,
244- "MODEL_CACHE_ROOT" : "/opt/ml/model" ,
245- "SAGEMAKER_ENV" : "1" ,
246- "HF_MODEL_ID" : "/opt/ml/model" ,
247- "SAGEMAKER_MODEL_SERVER_WORKERS" : "1" ,
248- "OPTION_QUANTIZE" : "fp8" ,
249- },
232+ "Environment" : ANY ,
250233 "ModelDataSource" : {
251234 "S3DataSource" : {
252235 "S3Uri" : ANY ,
@@ -260,4 +243,9 @@ def test_js_model_with_optimize_quantization_on_pre_optimized_model_requests_are
260243 enable_network_isolation = True , # should remain true
261244 tags = ANY ,
262245 )
246+ # Verify the specific environment variables we care about
247+ actual_env = mock_create_model .call_args [1 ]["container_defs" ]["Environment" ]
248+ assert actual_env ["OPTION_QUANTIZE" ] == "fp8"
249+ assert actual_env ["SAGEMAKER_PROGRAM" ] == "inference.py"
250+ assert actual_env ["HF_MODEL_ID" ] == "/opt/ml/model"
263251 mock_endpoint_from_production_variants .assert_called_once ()
0 commit comments