|
| 1 | +""" |
| 2 | +Test to verify that the deploy() method passes inference_config to _deploy_model_customization. |
| 3 | +This test validates task 4.4 requirements. |
| 4 | +""" |
| 5 | + |
| 6 | +import unittest |
| 7 | +from unittest.mock import Mock, patch, MagicMock |
| 8 | +import pytest |
| 9 | + |
| 10 | +from sagemaker.serve.model_builder import ModelBuilder |
| 11 | +from sagemaker.serve.mode.function_pointers import Mode |
| 12 | +from sagemaker.core.inference_config import ResourceRequirements # Correct import! |
| 13 | + |
| 14 | + |
class TestDeployPassesInferenceConfig(unittest.TestCase):
    """Check what deploy() hands to _deploy_model_customization as inference_config.

    Every test patches ``_is_model_customization`` to return True and replaces
    ``_deploy_model_customization`` with a mock, then inspects the keyword
    arguments the mock was called with.
    """

    def setUp(self):
        """Create a mocked session object with the attributes these tests set up."""
        self.mock_session = Mock()
        self.mock_session.boto_region_name = "us-west-2"
        self.mock_session.default_bucket.return_value = "test-bucket"
        self.mock_session.default_bucket_prefix = "test-prefix"
        self.mock_session.config = {}
        self.mock_session.sagemaker_config = {}
        self.mock_session.settings = Mock()
        self.mock_session.settings.include_jumpstart_tags = False

        # Fake credentials returned by the mocked boto session.
        mock_credentials = Mock()
        mock_credentials.access_key = "test-key"
        mock_credentials.secret_key = "test-secret"
        mock_credentials.token = None
        self.mock_session.boto_session = Mock()
        self.mock_session.boto_session.get_credentials.return_value = mock_credentials
        self.mock_session.boto_session.region_name = "us-west-2"

    def _make_built_builder(self):
        """Return a ModelBuilder bound to the mocked session with built_model set.

        Assigning ``built_model`` marks the builder as already built so the
        tests can call deploy() directly.
        """
        builder = ModelBuilder(
            model="huggingface-llm-mistral-7b",
            model_metadata={
                "CUSTOM_MODEL_ID": "huggingface-llm-mistral-7b",
                "CUSTOM_MODEL_VERSION": "1.0.0",
            },
            instance_type="ml.g5.12xlarge",
            mode=Mode.SAGEMAKER_ENDPOINT,
            role_arn="arn:aws:iam::123456789012:role/TestRole",
            sagemaker_session=self.mock_session,
            image_uri="123456789012.dkr.ecr.us-west-2.amazonaws.com/test:latest",
        )
        builder.built_model = Mock()
        return builder

    @staticmethod
    def _configure_patches(
        mock_fetch_default_instance,
        mock_is_model_customization,
        mock_deploy_model_customization,
    ):
        """Set the shared return values on the patched methods; return the mock endpoint."""
        mock_is_model_customization.return_value = True
        mock_fetch_default_instance.return_value = "ml.g5.12xlarge"
        mock_endpoint = Mock()
        mock_deploy_model_customization.return_value = mock_endpoint
        return mock_endpoint

    # NOTE: patch decorators are applied bottom-up, so the mock arguments
    # arrive in the reverse of the decorator order.
    @patch('sagemaker.serve.model_builder.ModelBuilder._deploy_model_customization')
    @patch('sagemaker.serve.model_builder.ModelBuilder._is_model_customization')
    @patch('sagemaker.serve.model_builder.ModelBuilder._fetch_default_instance_type_for_custom_model')
    def test_deploy_passes_inference_config_to_deploy_model_customization(
        self,
        mock_fetch_default_instance,
        mock_is_model_customization,
        mock_deploy_model_customization,
    ):
        """deploy() forwards a ResourceRequirements inference_config unchanged."""
        mock_endpoint = self._configure_patches(
            mock_fetch_default_instance,
            mock_is_model_customization,
            mock_deploy_model_customization,
        )
        builder = self._make_built_builder()

        inference_config = ResourceRequirements(
            requests={
                "num_cpus": 8,
                "memory": 16384,
                "num_accelerators": 4,
            }
        )

        result = builder.deploy(
            endpoint_name="test-endpoint",
            inference_config=inference_config,
            initial_instance_count=1,
            wait=True,
        )

        mock_deploy_model_customization.assert_called()
        _, call_kwargs = mock_deploy_model_customization.call_args

        # inference_config must be passed through as-is.
        self.assertIn('inference_config', call_kwargs)
        self.assertEqual(call_kwargs['inference_config'], inference_config)

        # The remaining deploy() arguments must be forwarded too.
        self.assertEqual(call_kwargs['endpoint_name'], "test-endpoint")
        self.assertEqual(call_kwargs['initial_instance_count'], 1)
        # Identity check: a truthy non-bool such as 1 must not satisfy this.
        self.assertIs(call_kwargs['wait'], True)

        # deploy() returns whatever _deploy_model_customization returned.
        self.assertEqual(result, mock_endpoint)

    @patch('sagemaker.serve.model_builder.ModelBuilder._deploy_model_customization')
    @patch('sagemaker.serve.model_builder.ModelBuilder._is_model_customization')
    @patch('sagemaker.serve.model_builder.ModelBuilder._fetch_default_instance_type_for_custom_model')
    def test_deploy_passes_none_when_inference_config_not_provided(
        self,
        mock_fetch_default_instance,
        mock_is_model_customization,
        mock_deploy_model_customization,
    ):
        """Backward compatibility: inference_config is None when the caller omits it."""
        self._configure_patches(
            mock_fetch_default_instance,
            mock_is_model_customization,
            mock_deploy_model_customization,
        )
        builder = self._make_built_builder()

        # Call deploy() WITHOUT inference_config.
        builder.deploy(
            endpoint_name="test-endpoint",
            initial_instance_count=1,
        )

        mock_deploy_model_customization.assert_called()
        _, call_kwargs = mock_deploy_model_customization.call_args

        self.assertIn('inference_config', call_kwargs)
        self.assertIsNone(call_kwargs['inference_config'])

    @patch('sagemaker.serve.model_builder.ModelBuilder._deploy_model_customization')
    @patch('sagemaker.serve.model_builder.ModelBuilder._is_model_customization')
    @patch('sagemaker.serve.model_builder.ModelBuilder._fetch_default_instance_type_for_custom_model')
    def test_deploy_only_passes_resource_requirements_type(
        self,
        mock_fetch_default_instance,
        mock_is_model_customization,
        mock_deploy_model_customization,
    ):
        """A non-ResourceRequirements inference_config is expected to arrive as None."""
        self._configure_patches(
            mock_fetch_default_instance,
            mock_is_model_customization,
            mock_deploy_model_customization,
        )
        builder = self._make_built_builder()

        # Imported locally, as in the original test, so the module-level
        # import block does not need to change.
        from sagemaker.core.inference_config import ServerlessInferenceConfig

        serverless_config = ServerlessInferenceConfig(
            memory_size_in_mb=4096,
            max_concurrency=10,
        )

        # This config should NOT be passed on to _deploy_model_customization.
        builder.deploy(
            endpoint_name="test-endpoint",
            inference_config=serverless_config,
        )

        mock_deploy_model_customization.assert_called()
        _, call_kwargs = mock_deploy_model_customization.call_args

        # The key is present but carries None instead of the serverless config.
        self.assertIn('inference_config', call_kwargs)
        self.assertIsNone(call_kwargs['inference_config'])
| 203 | + |
| 204 | + |
# Run the tests via unittest when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments