diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1dfafe373..00cdd82f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,12 @@ SPDX-License-Identifier: MIT-0
 
 - **Configuration Version in Metering Database** — Added `config_version` field to the metering database to enable cost tracking and analytics per configuration version. The metering Glue table now includes a `config_version` column, and all metering Parquet files store the configuration version used for each document. Enables Athena queries to compare costs across different configurations, support A/B testing analytics, and optimize per-version costs. Documents without a config version default to "default".
 
+### Fixed
+
+- **Application Inference Profile IAM permissions** — Added `application-inference-profile/*` ARN pattern to `bedrock:InvokeModel` IAM policies across all templates (root, appsync, multi-doc-discovery, and sample templates). PR #236 previously fixed only `patterns/unified/template.yaml`; this completes the fix for all Lambda execution roles. Also added `bedrock:GetInferenceProfile` read permission to support prompt caching resolution. ([#272](https://github.com/aws-solutions-library-samples/accelerated-intelligent-document-processing-on-aws/issues/272))
+
+- **Prompt caching with application inference profiles** — Fixed `<<CACHEPOINT>>` tags being stripped when using Bedrock application inference profile ARNs as model IDs. The cachepoint check now resolves inference profile ARNs to their underlying foundation model via the `GetInferenceProfile` API, enabling prompt caching for profiles that wrap supported models (Claude, Nova). Results are cached to avoid repeated API calls, with graceful fallback if the API call fails. ([#272](https://github.com/aws-solutions-library-samples/accelerated-intelligent-document-processing-on-aws/issues/272))
+
 ## [0.5.6]
 
 ### Added
diff --git a/VERSION b/VERSION
index b49b25336..c42c877d7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.6
+0.5.7-wip1
diff --git a/lib/idp_common_pkg/idp_common/bedrock/client.py b/lib/idp_common_pkg/idp_common/bedrock/client.py
index 18995c970..eac6748d1 100644
--- a/lib/idp_common_pkg/idp_common/bedrock/client.py
+++ b/lib/idp_common_pkg/idp_common/bedrock/client.py
@@ -66,6 +66,10 @@ class _RequestsConnectTimeout(Exception):
 
 DEFAULT_MAX_BACKOFF = 300  # 5 minutes
 
+# Base model names that support cachePoint (without region prefix)
+# Used to check inference profiles by resolving their underlying foundation model
+_CACHEPOINT_BASE_MODELS = set()
+
 # Models that support cachePoint functionality
 CACHEPOINT_SUPPORTED_MODELS = [
     "us.anthropic.claude-3-5-haiku-20241022-v1:0",
@@ -111,6 +115,21 @@ class _RequestsConnectTimeout(Exception):
     "global.anthropic.claude-opus-4-6-v1:1m",
 ]
 
+# Build set of base model names (without region/tier prefixes) for inference profile resolution.
+# e.g., "us.anthropic.claude-sonnet-4-6" -> "anthropic.claude-sonnet-4-6"
+# and "eu.amazon.nova-2-lite-v1:0:priority" -> "amazon.nova-2-lite-v1:0"
+for _model_id in CACHEPOINT_SUPPORTED_MODELS:
+    _parts = _model_id.split(".", 1)
+    if len(_parts) == 2 and _parts[0] in ("us", "eu", "global"):
+        _base = _parts[1]
+        # Strip tier suffixes (:priority, :flex) but keep version suffixes (:0, :1m)
+        if _base.endswith(":priority") or _base.endswith(":flex"):
+            _base = _base.rsplit(":", 1)[0]
+        _CACHEPOINT_BASE_MODELS.add(_base)
+
+# Module-level cache for inference profile -> cachepoint support resolution
+_inference_profile_cachepoint_cache: Dict[str, bool] = {}
+
 
 class BedrockClient:
     """Client for interacting with Amazon Bedrock models and custom Lambda hooks."""
@@ -139,6 +158,7 @@ def __init__(
         self.max_backoff = max_backoff
         self.metrics_enabled = metrics_enabled
         self._client = None
+        self._bedrock_control_client = None
         self._lambda_client = None
         self._s3_client = None
 
@@ -164,6 +184,15 @@ def lambda_client(self):
         )
         return self._lambda_client
 
+    @property
+    def bedrock_control_client(self):
+        """Lazy-loaded Bedrock control plane client for GetInferenceProfile etc."""
+        if self._bedrock_control_client is None:
+            self._bedrock_control_client = boto3.client(
+                "bedrock", region_name=self.region
+            )
+        return self._bedrock_control_client
+
     @property
     def s3_client(self):
         """Lazy-loaded S3 client for LambdaHook image uploads."""
@@ -173,6 +202,93 @@ def s3_client(self):
         )
         return self._s3_client
 
+    def _is_model_cachepoint_supported(self, model_id: str) -> bool:
+        """
+        Check if a model supports cachePoint, including inference profile resolution.
+
+        For standard model IDs (e.g., "us.anthropic.claude-sonnet-4-6"), checks
+        the CACHEPOINT_SUPPORTED_MODELS list directly.
+
+        For inference profile ARNs (containing "inference-profile" or
+        "application-inference-profile"), resolves the underlying foundation
+        model via the GetInferenceProfile API and checks if that base model
+        supports cachePoint. Results are cached to avoid repeated API calls.
+
+        Args:
+            model_id: Bedrock model ID or inference profile ARN
+
+        Returns:
+            True if the model (or underlying model for inference profiles) supports cachePoint
+        """
+        # Fast path: direct match against the known list
+        if model_id in CACHEPOINT_SUPPORTED_MODELS:
+            return True
+
+        # Check if this is an inference profile ARN
+        if "inference-profile" not in model_id:
+            return False
+
+        # Check module-level cache
+        if model_id in _inference_profile_cachepoint_cache:
+            cached = _inference_profile_cachepoint_cache[model_id]
+            logger.debug(
+                f"Inference profile cachepoint support (cached): {model_id} -> {cached}"
+            )
+            return cached
+
+        # Resolve the inference profile to its underlying foundation model
+        try:
+            response = self.bedrock_control_client.get_inference_profile(
+                inferenceProfileIdentifier=model_id
+            )
+            models = response.get("models", [])
+            if not models:
+                logger.warning(
+                    f"Inference profile {model_id} has no models listed. "
+                    "Cannot determine cachePoint support."
+                )
+                _inference_profile_cachepoint_cache[model_id] = False
+                return False
+
+            # Extract the base model name from the first model's ARN.
+            # Model ARN format: arn:aws:bedrock:<region>::foundation-model/<model-name>
+            # e.g., "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6"
+            first_model_arn = models[0].get("modelArn", "")
+            if "foundation-model/" in first_model_arn:
+                base_model_name = first_model_arn.split("foundation-model/")[-1]
+            else:
+                logger.warning(
+                    f"Cannot parse foundation model from ARN: {first_model_arn}"
+                )
+                _inference_profile_cachepoint_cache[model_id] = False
+                return False
+
+            supported = base_model_name in _CACHEPOINT_BASE_MODELS
+            _inference_profile_cachepoint_cache[model_id] = supported
+
+            logger.info(
+                f"Resolved inference profile {model_id} -> "
+                f"foundation model '{base_model_name}' -> "
+                f"cachePoint {'supported' if supported else 'not supported'}"
+            )
+            return supported
+
+        except ClientError as e:
+            error_code = e.response["Error"]["Code"]
+            logger.warning(
+                f"Failed to resolve inference profile {model_id} for cachePoint check "
+                f"({error_code}): {e}. Disabling cachePoint for this model."
+            )
+            _inference_profile_cachepoint_cache[model_id] = False
+            return False
+        except Exception as e:
+            logger.warning(
+                f"Unexpected error resolving inference profile {model_id} "
+                f"for cachePoint check: {e}. Disabling cachePoint for this model."
+            )
+            _inference_profile_cachepoint_cache[model_id] = False
+            return False
+
     def __call__(
         self,
         model_id: str,
@@ -375,7 +491,7 @@ def invoke_model(
         )
 
         if has_cachepoint_tags:
-            if model_id in CACHEPOINT_SUPPORTED_MODELS:
+            if self._is_model_cachepoint_supported(model_id):
                 # Process content for cachePoint tags with supported model
                 processed_content = self._preprocess_content_for_cachepoint(content)
                 logger.info(
@@ -394,7 +510,9 @@
                         clean_text = item["text"].replace("<<CACHEPOINT>>", "")
                         processed_content.append({"text": clean_text})
                 logger.warning(
-                    f"Removed <<CACHEPOINT>> tags for unsupported model: {model_id}. CachePoint is only supported for: {', '.join(CACHEPOINT_SUPPORTED_MODELS)}"
+                    f"Removed <<CACHEPOINT>> tags for unsupported model: {model_id}. "
+                    "CachePoint is supported for standard cross-region inference profiles "
+                    "and application inference profiles that wrap supported foundation models."
                 )
             else:
                 # Pass through unchanged
diff --git a/lib/idp_common_pkg/tests/unit/test_bedrock_cachepoint_inference_profile.py b/lib/idp_common_pkg/tests/unit/test_bedrock_cachepoint_inference_profile.py
new file mode 100644
index 000000000..abe894fa1
--- /dev/null
+++ b/lib/idp_common_pkg/tests/unit/test_bedrock_cachepoint_inference_profile.py
@@ -0,0 +1,358 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0
+
+"""Unit tests for BedrockClient cachepoint support with inference profiles."""
+
+from unittest.mock import MagicMock
+
+import pytest
+from botocore.exceptions import ClientError
+from idp_common.bedrock.client import (
+    _CACHEPOINT_BASE_MODELS,
+    BedrockClient,
+    _inference_profile_cachepoint_cache,
+)
+
+
+@pytest.mark.unit
+class TestCachepointBaseModelsSet:
+    """Test that _CACHEPOINT_BASE_MODELS is built correctly from CACHEPOINT_SUPPORTED_MODELS."""
+
+    def test_base_models_set_not_empty(self):
+        """Base models set should contain entries derived from the supported models list."""
+        assert len(_CACHEPOINT_BASE_MODELS) > 0
+
+    def test_base_models_strip_region_prefix(self):
+        """Base models should have region prefixes (us., eu., global.) stripped."""
+        for base_model in _CACHEPOINT_BASE_MODELS:
+            assert not base_model.startswith("us.")
+            assert not base_model.startswith("eu.")
+            assert not base_model.startswith("global.")
+
+    def test_base_models_contain_known_models(self):
+        """Known foundation model names should be in the base models set."""
+        assert "anthropic.claude-sonnet-4-6" in _CACHEPOINT_BASE_MODELS
+        assert "amazon.nova-pro-v1:0" in _CACHEPOINT_BASE_MODELS
+        assert "amazon.nova-lite-v1:0" in _CACHEPOINT_BASE_MODELS
+        assert "amazon.nova-2-lite-v1:0" in _CACHEPOINT_BASE_MODELS
+
+    def test_base_models_strip_tier_suffixes(self):
+        """Tier suffixes (:priority, :flex) should be stripped from base models."""
+        for base_model in _CACHEPOINT_BASE_MODELS:
+            assert not base_model.endswith(":priority")
+            assert not base_model.endswith(":flex")
+
+    def test_base_models_preserve_version_suffixes(self):
+        """Version suffixes (:0, :1m) should be preserved in base models."""
+        assert "amazon.nova-pro-v1:0" in _CACHEPOINT_BASE_MODELS
+        assert "anthropic.claude-sonnet-4-6:1m" in _CACHEPOINT_BASE_MODELS
+
+
+@pytest.mark.unit
+class TestIsModelCachepointSupported:
+    """Test _is_model_cachepoint_supported method with inference profile resolution."""
+
+    @pytest.fixture(autouse=True)
+    def clear_cache(self):
+        """Clear the inference profile cache before each test."""
+        _inference_profile_cachepoint_cache.clear()
+        yield
+        _inference_profile_cachepoint_cache.clear()
+
+    @pytest.fixture
+    def bedrock_client(self):
+        """Create BedrockClient with mocked clients."""
+        client = BedrockClient(region="us-east-1", metrics_enabled=False)
+        client._client = MagicMock()
+        client._bedrock_control_client = MagicMock()
+        return client
+
+    def test_standard_model_direct_match(self, bedrock_client):
+        """Standard model IDs in CACHEPOINT_SUPPORTED_MODELS should return True."""
+        assert (
+            bedrock_client._is_model_cachepoint_supported(
+                "us.anthropic.claude-sonnet-4-6"
+            )
+            is True
+        )
+
+    def test_unsupported_standard_model(self, bedrock_client):
+        """Model IDs not in the list and not inference profiles should return False."""
+        assert (
+            bedrock_client._is_model_cachepoint_supported("some.unknown.model-v1")
+            is False
+        )
+
+    def test_non_inference_profile_arn(self, bedrock_client):
+        """ARNs that don't contain 'inference-profile' should return False without API call."""
+        result = bedrock_client._is_model_cachepoint_supported(
+            "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6"
+        )
+        assert result is False
+        bedrock_client._bedrock_control_client.get_inference_profile.assert_not_called()
+
+    def test_application_inference_profile_supported_model(self, bedrock_client):
+        """Application inference profile wrapping a supported model should return True."""
+        profile_arn = "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/abc123"
+        bedrock_client._bedrock_control_client.get_inference_profile.return_value = {
+            "models": [
+                {
+                    "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6"
+                },
+                {
+                    "modelArn": "arn:aws:bedrock:us-east-2::foundation-model/anthropic.claude-sonnet-4-6"
+                },
+            ],
+            "inferenceProfileId": "app-profile-abc123",
+            "status": "ACTIVE",
+        }
+
+        result = bedrock_client._is_model_cachepoint_supported(profile_arn)
+        assert result is True
+        bedrock_client._bedrock_control_client.get_inference_profile.assert_called_once_with(
+            inferenceProfileIdentifier=profile_arn
+        )
+
+    def test_application_inference_profile_unsupported_model(self, bedrock_client):
"""Application inference profile wrapping an unsupported model should return False.""" + profile_arn = "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/xyz789" + bedrock_client._bedrock_control_client.get_inference_profile.return_value = { + "models": [ + { + "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/some.unsupported-model-v1" + }, + ], + "inferenceProfileId": "app-profile-xyz789", + "status": "ACTIVE", + } + + result = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result is False + + def test_system_inference_profile_supported(self, bedrock_client): + """System-defined inference profiles should also be resolved.""" + profile_arn = "arn:aws:bedrock:us-east-1:123456789012:inference-profile/us.anthropic.claude-sonnet-4-6" + bedrock_client._bedrock_control_client.get_inference_profile.return_value = { + "models": [ + { + "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6" + }, + ], + "inferenceProfileId": "us.anthropic.claude-sonnet-4-6", + "status": "ACTIVE", + } + + # Note: system inference profiles like "us.anthropic.claude-sonnet-4-6" are already + # in CACHEPOINT_SUPPORTED_MODELS and would be caught by the fast path. + # This test covers the case where the full ARN is used instead of the short ID. + result = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result is True + + def test_result_is_cached(self, bedrock_client): + """Resolved results should be cached to avoid repeated API calls.""" + profile_arn = "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/cached123" + bedrock_client._bedrock_control_client.get_inference_profile.return_value = { + "models": [ + { + "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6" + }, + ], + } + + # First call - makes API call + result1 = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result1 is True + assert ( + bedrock_client._bedrock_control_client.get_inference_profile.call_count == 1 + ) + + # Second call - uses cache, no additional API call + result2 = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result2 is True + assert ( + bedrock_client._bedrock_control_client.get_inference_profile.call_count == 1 + ) + + def test_empty_models_list(self, bedrock_client): + """Profile with empty models list should return False.""" + profile_arn = ( + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/empty" + ) + bedrock_client._bedrock_control_client.get_inference_profile.return_value = { + "models": [], + } + + result = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result is False + + def test_unparseable_model_arn(self, bedrock_client): + """Profile with model ARN lacking 'foundation-model/' should return False.""" + profile_arn = ( + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/weird" + ) + bedrock_client._bedrock_control_client.get_inference_profile.return_value = { + "models": [ + { + "modelArn": "arn:aws:bedrock:us-east-1::custom-model/my-fine-tuned-model" + }, + ], + } + + result = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result is False + + def test_client_error_returns_false(self, bedrock_client): + """API errors should return False and cache the result.""" + profile_arn = ( + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/error" + ) + bedrock_client._bedrock_control_client.get_inference_profile.side_effect = ( + ClientError( + { 
+ "Error": { + "Code": "ResourceNotFoundException", + "Message": "Profile not found", + } + }, + "GetInferenceProfile", + ) + ) + + result = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result is False + # Should be cached so second call doesn't hit API + assert profile_arn in _inference_profile_cachepoint_cache + assert _inference_profile_cachepoint_cache[profile_arn] is False + + def test_access_denied_returns_false(self, bedrock_client): + """AccessDeniedException should return False gracefully (missing GetInferenceProfile permission).""" + profile_arn = "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/noperm" + bedrock_client._bedrock_control_client.get_inference_profile.side_effect = ( + ClientError( + { + "Error": { + "Code": "AccessDeniedException", + "Message": "Not authorized", + } + }, + "GetInferenceProfile", + ) + ) + + result = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result is False + + def test_unexpected_exception_returns_false(self, bedrock_client): + """Unexpected exceptions should return False gracefully.""" + profile_arn = ( + "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/crash" + ) + bedrock_client._bedrock_control_client.get_inference_profile.side_effect = ( + RuntimeError("boom") + ) + + result = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result is False + + def test_nova_model_via_inference_profile(self, bedrock_client): + """Application inference profile wrapping Nova model should return True.""" + profile_arn = "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/nova123" + bedrock_client._bedrock_control_client.get_inference_profile.return_value = { + "models": [ + { + "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/amazon.nova-pro-v1:0" + }, + ], + } + + result = bedrock_client._is_model_cachepoint_supported(profile_arn) + assert result is True + + +@pytest.mark.unit +class TestCachepointProcessingWithInferenceProfiles: + """Test that invoke_model correctly applies or strips cachepoint tags for inference profiles.""" + + @pytest.fixture(autouse=True) + def clear_cache(self): + """Clear the inference profile cache before each test.""" + _inference_profile_cachepoint_cache.clear() + yield + _inference_profile_cachepoint_cache.clear() + + @pytest.fixture + def mock_bedrock_response(self): + """Mock Bedrock API response.""" + return { + "output": {"message": {"content": [{"text": "test response"}]}}, + "usage": {"inputTokens": 100, "outputTokens": 50, "totalTokens": 150}, + } + + @pytest.fixture + def bedrock_client(self): + """Create BedrockClient with mocked clients.""" + client = BedrockClient(region="us-east-1", metrics_enabled=False) + client._client = MagicMock() + client._bedrock_control_client = MagicMock() + return client + + def test_cachepoint_applied_for_supported_inference_profile( + self, bedrock_client, mock_bedrock_response + ): + """Cachepoint tags should be processed (not stripped) for supported inference profiles.""" + profile_arn = "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/supported" + bedrock_client._bedrock_control_client.get_inference_profile.return_value = { + "models": [ + { + "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6" + }, + ], + } + bedrock_client._client.converse.return_value = mock_bedrock_response + + bedrock_client.invoke_model( + model_id=profile_arn, + system_prompt="test", + content=[{"text": "static content<>dynamic 
content"}], + ) + + # Verify cachePoint elements were inserted (not stripped) + call_args = bedrock_client._client.converse.call_args + message_content = call_args.kwargs["messages"][0]["content"] + has_cachepoint = any("cachePoint" in item for item in message_content) + assert has_cachepoint, ( + "cachePoint should be inserted for supported inference profile" + ) + + def test_cachepoint_stripped_for_unsupported_inference_profile( + self, bedrock_client, mock_bedrock_response + ): + """Cachepoint tags should be stripped for unsupported inference profiles.""" + profile_arn = "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/unsupported" + bedrock_client._bedrock_control_client.get_inference_profile.return_value = { + "models": [ + { + "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/some.unsupported-model" + }, + ], + } + bedrock_client._client.converse.return_value = mock_bedrock_response + + bedrock_client.invoke_model( + model_id=profile_arn, + system_prompt="test", + content=[{"text": "static<>dynamic"}], + ) + + # Verify cachePoint elements were NOT inserted (tags stripped) + call_args = bedrock_client._client.converse.call_args + message_content = call_args.kwargs["messages"][0]["content"] + has_cachepoint = any("cachePoint" in item for item in message_content) + assert not has_cachepoint, ( + "cachePoint should NOT be inserted for unsupported inference profile" + ) + # But the text content should still be there (just without the tags) + full_text = "".join(item.get("text", "") for item in message_content) + assert "static" in full_text + assert "dynamic" in full_text + assert "<>" not in full_text diff --git a/nested/appsync/extracted_resources.yaml b/nested/appsync/extracted_resources.yaml index 86b228b88..7c8ff3a26 100644 --- a/nested/appsync/extracted_resources.yaml +++ b/nested/appsync/extracted_resources.yaml @@ -283,9 +283,11 @@ - Effect: Allow Action: - "bedrock:InvokeModel" + - bedrock:GetInferenceProfile Resource: - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*" - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*" + - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*" - Effect: Allow Action: - aws-marketplace:Subscribe @@ -301,6 +303,7 @@ - "bedrock:GetInferenceProfile" Resource: - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*" + - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*" - !If - HasGuardrailConfig - Effect: Allow @@ -1318,14 +1321,17 @@ - Effect: Allow Action: - "bedrock:InvokeModel" + - bedrock:GetInferenceProfile Resource: - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*" - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*" + - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*" - Effect: Allow Action: - "bedrock:GetInferenceProfile" Resource: - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*" + - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*" - Effect: Allow Action: - aws-marketplace:Subscribe diff --git a/nested/appsync/template.yaml b/nested/appsync/template.yaml index 1651257ab..a87a1b1e4 100644 --- a/nested/appsync/template.yaml +++ b/nested/appsync/template.yaml @@ -871,9 +871,11 @@ Resources: - Effect: Allow Action: - "bedrock:InvokeModel" + - 
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action:
              - aws-marketplace:Subscribe
@@ -889,6 +891,7 @@ Resources:
              - "bedrock:GetInferenceProfile"
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action:
              - ssm:GetParameter
@@ -1755,10 +1758,12 @@ Resources:
          - Effect: Allow
            Action:
              - bedrock:InvokeModel
+             - bedrock:GetInferenceProfile
              - bedrock:Converse
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          # Step Functions permission for multi-document discovery
          - Effect: Allow
            Action:
@@ -2650,14 +2655,17 @@ Resources:
          - Effect: Allow
            Action:
              - "bedrock:InvokeModel"
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action:
              - "bedrock:GetInferenceProfile"
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action:
              - aws-marketplace:Subscribe
diff --git a/nested/multi-doc-discovery/template.yaml b/nested/multi-doc-discovery/template.yaml
index 790f05841..737f78106 100644
--- a/nested/multi-doc-discovery/template.yaml
+++ b/nested/multi-doc-discovery/template.yaml
@@ -370,6 +370,7 @@ Resources:
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action: [cloudwatch:PutMetricData]
            Resource: "*"
@@ -476,10 +477,11 @@ Resources:
            Action: [kms:Encrypt, kms:Decrypt, kms:ReEncrypt*, kms:GenerateDataKey*, kms:DescribeKey]
            Resource: !Ref CustomerManagedEncryptionKeyArn
          - Effect: Allow
-           Action: [bedrock:InvokeModel, bedrock:InvokeModelWithResponseStream]
+           Action: [bedrock:InvokeModel, bedrock:InvokeModelWithResponseStream, bedrock:GetInferenceProfile]
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action: [cloudwatch:PutMetricData]
            Resource: "*"
@@ -532,10 +534,11 @@ Resources:
            Action: [kms:Encrypt, kms:Decrypt, kms:ReEncrypt*, kms:GenerateDataKey*, kms:DescribeKey]
            Resource: !Ref CustomerManagedEncryptionKeyArn
          - Effect: Allow
-           Action: [bedrock:InvokeModel, bedrock:InvokeModelWithResponseStream]
+           Action: [bedrock:InvokeModel, bedrock:InvokeModelWithResponseStream, bedrock:GetInferenceProfile]
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action: [cloudwatch:PutMetricData]
            Resource: "*"
diff --git a/patterns/unified/template.yaml b/patterns/unified/template.yaml
index ad5918c13..88d8e9d87 100644
--- a/patterns/unified/template.yaml
+++ b/patterns/unified/template.yaml
@@ -2666,6 +2666,7 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
@@ -2800,6 +2801,7 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
@@ -2938,6 +2940,7 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
@@ -3067,6 +3070,7 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
@@ -3289,6 +3293,7 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
@@ -3417,6 +3422,7 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
@@ -3595,6 +3601,7 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
@@ -3715,6 +3722,7 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
diff --git a/samples/lambda-hook-inference/GENAIIDP-bedrock-proxy/template.yaml b/samples/lambda-hook-inference/GENAIIDP-bedrock-proxy/template.yaml
index 894e249ef..c3778450e 100644
--- a/samples/lambda-hook-inference/GENAIIDP-bedrock-proxy/template.yaml
+++ b/samples/lambda-hook-inference/GENAIIDP-bedrock-proxy/template.yaml
@@ -55,9 +55,11 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - !If
            - HasKMSKey
            - Effect: Allow
diff --git a/samples/lambda-hook-inference/template.yaml b/samples/lambda-hook-inference/template.yaml
index bdb94a4d6..08a94814c 100644
--- a/samples/lambda-hook-inference/template.yaml
+++ b/samples/lambda-hook-inference/template.yaml
@@ -91,9 +91,11 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
 
  # =========================================================================
  # GENAIIDP-sagemaker-hook
diff --git a/template.yaml b/template.yaml
index 1e1ca6f4c..481243bb9 100644
--- a/template.yaml
+++ b/template.yaml
@@ -1169,9 +1169,11 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action:
              - aws-marketplace:Subscribe
@@ -5160,9 +5162,11 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action:
              - aws-marketplace:Subscribe
@@ -5607,9 +5611,11 @@ Resources:
            Action:
              - bedrock:InvokeModel
              - bedrock:InvokeModelWithResponseStream
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action:
              - aws-marketplace:Subscribe
@@ -7226,9 +7232,11 @@ Resources:
          - Effect: Allow
            Action:
              - bedrock:InvokeModel
+             - bedrock:GetInferenceProfile
            Resource:
              - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*"
              - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:inference-profile/*"
+             - !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}:${AWS::AccountId}:application-inference-profile/*"
          - Effect: Allow
            Action:
              - aws-marketplace:Subscribe