6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,12 @@ SPDX-License-Identifier: MIT-0

- **Configuration Version in Metering Database** — Added `config_version` field to the metering database to enable cost tracking and analytics per configuration version. The metering Glue table now includes a `config_version` column, and all metering Parquet files store the configuration version used for each document. Enables Athena queries to compare costs across different configurations, support A/B testing analytics, and optimize per-version costs. Documents without a config version default to "default".
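  A sketch of the kind of Athena query this enables, run here via boto3. The metering table name, cost column, database, and output location are illustrative placeholders; only the `config_version` column is guaranteed by this change:

  ```python
  import boto3

  athena = boto3.client("athena", region_name="us-east-1")

  # Compare total estimated cost per configuration version.
  # "metering", "estimated_cost", "idp_metering_db", and the S3 output
  # location are hypothetical; substitute your deployment's values.
  athena.start_query_execution(
      QueryString="""
          SELECT config_version, SUM(estimated_cost) AS total_cost
          FROM metering
          GROUP BY config_version
          ORDER BY total_cost DESC
      """,
      QueryExecutionContext={"Database": "idp_metering_db"},
      ResultConfiguration={"OutputLocation": "s3://my-athena-results/"},
  )
  ```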

### Fixed

- **Application Inference Profile IAM permissions** — Added `application-inference-profile/*` ARN pattern to `bedrock:InvokeModel` IAM policies across all templates (root, appsync, multi-doc-discovery, and sample templates). PR #236 previously fixed only `patterns/unified/template.yaml`; this completes the fix for all Lambda execution roles. Also added `bedrock:GetInferenceProfile` read permission to support prompt caching resolution. ([#272](https://github.com/aws-solutions-library-samples/accelerated-intelligent-document-processing-on-aws/issues/272))
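  As a rough sketch of the resulting permission shape, expressed as a Python dict rather than the templates' actual YAML (the real statements and resource lists may differ):

  ```python
  # Illustrative only: the point is the added application-inference-profile
  # ARN pattern alongside the existing patterns, plus the new
  # bedrock:GetInferenceProfile read permission.
  bedrock_statement = {
      "Effect": "Allow",
      "Action": ["bedrock:InvokeModel", "bedrock:GetInferenceProfile"],
      "Resource": [
          "arn:aws:bedrock:*::foundation-model/*",
          "arn:aws:bedrock:*:*:inference-profile/*",
          "arn:aws:bedrock:*:*:application-inference-profile/*",
      ],
  }
  ```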

- **Prompt caching with application inference profiles** — Fixed `<<CACHEPOINT>>` tags being stripped when using Bedrock application inference profile ARNs as model IDs. The cachepoint check now resolves inference profile ARNs to their underlying foundation model via the `GetInferenceProfile` API, enabling prompt caching for profiles that wrap supported models (Claude, Nova). Results are cached to avoid repeated API calls, with graceful fallback if the API call fails. ([#272](https://github.com/aws-solutions-library-samples/accelerated-intelligent-document-processing-on-aws/issues/272))
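  A minimal usage sketch. The profile ARN is a placeholder, and the `region` keyword and the exact `invoke_model` signature beyond `model_id` and `content` are assumptions about this client's interface:

  ```python
  from idp_common.bedrock.client import BedrockClient

  # Placeholder application inference profile ARN; substitute your own.
  PROFILE_ARN = (
      "arn:aws:bedrock:us-east-1:111122223333:"
      "application-inference-profile/abcd1234"
  )

  client = BedrockClient(region="us-east-1")  # region kwarg assumed

  # The <<CACHEPOINT>> tag separates the stable prompt prefix (cached)
  # from the per-request suffix. With this fix, the tag is honored when
  # model_id is a profile ARN wrapping a supported model, instead of
  # being silently stripped.
  response = client.invoke_model(
      model_id=PROFILE_ARN,
      content=[
          {
              "text": "Long, stable extraction instructions..."
              "<<CACHEPOINT>>"
              "Per-document text to process."
          }
      ],
  )
  ```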

## [0.5.6]

### Added
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-0.5.6
+0.5.7-wip1
122 changes: 120 additions & 2 deletions lib/idp_common_pkg/idp_common/bedrock/client.py
@@ -66,6 +66,10 @@ class _RequestsConnectTimeout(Exception):
DEFAULT_MAX_BACKOFF = 300 # 5 minutes


# Base model names that support cachePoint (without region prefix)
# Used to check inference profiles by resolving their underlying foundation model
_CACHEPOINT_BASE_MODELS = set()

# Models that support cachePoint functionality
CACHEPOINT_SUPPORTED_MODELS = [
"us.anthropic.claude-3-5-haiku-20241022-v1:0",
@@ -111,6 +115,21 @@ class _RequestsConnectTimeout(Exception):
"global.anthropic.claude-opus-4-6-v1:1m",
]

# Build set of base model names (without region/tier prefixes) for inference profile resolution.
# e.g., "us.anthropic.claude-sonnet-4-6" -> "anthropic.claude-sonnet-4-6"
# and "eu.amazon.nova-2-lite-v1:0:priority" -> "amazon.nova-2-lite-v1:0"
for _model_id in CACHEPOINT_SUPPORTED_MODELS:
    _parts = _model_id.split(".", 1)
    if len(_parts) == 2 and _parts[0] in ("us", "eu", "global"):
        _base = _parts[1]
        # Strip tier suffixes (:priority, :flex) but keep version suffixes (:0, :1m)
        if _base.endswith(":priority") or _base.endswith(":flex"):
            _base = _base.rsplit(":", 1)[0]
        _CACHEPOINT_BASE_MODELS.add(_base)

# Module-level cache for inference profile -> cachepoint support resolution
_inference_profile_cachepoint_cache: Dict[str, bool] = {}


class BedrockClient:
"""Client for interacting with Amazon Bedrock models and custom Lambda hooks."""
@@ -139,6 +158,7 @@ def __init__(
        self.max_backoff = max_backoff
        self.metrics_enabled = metrics_enabled
        self._client = None
        self._bedrock_control_client = None
        self._lambda_client = None
        self._s3_client = None

@@ -164,6 +184,15 @@ def lambda_client(self):
            )
        return self._lambda_client

    @property
    def bedrock_control_client(self):
        """Lazy-loaded Bedrock control plane client for GetInferenceProfile etc."""
        if self._bedrock_control_client is None:
            self._bedrock_control_client = boto3.client(
                "bedrock", region_name=self.region
            )
        return self._bedrock_control_client

    @property
    def s3_client(self):
        """Lazy-loaded S3 client for LambdaHook image uploads."""
@@ -173,6 +202,93 @@ def s3_client(self):
            )
        return self._s3_client

    def _is_model_cachepoint_supported(self, model_id: str) -> bool:
        """
        Check if a model supports cachePoint, including inference profile resolution.

        For standard model IDs (e.g., "us.anthropic.claude-sonnet-4-6"), checks
        the CACHEPOINT_SUPPORTED_MODELS list directly.

        For inference profile ARNs (containing "inference-profile" or
        "application-inference-profile"), resolves the underlying foundation
        model via the GetInferenceProfile API and checks if that base model
        supports cachePoint. Results are cached to avoid repeated API calls.

        Args:
            model_id: Bedrock model ID or inference profile ARN

        Returns:
            True if the model (or underlying model for inference profiles) supports cachePoint
        """
        # Fast path: direct match against the known list
        if model_id in CACHEPOINT_SUPPORTED_MODELS:
            return True

        # Check if this is an inference profile ARN
        # ("application-inference-profile" also contains this substring)
        if "inference-profile" not in model_id:
            return False

        # Check module-level cache
        if model_id in _inference_profile_cachepoint_cache:
            cached = _inference_profile_cachepoint_cache[model_id]
            logger.debug(
                f"Inference profile cachepoint support (cached): {model_id} -> {cached}"
            )
            return cached

        # Resolve the inference profile to its underlying foundation model
        try:
            response = self.bedrock_control_client.get_inference_profile(
                inferenceProfileIdentifier=model_id
            )
            models = response.get("models", [])
            if not models:
                logger.warning(
                    f"Inference profile {model_id} has no models listed. "
                    "Cannot determine cachePoint support."
                )
                _inference_profile_cachepoint_cache[model_id] = False
                return False

            # Extract the base model name from the first model's ARN.
            # Model ARN format: arn:aws:bedrock:<region>::foundation-model/<base-model-name>
            # e.g., "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6"
            first_model_arn = models[0].get("modelArn", "")
            if "foundation-model/" in first_model_arn:
                base_model_name = first_model_arn.split("foundation-model/")[-1]
            else:
                logger.warning(
                    f"Cannot parse foundation model from ARN: {first_model_arn}"
                )
                _inference_profile_cachepoint_cache[model_id] = False
                return False

            supported = base_model_name in _CACHEPOINT_BASE_MODELS
            _inference_profile_cachepoint_cache[model_id] = supported

            logger.info(
                f"Resolved inference profile {model_id} -> "
                f"foundation model '{base_model_name}' -> "
                f"cachePoint {'supported' if supported else 'not supported'}"
            )
            return supported

        except ClientError as e:
            error_code = e.response["Error"]["Code"]
            logger.warning(
                f"Failed to resolve inference profile {model_id} for cachePoint check "
                f"({error_code}): {e}. Disabling cachePoint for this model."
            )
            _inference_profile_cachepoint_cache[model_id] = False
            return False
        except Exception as e:
            logger.warning(
                f"Unexpected error resolving inference profile {model_id} "
                f"for cachePoint check: {e}. Disabling cachePoint for this model."
            )
            _inference_profile_cachepoint_cache[model_id] = False
            return False

    def __call__(
        self,
        model_id: str,
@@ -375,7 +491,7 @@ def invoke_model(
        )

        if has_cachepoint_tags:
-            if model_id in CACHEPOINT_SUPPORTED_MODELS:
+            if self._is_model_cachepoint_supported(model_id):
                # Process content for cachePoint tags with supported model
                processed_content = self._preprocess_content_for_cachepoint(content)
                logger.info(
@@ -394,7 +510,9 @@ def invoke_model(
                        clean_text = item["text"].replace("<<CACHEPOINT>>", "")
                        processed_content.append({"text": clean_text})
                logger.warning(
-                    f"Removed <<CACHEPOINT>> tags for unsupported model: {model_id}. CachePoint is only supported for: {', '.join(CACHEPOINT_SUPPORTED_MODELS)}"
+                    f"Removed <<CACHEPOINT>> tags for unsupported model: {model_id}. "
+                    "CachePoint is supported for standard cross-region inference profiles "
+                    "and application inference profiles that wrap supported foundation models."
                )
        else:
            # Pass through unchanged