11import base64
22import json
3+ import os
4+ import re
35from datetime import datetime , timezone
46from typing import Any
57
1113 ChatMessage ,
1214 ChatRole ,
1315 ComponentInfo ,
16+ FileContent ,
1417 FinishReason ,
1518 ImageContent ,
1619 ReasoningContent ,
2629
2730
2831# see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ImageBlock.html for supported formats
29- IMAGE_SUPPORTED_FORMATS = ["png" , "jpeg" , "gif" , "webp" ]
# Image MIME type -> Bedrock image "format" value.
# Both "image/jpeg" and the common alias "image/jpg" resolve to "jpeg".
IMAGE_MIME_TYPE_TO_FORMAT: dict[str, str] = {
    "image/png": "png",
    "image/jpeg": "jpeg",
    "image/jpg": "jpeg",
    "image/gif": "gif",
    "image/webp": "webp",
}

# Document MIME type -> Bedrock document "format" value.
# https://docs.aws.amazon.com/cli/latest/reference/bedrock-runtime/converse.html
DOCUMENT_MIME_TYPE_TO_FORMAT: dict[str, str] = {
    "application/pdf": "pdf",
    "text/csv": "csv",
    "application/msword": "doc",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    "application/vnd.ms-excel": "xls",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
    "text/html": "html",
    "text/plain": "txt",
    "text/markdown": "md",
}

# Video MIME type -> Bedrock video "format" value.
# Note that 3GPP is spelled "three_gp" on the Bedrock side.
VIDEO_MIME_TYPE_TO_FORMAT: dict[str, str] = {
    "video/x-matroska": "mkv",
    "video/quicktime": "mov",
    "video/mp4": "mp4",
    "video/webm": "webm",
    "video/x-flv": "flv",
    "video/mpeg": "mpeg",
    "video/x-ms-wmv": "wmv",
    "video/3gpp": "three_gp",
}
3063
3164# see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_MessageStopEvent.html
3265FINISH_REASON_MAPPING : dict [str , FinishReason ] = {
@@ -70,11 +103,11 @@ def _convert_image_content_to_bedrock_format(image_content: ImageContent) -> dic
70103 Convert a Haystack ImageContent to Bedrock format.
71104 """
72105
73- image_format = image_content . mime_type . split ( "/" )[ - 1 ] if image_content .mime_type else None
74- if image_format not in IMAGE_SUPPORTED_FORMATS :
106+ image_format = IMAGE_MIME_TYPE_TO_FORMAT . get ( image_content .mime_type or "" )
107+ if image_format is None :
75108 err_msg = (
76- f"Unsupported image format : { image_format } . "
77- f"Bedrock supports the following image formats: { IMAGE_SUPPORTED_FORMATS } "
109+ f"Unsupported image MIME type : { image_content . mime_type } . "
110+ f"Bedrock supports the following image formats: { list ( set ( IMAGE_MIME_TYPE_TO_FORMAT . values ())) } "
78111 )
79112 raise ValueError (err_msg )
80113
@@ -83,6 +116,51 @@ def _convert_image_content_to_bedrock_format(image_content: ImageContent) -> dic
83116 return {"image" : {"format" : image_format , "source" : source }}
84117
85118
def _convert_file_content_to_bedrock_format(file_content: FileContent) -> dict[str, Any]:
    """
    Convert a Haystack FileContent to Bedrock format.

    The MIME type decides which kind of block is produced: document MIME types yield a ``document`` block and
    video MIME types yield a ``video`` block. The lookup ignores letter case and any MIME parameters
    (for example ``; charset=utf-8``).

    :param file_content: Haystack FileContent providing the base64-encoded data, the MIME type, an optional
        filename and optional ``extra`` entries. For documents, truthy ``context`` and ``citations`` entries of
        ``extra`` are forwarded to Bedrock.
    :returns: Dictionary holding either a Bedrock ``document`` block or a Bedrock ``video`` block.
    :raises ValueError: If the MIME type is missing, or is neither a supported document nor a supported video type.
    """
    if file_content.mime_type is None:
        err_msg = "MIME type is required to use FileContent in Bedrock."
        raise ValueError(err_msg)

    # MIME types are case-insensitive and may carry parameters; match on the bare, lowercased type/subtype only.
    mime_type = file_content.mime_type.split(";", 1)[0].strip().lower()
    doc_format = DOCUMENT_MIME_TYPE_TO_FORMAT.get(mime_type)
    video_format = VIDEO_MIME_TYPE_TO_FORMAT.get(mime_type)

    if doc_format is None and video_format is None:
        err_msg = (
            f"Unsupported file content MIME type: {file_content.mime_type}\n"
            f"Bedrock supports the following formats:\n - Documents: {list(DOCUMENT_MIME_TYPE_TO_FORMAT.values())}\n"
            f" - Videos: {list(VIDEO_MIME_TYPE_TO_FORMAT.values())}"
        )
        raise ValueError(err_msg)

    # Decode only after the MIME type is known to be supported.
    source = {"bytes": base64.b64decode(file_content.base64_data)}

    if doc_format is None:
        return {"video": {"format": video_format, "source": source}}

    # Bedrock requires name to be present but is very strict about the format.
    # See https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_DocumentBlock.html
    max_name_length = 200
    name = "filename"
    if file_content.filename:
        raw_name = os.path.splitext(file_content.filename)[0]
        # Keep only the allowed characters, then collapse whitespace runs into a single space.
        sanitized_name = re.sub(r"[^a-zA-Z0-9\s\-\[\]()]", "", raw_name)
        sanitized_name = re.sub(r"\s+", " ", sanitized_name).strip()
        # Enforce the maximum length; strip again because the cut may land right after a space.
        sanitized_name = sanitized_name[:max_name_length].strip()
        if sanitized_name:
            name = sanitized_name

    document: dict[str, Any] = {"format": doc_format, "source": source, "name": name}
    # Optional document settings are forwarded only when the caller supplied a truthy value.
    for key in ("context", "citations"):
        if file_content.extra.get(key):
            document[key] = file_content.extra[key]

    return {"document": document}
162+
163+
86164def _format_tool_call_message (tool_call_message : ChatMessage ) -> dict [str , Any ]:
87165 """
88166 Format a Haystack ChatMessage containing tool calls into Bedrock format.
@@ -231,31 +309,48 @@ def _format_reasoning_content(reasoning_content: ReasoningContent) -> list[dict[
231309 return formatted_contents
232310
233311
234- def _format_text_image_message (message : ChatMessage ) -> dict [str , Any ]:
def _format_user_message(message: ChatMessage) -> dict[str, Any]:
    """
    Build the Bedrock representation of a Haystack user ChatMessage.

    Content parts are visited in order: text parts become ``text`` blocks, image parts are converted to image
    blocks and file parts are converted to document or video blocks. Parts of any other kind are left out.

    :param message: Haystack ChatMessage.
    :returns: Dictionary with the message role and the list of Bedrock content blocks.
    """
    blocks: list[dict[str, Any]] = []

    for content_part in message._content:
        if isinstance(content_part, TextContent):
            blocks.append({"text": content_part.text})
            continue

        if isinstance(content_part, ImageContent):
            blocks.append(_convert_image_content_to_bedrock_format(content_part))
            continue

        if isinstance(content_part, FileContent):
            blocks.append(_convert_file_content_to_bedrock_format(content_part))

    return {"role": message.role.value, "content": blocks}
334+
335+
def _format_textual_assistant_message(message: ChatMessage) -> dict[str, Any]:
    """
    Build the Bedrock representation of a Haystack assistant ChatMessage made of text and optional reasoning.

    :param message: Haystack ChatMessage.
    :returns: Dictionary with the message role and the list of Bedrock content blocks.
    """
    blocks: list[dict[str, Any]] = []

    # Reasoning blocks, when present, always precede the text blocks.
    if message.reasoning:
        blocks.extend(_format_reasoning_content(reasoning_content=message.reasoning))

    # Only text parts are emitted; every other kind of content part is left out.
    blocks.extend(
        {"text": content_part.text} for content_part in message._content if isinstance(content_part, TextContent)
    )

    return {"role": message.role.value, "content": blocks}
260355
261356
@@ -314,8 +409,10 @@ def _format_messages(messages: list[ChatMessage]) -> tuple[list[dict[str, Any]],
314409 formatted_msg = _format_tool_call_message (msg )
315410 elif msg .tool_call_results :
316411 formatted_msg = _format_tool_result_message (msg )
317- else :
318- formatted_msg = _format_text_image_message (msg )
412+ elif msg .is_from (ChatRole .USER ):
413+ formatted_msg = _format_user_message (msg )
414+ elif msg .is_from (ChatRole .ASSISTANT ):
415+ formatted_msg = _format_textual_assistant_message (msg )
319416 if cache_point :
320417 formatted_msg ["content" ].append (cache_point )
321418 bedrock_formatted_messages .append (formatted_msg )
@@ -386,6 +483,14 @@ def _parse_completion_response(response_body: dict[str, Any], model: str) -> lis
386483 if "redactedContent" in reasoning_content :
387484 reasoning_content ["redacted_content" ] = reasoning_content .pop ("redactedContent" )
388485 reasoning_contents .append ({"reasoning_content" : reasoning_content })
486+ elif "citationsContent" in content_block :
487+ citations_content = content_block ["citationsContent" ]
488+ meta ["citations" ] = citations_content
489+ if "content" in citations_content :
490+ for entry in citations_content ["content" ]:
491+ text = entry .get ("text" , "" )
492+ if text .strip ():
493+ text_content .append (text )
389494
390495 reasoning_text = ""
391496 for content in reasoning_contents :
@@ -397,7 +502,7 @@ def _parse_completion_response(response_body: dict[str, Any], model: str) -> lis
397502 # Create a single ChatMessage with combined text and tool calls
398503 replies .append (
399504 ChatMessage .from_assistant (
400- " " .join (text_content ),
505+ "" .join (text_content ),
401506 tool_calls = tool_calls ,
402507 meta = meta ,
403508 reasoning = ReasoningContent (
0 commit comments