1212 AsyncStreamingCallbackT ,
1313 ChatMessage ,
1414 ComponentInfo ,
15+ ImageContent ,
1516 StreamingChunk ,
1617 SyncStreamingCallbackT ,
18+ TextContent ,
1719)
1820from haystack .lazy_imports import LazyImport
1921from haystack .utils .auth import Secret
@@ -258,11 +260,17 @@ def convert_message_to_hf_format(message: ChatMessage) -> Dict[str, Any]:
258260 text_contents = message .texts
259261 tool_calls = message .tool_calls
260262 tool_call_results = message .tool_call_results
263+ images = message .images
261264
262- if not text_contents and not tool_calls and not tool_call_results :
263- raise ValueError ("A `ChatMessage` must contain at least one `TextContent`, `ToolCall`, or `ToolCallResult`." )
264- if len (text_contents ) + len (tool_call_results ) > 1 :
265- raise ValueError ("A `ChatMessage` can only contain one `TextContent` or one `ToolCallResult`." )
265+ if not text_contents and not tool_calls and not tool_call_results and not images :
266+ raise ValueError (
267+ "A `ChatMessage` must contain at least one `TextContent`, `ToolCall`, `ToolCallResult`, or `ImageContent`."
268+ )
269+ if len (tool_call_results ) > 0 and len (message ._content ) > 1 :
270+ raise ValueError (
271+ "For compatibility with the Hugging Face API, a `ChatMessage` with a `ToolCallResult` "
272+ "cannot contain any other content."
273+ )
266274
267275 # HF always expects a content field, even if it is empty
268276 hf_msg : Dict [str , Any ] = {"role" : message ._role .value , "content" : "" }
@@ -275,8 +283,22 @@ def convert_message_to_hf_format(message: ChatMessage) -> Dict[str, Any]:
275283 # HF does not provide a way to communicate errors in tool invocations, so we ignore the error field
276284 return hf_msg
277285
278- if text_contents :
279- hf_msg ["content" ] = text_contents [0 ]
286+ # Handle multimodal content (text + images) preserving order
287+ if text_contents or images :
288+ content_parts : List [Dict [str , Any ]] = []
289+ for part in message ._content :
290+ if isinstance (part , TextContent ):
291+ content_parts .append ({"type" : "text" , "text" : part .text })
292+ elif isinstance (part , ImageContent ):
293+ image_url = f"data:{ part .mime_type or 'image/jpeg' } ;base64,{ part .base64_image } "
294+ content_parts .append ({"type" : "image_url" , "image_url" : {"url" : image_url }})
295+
296+ if len (content_parts ) == 1 and not images :
297+ # content is a string
298+ hf_msg ["content" ] = content_parts [0 ]["text" ]
299+ else :
300+ hf_msg ["content" ] = content_parts
301+
280302 if tool_calls :
281303 hf_tool_calls = []
282304 for tc in tool_calls :
0 commit comments