Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/anthropic/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.23.0", "anthropic>=0.47.0"]
dependencies = ["haystack-ai>=2.24.1", "anthropic>=0.47.0"]

[project.urls]
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/anthropic#readme"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import Any, Literal, cast, get_args
from typing import Any, Literal, TypeAlias, cast, get_args

from haystack.dataclasses.chat_message import (
ChatMessage,
ChatRole,
FileContent,
ReasoningContent,
TextContent,
ToolCall,
Expand All @@ -18,6 +19,8 @@

from anthropic.resources.messages.messages import RawMessageStreamEvent
from anthropic.types import (
Base64PDFSourceParam,
DocumentBlockParam,
ImageBlockParam,
MessageParam,
RedactedThinkingBlockParam,
Expand All @@ -31,7 +34,6 @@
ImageFormat = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
IMAGE_SUPPORTED_FORMATS: list[ImageFormat] = list(get_args(ImageFormat))


# Mapping from Anthropic stop reasons to Haystack FinishReason values
FINISH_REASON_MAPPING: dict[str, FinishReason] = {
"end_turn": "stop",
Expand All @@ -42,6 +44,16 @@
"tool_use": "tool_calls",
}

# List type for the content blocks of a single Anthropic message. It is used to annotate
# the `content` list built during message conversion and the list updated with tool call results.
AnthropicContentBlocks: TypeAlias = list[
ImageBlockParam
| DocumentBlockParam
| ThinkingBlockParam
| RedactedThinkingBlockParam
| ToolUseBlockParam
| ToolResultBlockParam
| TextBlockParam
]


def _convert_image_content_to_anthropic_format(image_content: ImageContent) -> ImageBlockParam:
"""
Expand All @@ -65,16 +77,31 @@ def _convert_image_content_to_anthropic_format(image_content: ImageContent) -> I
)


def _convert_file_content_to_anthropic_format(file_content: FileContent) -> DocumentBlockParam:
    """
    Build the Anthropic document block for a Haystack FileContent.

    Only PDF files are accepted. The optional "context" and "title" entries of
    `file_content.extra` are forwarded to the document block; a missing entry is passed as None.

    :param file_content: The file to convert.
    :returns: A document block whose source carries the base64 data of the PDF.
    :raises ValueError: If the MIME type of the file is not "application/pdf".
    """
    mime_type = file_content.mime_type
    if mime_type != "application/pdf":
        msg = f"Unsupported file format: {mime_type}. Anthropic supports only PDF files."
        raise ValueError(msg)

    pdf_source = Base64PDFSourceParam(type="base64", media_type="application/pdf", data=file_content.base64_data)
    extra = file_content.extra

    return DocumentBlockParam(
        type="document",
        source=pdf_source,
        context=extra.get("context"),
        title=extra.get("title"),
    )


def _update_anthropic_message_with_tool_call_results(
tool_call_results: list[ToolCallResult],
content: list[
TextBlockParam
| ToolUseBlockParam
| ToolResultBlockParam
| ImageBlockParam
| ThinkingBlockParam
| RedactedThinkingBlockParam
],
content: AnthropicContentBlocks,
) -> None:
"""
Update an Anthropic message content list with tool call results.
Expand Down Expand Up @@ -161,14 +188,7 @@ def _convert_messages_to_anthropic_format(
i += 1
continue

content: list[
TextBlockParam
| ToolUseBlockParam
| ToolResultBlockParam
| ImageBlockParam
| ThinkingBlockParam
| RedactedThinkingBlockParam
] = []
content: AnthropicContentBlocks = []

# Handle multimodal content (text, images and files) preserving order
for part in message._content:
Expand Down Expand Up @@ -204,6 +224,14 @@ def _convert_messages_to_anthropic_format(
if cache_control:
image_block["cache_control"] = cache_control
content.append(image_block)
elif isinstance(part, FileContent):
if not message.is_from(ChatRole.USER):
msg = "File content is only supported for user messages"
raise ValueError(msg)
document_block = _convert_file_content_to_anthropic_format(part)
if cache_control:
document_block["cache_control"] = cache_control
content.append(document_block)

if message.tool_calls:
tool_use_blocks = _convert_tool_calls_to_anthropic_format(message.tool_calls)
Expand Down
30 changes: 30 additions & 0 deletions integrations/anthropic/tests/test_chat_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from haystack.dataclasses import (
ChatMessage,
ChatRole,
FileContent,
ImageContent,
StreamingChunk,
TextContent,
Expand Down Expand Up @@ -1265,6 +1266,35 @@ def test_live_run_multimodal(self, test_files_path):
assert len(message.text) > 0
assert any(word in message.text.lower() for word in ["apple", "fruit", "red"])

@pytest.mark.integration
@pytest.mark.skipif(
    not os.environ.get("ANTHROPIC_API_KEY", None),
    reason="Export an env var called ANTHROPIC_API_KEY containing the Anthropic token to run this test.",
)
def test_live_run_with_file_content(self, test_files_path):
    """Send a PDF together with a yes/no question to the live API and check the reply."""
    document = FileContent.from_file_path(
        file_path=test_files_path / "sample_pdf_3.pdf",
        extra={"context": "This document contains a table", "title": "A nice PDF"},
    )
    question = "Is this document a paper about LLMs? Respond with 'yes' or 'no' only."
    user_message = ChatMessage.from_user(content_parts=[document, question])

    component = AnthropicChatGenerator(model="claude-haiku-4-5")
    replies = component.run([user_message])["replies"]

    # Exactly one assistant reply is expected for a single user turn.
    assert len(replies) == 1
    reply: ChatMessage = replies[0]
    assert reply.is_from(ChatRole.ASSISTANT)

    # The sample PDF is not an LLM paper, so the answer should contain "no".
    assert reply.text
    assert "no" in reply.text.lower()

@pytest.mark.integration
@pytest.mark.skipif(
not os.environ.get("ANTHROPIC_API_KEY", None),
Expand Down
Binary file not shown.
42 changes: 42 additions & 0 deletions integrations/anthropic/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import json

import pytest
Expand All @@ -23,6 +24,7 @@
ChatMessage,
ChatRole,
ComponentInfo,
FileContent,
ImageContent,
StreamingChunk,
TextContent,
Expand All @@ -38,6 +40,7 @@
FINISH_REASON_MAPPING,
_convert_anthropic_chunk_to_streaming_chunk,
_convert_chat_completion_to_chat_message,
_convert_file_content_to_anthropic_format,
_convert_image_content_to_anthropic_format,
_convert_messages_to_anthropic_format,
_finalize_reasoning_group,
Expand Down Expand Up @@ -705,6 +708,13 @@ def test_convert_image_content_to_anthropic_format_with_none_mime_type(self):
with pytest.raises(ValueError, match="Unsupported image format: None"):
_convert_image_content_to_anthropic_format(image_content)

def test_convert_file_content_to_anthropic_format_with_unsupported_mime_type(self):
    """A FileContent whose MIME type is not PDF must be rejected with a ValueError."""
    png_payload = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
    not_a_pdf = FileContent(base64_data=png_payload, mime_type="image/png")

    with pytest.raises(ValueError, match="Unsupported file format: image/png"):
        _convert_file_content_to_anthropic_format(not_a_pdf)

def test_convert_message_to_anthropic_format_from_system(self):
messages = [ChatMessage.from_system("You are good assistant")]
assert _convert_messages_to_anthropic_format(messages) == (
Expand Down Expand Up @@ -920,6 +930,31 @@ def test_convert_message_to_anthropic_format_with_image(self):
assert anthropic_message["content"][1]["source"]["media_type"] == "image/png"
assert anthropic_message["content"][1]["source"]["data"] == base64_image

def test_convert_message_to_anthropic_format_with_file_content(self, test_files_path):
    """A user message holding text and a PDF converts to a text block followed by a document block."""
    with open(test_files_path / "sample_pdf_3.pdf", "rb") as pdf_file:
        encoded_pdf = base64.b64encode(pdf_file.read()).decode("utf-8")

    pdf_part = FileContent(
        base64_data=encoded_pdf,
        mime_type="application/pdf",
        extra={"context": "This document contains a table", "title": "A nice PDF"},
    )
    user_message = ChatMessage.from_user(content_parts=["Describe this document", pdf_part])

    _, converted = _convert_messages_to_anthropic_format([user_message])

    expected_text_block = {"type": "text", "text": "Describe this document"}
    expected_document_block = {
        "type": "document",
        "source": {"type": "base64", "media_type": "application/pdf", "data": encoded_pdf},
        "context": "This document contains a table",
        "title": "A nice PDF",
    }
    # Content parts must keep the order they had in the ChatMessage.
    assert converted == [
        {
            "role": "user",
            "content": [expected_text_block, expected_document_block],
        }
    ]

def test_convert_message_to_anthropic_invalid(self):
"""
Test that the AnthropicChatGenerator component fails to convert an invalid ChatMessage to Anthropic format.
Expand All @@ -937,6 +972,13 @@ def test_convert_message_to_anthropic_invalid(self):
with pytest.raises(ValueError):
_convert_messages_to_anthropic_format([message])

base64_data = "JVBERi0xLjEKMSAwIG9iago8PC9UeXBlL0NhdGFsb2c+PgplbmRvYmoKdHJhaWxlcgo8PC9Sb290IDEgMCBSPj4KJSVFT0Y="
file_content = FileContent(base64_data=base64_data, mime_type="application/pdf")
message = ChatMessage.from_assistant()
message._content = [file_content]
with pytest.raises(ValueError, match="File content is only supported for user messages"):
_convert_messages_to_anthropic_format([message])

def test_finalize_reasoning_group_with_thinking_text(self):
"""Test that _finalize_reasoning_group appends a reasoning_text entry."""
formatted: list = []
Expand Down
Loading