deepset-ai
diff --git a/‎.github/labeler.yml‎
Lines changed: 9 additions & 9 deletions b/‎.github/labeler.yml‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎.github/workflows/CI_check_api_ref.yml‎
Lines changed: 5 additions & 0 deletions b/‎.github/workflows/CI_check_api_ref.yml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.github/workflows/mcp.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/mcp.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 17 additions & 17 deletions b/‎CONTRIBUTING.md‎
Lines changed: 17 additions & 17 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎integrations/amazon_bedrock/CHANGELOG.md‎
Lines changed: 12 additions & 0 deletions b/‎integrations/amazon_bedrock/CHANGELOG.md‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎integrations/amazon_bedrock/pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎integrations/amazon_bedrock/pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py‎
Lines changed: 122 additions & 17 deletions b/‎integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py‎
Lines changed: 122 additions & 17 deletions
@@ -223,25 +223,25 @@ integration:unstructured-fileconverter:
       - any-glob-to-any-file: "integrations/unstructured/**/*"
       - any-glob-to-any-file: ".github/workflows/unstructured.yml"
 
-integration:watsonx:
+integration:valkey:
   - changed-files:
-      - any-glob-to-any-file: "integrations/watsonx/**/*"
-      - any-glob-to-any-file: ".github/workflows/watsonx.yml"      
+      - any-glob-to-any-file: "integrations/valkey/**/*"
+      - any-glob-to-any-file: ".github/workflows/valkey.yml"
 
-integration:weaviate:
+integration:watsonx:
   - changed-files:
-      - any-glob-to-any-file: "integrations/weaviate/**/*"
-      - any-glob-to-any-file: ".github/workflows/weaviate.yml"
+      - any-glob-to-any-file: "integrations/watsonx/**/*"
+      - any-glob-to-any-file: ".github/workflows/watsonx.yml"
 
 integration:weave:
   - changed-files:
       - any-glob-to-any-file: "integrations/weave/**/*"
       - any-glob-to-any-file: ".github/workflows/weave.yml"
 
-integration:valkey:
+integration:weaviate:
   - changed-files:
-      - any-glob-to-any-file: "integrations/valkey/**/*"
-      - any-glob-to-any-file: ".github/workflows/valkey.yml"
+      - any-glob-to-any-file: "integrations/weaviate/**/*"
+      - any-glob-to-any-file: ".github/workflows/weaviate.yml"
 
 # Topics
 topic:CI:
 
@@ -109,6 +109,11 @@ jobs:
         if: steps.changed.outputs.integrations != '[]'
         working-directory: website
         run: |
+          # docusaurus-mdx-checker is a package that is not frequently updated. Its dependency katex sometimes ships a 
+          # broken ESM build, where a __VERSION__ placeholder is left unresolved, causing a ReferenceError at import time.
+          # Node 22+ prefers ESM when available. We force CJS (CommonJS) resolution to use the working katex build.
+          # This should be safe because docusaurus-mdx-checker and its dependencies provide CJS builds.
+          export NODE_OPTIONS="--conditions=require"
           npx docusaurus-mdx-checker -v || {
               echo ""
               echo "For common MDX problems, see https://docusaurus.io/blog/preparing-your-site-for-docusaurus-v3#common-mdx-problems"
 
@@ -53,7 +53,7 @@ jobs:
 
       - name: Set up Docker
         if: runner.os == 'Linux'
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
 
       # we need to pull the mcp/brave-search image to run the test
       # on the actual mcp server as an example of real life usage
 
@@ -236,31 +236,31 @@ $ hatch run test:integration
 > Core integrations follow the naming convention `PREFIX-haystack`, where `PREFIX` can be the name of the technology
 > you're integrating Haystack with. For example, a deepset integration would be named as `deepset-haystack`.
 
-To create a new integration, from the root of the repo change directory into `integrations`:
+To create a new integration, run the scaffold script from the root of the repository:
 
 ```sh
-cd integrations
+python scripts/create_new_integration.py
 ```
 
-From there, use `hatch` to create the scaffold of the new integration:
+The script will interactively ask you for the integration **name** (e.g. `opensearch`, `amazon_bedrock`) and
+**component type** (e.g. `document_stores`, `generators`, `embedders`). You can also pass these as command-line
+arguments to skip the prompts:
 
 ```sh
-$ hatch --config hatch.toml new -i
-Project name: deepset-haystack
-Description []: An example integration, this text can be edited later
-
-deepset-haystack
-├── src
-│   └── deepset_haystack
-│       ├── __about__.py
-│       └── __init__.py
-├── tests
-│   └── __init__.py
-├── LICENSE.txt
-├── README.md
-└── pyproject.toml
+python scripts/create_new_integration.py --name YOUR_INTEGRATION_NAME --type YOUR_COMPONENT_TYPE
 ```
 
+The script takes care of the full setup in one step:
+
+- Scaffolds the integration folder under `integrations/` with the correct project structure (`pyproject.toml`,
+  source package, tests, pydoc config, example components, and a README).
+- Creates a GitHub Actions CI workflow at `.github/workflows/<name>.yml`.
+- Adds label rules to `.github/labeler.yml`.
+- Adds the new integration to the table in the root `README.md`.
+
+Once the script finishes, follow the printed next steps to fill in your component code, add dependencies, and
+write tests.
+
 ### Improving The Documentation
 
 There are two types of documentation for this project: Python API docs, and Documentation pages
 
@@ -45,6 +45,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
 | [google-ai-haystack](integrations/google_ai/)                           | Generator                   | [![PyPI - Version](https://img.shields.io/pypi/v/google-ai-haystack.svg)](https://pypi.org/project/google-ai-haystack)                                   | **Archived** - use [google-genai-haystack](https://pypi.org/project/google-genai-haystack) instead                                                                                                                                                         |
 | [google-genai-haystack](integrations/google_genai/)                     | Embedder, Generator                   | [![PyPI - Version](https://img.shields.io/pypi/v/google-genai-haystack.svg)](https://pypi.org/project/google-genai-haystack)                             | [![Test / google-genai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_genai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_genai.yml)                               |
 | [google-vertex-haystack](integrations/google_vertex/)                   | Embedder, Generator                   | [![PyPI - Version](https://img.shields.io/pypi/v/google-vertex-haystack.svg)](https://pypi.org/project/google-vertex-haystack)                           | **Archived** - use [google-genai-haystack](https://pypi.org/project/google-genai-haystack) instead                                                                                                                                                         |
+| [hallo-haystack](integrations/hallo/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/hallo-haystack.svg)](https://pypi.org/project/hallo-haystack) | [![Test / hallo](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/hallo.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/hallo.yml) |
 | [hanlp-haystack](integrations/hanlp/)                                   | Preprocessor                | [![PyPI - Version](https://img.shields.io/pypi/v/hanlp-haystack.svg)](https://pypi.org/project/hanlp-haystack)                                           | [![Test / hanlp](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/hanlp.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/hanlp.yml)                                                    |
 | [jina-haystack](integrations/jina/)                                     | Connector, Embedder, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack)                                             | [![Test / jina](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml)                                                       |
 | [langfuse-haystack](integrations/langfuse/)                             | Tracer                      | [![PyPI - Version](https://img.shields.io/pypi/v/langfuse-haystack.svg?color=orange)](https://pypi.org/project/langfuse-haystack)                        | [![Test / langfuse](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/langfuse.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/langfuse.yml)                                           |
 
@@ -1,5 +1,17 @@
 # Changelog
 
+## [integrations/amazon_bedrock-v6.5.0] - 2026-03-03
+
+### 🚀 Features
+
+- Bedrock - support for FileContent + citations (#2883)
+
+### 📚 Documentation
+
+- Fix docstring for AmazonBedrockChatGenerator (#2813)
+- Simplify pydoc configs (#2855)
+
+
 ## [integrations/amazon_bedrock-v6.4.0] - 2026-02-05
 
 ### 🚀 Features
 
@@ -22,7 +22,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.23.0", "boto3>=1.28.57", "aioboto3>=14.0.0"]
+dependencies = ["haystack-ai>=2.24.1", "boto3>=1.28.57", "aioboto3>=14.0.0"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme"
 
@@ -1,5 +1,7 @@
 import base64
 import json
+import os
+import re
 from datetime import datetime, timezone
 from typing import Any
 
@@ -11,6 +13,7 @@
     ChatMessage,
     ChatRole,
     ComponentInfo,
+    FileContent,
     FinishReason,
     ImageContent,
     ReasoningContent,
@@ -26,7 +29,37 @@
 
 
 # see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ImageBlock.html for supported formats
-IMAGE_SUPPORTED_FORMATS = ["png", "jpeg", "gif", "webp"]
+# Maps an image MIME type to the value sent as the Bedrock image block "format".
+# Both "image/jpeg" and "image/jpg" map to "jpeg".
+IMAGE_MIME_TYPE_TO_FORMAT: dict[str, str] = {
+    "image/png": "png",
+    "image/jpeg": "jpeg",
+    "image/jpg": "jpeg",
+    "image/gif": "gif",
+    "image/webp": "webp",
+}
+
+# https://docs.aws.amazon.com/cli/latest/reference/bedrock-runtime/converse.html
+# Maps a document MIME type to the value sent as the Bedrock document block "format".
+DOCUMENT_MIME_TYPE_TO_FORMAT: dict[str, str] = {
+    "application/pdf": "pdf",
+    "text/csv": "csv",
+    "application/msword": "doc",
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
+    "application/vnd.ms-excel": "xls",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
+    "text/html": "html",
+    "text/plain": "txt",
+    "text/markdown": "md",
+}
+
+# Maps a video MIME type to the value sent as the Bedrock video block "format".
+VIDEO_MIME_TYPE_TO_FORMAT: dict[str, str] = {
+    "video/x-matroska": "mkv",
+    "video/quicktime": "mov",
+    "video/mp4": "mp4",
+    "video/webm": "webm",
+    "video/x-flv": "flv",
+    "video/mpeg": "mpeg",
+    "video/x-ms-wmv": "wmv",
+    "video/3gpp": "three_gp",
+}
 
 # see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_MessageStopEvent.html
 FINISH_REASON_MAPPING: dict[str, FinishReason] = {
@@ -70,11 +103,11 @@ def _convert_image_content_to_bedrock_format(image_content: ImageContent) -> dic
     Convert a Haystack ImageContent to Bedrock format.
     """
 
-    image_format = image_content.mime_type.split("/")[-1] if image_content.mime_type else None
-    if image_format not in IMAGE_SUPPORTED_FORMATS:
+    image_format = IMAGE_MIME_TYPE_TO_FORMAT.get(image_content.mime_type or "")
+    if image_format is None:
         err_msg = (
-            f"Unsupported image format: {image_format}. "
-            f"Bedrock supports the following image formats: {IMAGE_SUPPORTED_FORMATS}"
+            f"Unsupported image MIME type: {image_content.mime_type}. "
+            f"Bedrock supports the following image formats: {list(set(IMAGE_MIME_TYPE_TO_FORMAT.values()))}"
         )
         raise ValueError(err_msg)
 
@@ -83,6 +116,51 @@ def _convert_image_content_to_bedrock_format(image_content: ImageContent) -> dic
     return {"image": {"format": image_format, "source": source}}
 
 
+def _convert_file_content_to_bedrock_format(file_content: FileContent) -> dict[str, Any]:
+    """
+    Convert a Haystack FileContent to Bedrock format.
+
+    MIME types listed in `DOCUMENT_MIME_TYPE_TO_FORMAT` produce a `document` block; MIME types listed in
+    `VIDEO_MIME_TYPE_TO_FORMAT` produce a `video` block.
+
+    :param file_content: Haystack FileContent holding base64-encoded data. For documents, the `context` and
+        `citations` entries of `file_content.extra` are forwarded to Bedrock when they are truthy.
+    :returns: Dictionary with a single `document` or `video` key in Bedrock's expected format.
+    :raises ValueError: If the MIME type is missing, or is neither a supported document nor a supported video type.
+    """
+
+    if file_content.mime_type is None:
+        err_msg = "MIME type is required to use FileContent in Bedrock."
+        raise ValueError(err_msg)
+
+    doc_format = DOCUMENT_MIME_TYPE_TO_FORMAT.get(file_content.mime_type)
+    video_format = VIDEO_MIME_TYPE_TO_FORMAT.get(file_content.mime_type)
+
+    # Reject unsupported types before touching the payload, so nothing is decoded for a file we cannot send.
+    if doc_format is None and video_format is None:
+        err_msg = (
+            f"Unsupported file content MIME type: {file_content.mime_type}\n"
+            f"Bedrock supports the following formats:\n - Documents: {list(DOCUMENT_MIME_TYPE_TO_FORMAT.values())}\n"
+            f" - Videos: {list(VIDEO_MIME_TYPE_TO_FORMAT.values())}"
+        )
+        raise ValueError(err_msg)
+
+    source = {"bytes": base64.b64decode(file_content.base64_data)}
+
+    if doc_format is None:
+        return {"video": {"format": video_format, "source": source}}
+
+    name = "filename"
+    if file_content.filename:
+        raw_name = os.path.splitext(file_content.filename)[0]
+        # Bedrock requires name to be present but is very strict about the format.
+        # See https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_DocumentBlock.html
+        # Drop every disallowed character, then collapse whitespace runs into a single space.
+        sanitized_name = re.sub(r"\s+", " ", re.sub(r"[^a-zA-Z0-9\s\-\[\]()]", "", raw_name)).strip()
+        # The same reference caps the name at 200 characters; strip again in case the cut ends on a space.
+        sanitized_name = sanitized_name[:200].strip()
+        if sanitized_name:
+            name = sanitized_name
+
+    document: dict[str, Any] = {"format": doc_format, "source": source, "name": name}
+    # Optional Bedrock fields are only sent when the caller supplied a truthy value in `extra`.
+    for optional_key in ("context", "citations"):
+        if optional_value := file_content.extra.get(optional_key):
+            document[optional_key] = optional_value
+
+    return {"document": document}
+
+
+
+
 def _format_tool_call_message(tool_call_message: ChatMessage) -> dict[str, Any]:
     """
     Format a Haystack ChatMessage containing tool calls into Bedrock format.
@@ -231,31 +309,48 @@ def _format_reasoning_content(reasoning_content: ReasoningContent) -> list[dict[
     return formatted_contents
 
 
-def _format_text_image_message(message: ChatMessage) -> dict[str, Any]:
+def _format_user_message(message: ChatMessage) -> dict[str, Any]:
     """
-    Format a Haystack ChatMessage containing text and optional image content into Bedrock format.
+    Format a Haystack user ChatMessage into Bedrock format.
 
+    Text, image and file parts are converted in their original order; parts of any other type are skipped.
+
     :param message: Haystack ChatMessage.
     :returns: Dictionary representing the message in Bedrock's expected format.
-    :raises ValueError: If image content is found in an assistant message or an unsupported image format is used.
+    :raises ValueError: If an image or file part has a missing or unsupported MIME type.
     """
     content_parts = message._content
 
     bedrock_content_blocks: list[dict[str, Any]] = []
-    # Add reasoning content if available as the first content block
-    if message.reasoning:
-        bedrock_content_blocks.extend(_format_reasoning_content(reasoning_content=message.reasoning))
 
     for part in content_parts:
         if isinstance(part, TextContent):
             bedrock_content_blocks.append({"text": part.text})
 
         elif isinstance(part, ImageContent):
-            if message.is_from(ChatRole.ASSISTANT):
-                err_msg = "Image content is not supported for assistant messages"
-                raise ValueError(err_msg)
             bedrock_content_blocks.append(_convert_image_content_to_bedrock_format(part))
 
+        # Documents and videos share the FileContent type; the converter picks the block kind from the MIME type.
+        elif isinstance(part, FileContent):
+            bedrock_content_blocks.append(_convert_file_content_to_bedrock_format(part))
+
+    return {"role": message.role.value, "content": bedrock_content_blocks}
+
+
+def _format_textual_assistant_message(message: ChatMessage) -> dict[str, Any]:
+    """
+    Build the Bedrock representation of an assistant ChatMessage made of text and, optionally, reasoning.
+
+    Reasoning blocks (when the message carries reasoning) come first, followed by one `text` block per
+    TextContent part. Parts of any other type are ignored.
+
+    :param message: Haystack ChatMessage.
+    :returns: Dictionary representing the message in Bedrock's expected format.
+    """
+    reasoning_blocks = _format_reasoning_content(reasoning_content=message.reasoning) if message.reasoning else []
+    text_blocks = [{"text": part.text} for part in message._content if isinstance(part, TextContent)]
+    bedrock_content_blocks: list[dict[str, Any]] = [*reasoning_blocks, *text_blocks]
+
     return {"role": message.role.value, "content": bedrock_content_blocks}
 
 
@@ -314,8 +409,10 @@ def _format_messages(messages: list[ChatMessage]) -> tuple[list[dict[str, Any]],
             formatted_msg = _format_tool_call_message(msg)
         elif msg.tool_call_results:
             formatted_msg = _format_tool_result_message(msg)
-        else:
-            formatted_msg = _format_text_image_message(msg)
+        elif msg.is_from(ChatRole.USER):
+            formatted_msg = _format_user_message(msg)
+        elif msg.is_from(ChatRole.ASSISTANT):
+            formatted_msg = _format_textual_assistant_message(msg)
         if cache_point:
             formatted_msg["content"].append(cache_point)
         bedrock_formatted_messages.append(formatted_msg)
@@ -386,6 +483,14 @@ def _parse_completion_response(response_body: dict[str, Any], model: str) -> lis
                     if "redactedContent" in reasoning_content:
                         reasoning_content["redacted_content"] = reasoning_content.pop("redactedContent")
                     reasoning_contents.append({"reasoning_content": reasoning_content})
+                elif "citationsContent" in content_block:
+                    citations_content = content_block["citationsContent"]
+                    meta["citations"] = citations_content
+                    if "content" in citations_content:
+                        for entry in citations_content["content"]:
+                            text = entry.get("text", "")
+                            if text.strip():
+                                text_content.append(text)
 
             reasoning_text = ""
             for content in reasoning_contents:
@@ -397,7 +502,7 @@ def _parse_completion_response(response_body: dict[str, Any], model: str) -> lis
             # Create a single ChatMessage with combined text and tool calls
             replies.append(
                 ChatMessage.from_assistant(
-                    " ".join(text_content),
+                    "".join(text_content),
                     tool_calls=tool_calls,
                     meta=meta,
                     reasoning=ReasoningContent(
Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ classifiers = [`
`22`	`22`	`"Programming Language :: Python :: Implementation :: CPython",`
`23`	`23`	`"Programming Language :: Python :: Implementation :: PyPy",`
`24`	`24`	`]`
`25`		`-dependencies = ["haystack-ai>=2.23.0", "boto3>=1.28.57", "aioboto3>=14.0.0"]`
	`25`	`+dependencies = ["haystack-ai>=2.24.1", "boto3>=1.28.57", "aioboto3>=14.0.0"]`
`26`	`26`
`27`	`27`	`[project.urls]`
`28`	`28`	`Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme"`