diff --git a/.github/workflows/docs-website-test-docs-snippets.yml b/.github/workflows/docs-website-test-docs-snippets.yml index 6779f86b67..8b6f0b6af9 100644 --- a/.github/workflows/docs-website-test-docs-snippets.yml +++ b/.github/workflows/docs-website-test-docs-snippets.yml @@ -11,22 +11,22 @@ on: default: 'main' type: string - # TEMPORARILY DISABLED - # push: - # paths: - # - 'docs-website/docs/**' - # - 'docs-website/versioned_docs/**' - # - 'docs-website/scripts/test_python_snippets.py' - # - 'docs-website/scripts/generate_requirements.py' - # - '.github/workflows/docs-website-test-docs-snippets.yml' +env: + HATCH_VERSION: "1.16.5" + PYTHON_VERSION: "3.11" jobs: test-docs-snippets: runs-on: ubuntu-latest - timeout-minutes: 20 + timeout-minutes: 30 env: - # TODO: We'll properly set these after migration to core project + AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} + AZURE_OPENAI_AD_TOKEN: ${{ secrets.AZURE_OPENAI_AD_TOKEN }} + AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + CORE_AZURE_CS_ENDPOINT: ${{ secrets.CORE_AZURE_CS_ENDPOINT }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + SERPERDEV_API_KEY: ${{ secrets.SERPERDEV_API_KEY }} + steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -36,34 +36,28 @@ jobs: with: python-version: '3.11' + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} + + - name: Generate API reference for Docusaurus + run: hatch run docs + - name: Install base dependencies run: | python -m pip install --upgrade pip pip install requests toml - - name: Generate requirements.txt - run: | - # Use input version or default to main - if [ "${{ github.event.inputs.haystack_version }}" != "" ]; then - VERSION="${{ github.event.inputs.haystack_version }}" - else - VERSION="main" - fi - echo "Generating requirements.txt for Haystack version: $VERSION" - python docs-website/scripts/generate_requirements.py --version "$VERSION" - - - name: Install 
dependencies - run: | - if [ -f requirements.txt ]; then - echo "Installing dependencies from requirements.txt" - pip install -r requirements.txt - else - echo "Error: requirements.txt was not generated" - exit 1 - fi - - name: Run snippet tests (verbose) + shell: bash run: | - # TEMPORARY: Testing with single file to make CI green - # TODO: Expand to run all docs: --paths docs versioned_docs - python docs-website/scripts/test_python_snippets.py docs-website/reference/haystack-api/agents_api.md + hatch -e test env run -- python docs-website/scripts/test_python_snippets.py --verbose tmp_api_reference/ + + notify-slack-on-failure: + if: failure() && github.ref_name == 'main' + needs: + - test-docs-snippets + runs-on: ubuntu-slim + steps: + - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1 + with: + slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }} diff --git a/docs-website/scripts/generate_requirements.py b/docs-website/scripts/generate_requirements.py deleted file mode 100755 index 3852764639..0000000000 --- a/docs-website/scripts/generate_requirements.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-FileCopyrightText: 2022-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 - -""" -Generate requirements.txt from Haystack's pyproject.toml for docs snippet testing. - -This script fetches the pyproject.toml from a specific Haystack version or branch, -parses it, and generates a requirements.txt with all dependencies needed -to run the Python code snippets in the documentation. 
-""" - -import argparse -import sys -from pathlib import Path - -import requests -import toml - -_VERSION_SPLITTERS = ("[", "==", ">=", "<", "!=", "~=") - - -def _package_name(dep: str) -> str: - """Return the dependency name stripped of extras and version specifiers.""" - - candidate = dep - for splitter in _VERSION_SPLITTERS: - candidate = candidate.split(splitter)[0] - return candidate.strip() - - -def fetch_haystack_deps(version: str = "main") -> list[str]: - """ - Fetch and parse Haystack's pyproject.toml to extract dependencies. - - Args: - version: Haystack version (e.g., "2.16.1", "main", "develop") - """ - if version == "main": - url = "https://raw.githubusercontent.com/deepset-ai/haystack/refs/heads/main/pyproject.toml" - elif version == "develop": - url = "https://raw.githubusercontent.com/deepset-ai/haystack/refs/heads/develop/pyproject.toml" - else: - # Format version tag properly (add 'v' prefix if not present) - if not version.startswith("v"): - version = f"v{version}" - url = f"https://raw.githubusercontent.com/deepset-ai/haystack/refs/tags/{version}/pyproject.toml" - - try: - response = requests.get(url, timeout=30) - response.raise_for_status() - except requests.RequestException as e: - print(f"Failed to fetch pyproject.toml for version {version}: {e}") - print(f"URL: {url}") - sys.exit(1) - - try: - config = toml.loads(response.text) - except toml.TomlDecodeError as e: - print(f"Failed to parse pyproject.toml: {e}") - sys.exit(1) - - # Core project dependencies - core_deps = config.get("project", {}).get("dependencies", []) - - # Test environment dependencies (most comprehensive for docs testing) - test_env = config.get("tool", {}).get("hatch", {}).get("envs", {}).get("test", {}) - test_deps = test_env.get("dependencies", []) if test_env else [] - - # E2E environment dependencies (additional components) - e2e_env = config.get("tool", {}).get("hatch", {}).get("envs", {}).get("e2e", {}) - e2e_deps = e2e_env.get("dependencies", []) if e2e_env else 
[] - - # Combine all dependencies - all_deps = [] - all_deps.extend(core_deps) - all_deps.extend(test_deps) - all_deps.extend(e2e_deps) - - # Remove duplicates while preserving order - seen = set() - unique_deps = [] - for dep in all_deps: - package_name = _package_name(dep) - if package_name not in seen: - seen.add(package_name) - unique_deps.append(dep) - - # Filter out test-only dependencies that aren't needed for docs - test_only_packages = { - "pytest", - "pytest-bdd", - "pytest-cov", - "pytest-asyncio", - "pytest-rerunfailures", - "coverage", - "mypy", - "pylint", - "ipython", - "colorama", - } - - filtered_deps = [] - for dep in unique_deps: - package_name = _package_name(dep) - if package_name.lower() not in test_only_packages: - filtered_deps.append(dep) - - return filtered_deps - - -def main() -> None: - """Entry point for generating requirements for docs snippet tests.""" - parser = argparse.ArgumentParser( - description="Generate requirements.txt from Haystack's pyproject.toml for docs snippet testing" - ) - parser.add_argument( - "--version", - "-v", - default="main", - help="Haystack version to fetch dependencies for (e.g., '2.16.1', 'main', 'develop'). Default: main", - ) - parser.add_argument( - "--output", "-o", default="requirements.txt", help="Output file path. 
Default: requirements.txt" - ) - parser.add_argument("--verbose", action="store_true", help="Show detailed output including all dependencies") - - args = parser.parse_args() - - print(f"Fetching Haystack dependencies for version: {args.version}") - deps = fetch_haystack_deps(args.version) - - requirements_content = f"""# Auto-generated from Haystack pyproject.toml (version: {args.version}) -# For testing docs snippets -# Generated by scripts/generate_requirements.py -""" - - for dep in sorted(deps): - requirements_content += f"{dep}\n" - - requirements_path = Path(args.output) - requirements_path.write_text(requirements_content) - - print(f"Generated {requirements_path} with {len(deps)} dependencies") - - if args.verbose: - print("\nTop-level dependencies:") - for dep in sorted(deps)[:10]: # Show first 10 - print(f" {dep}") - if len(deps) > 10: - print(f" ... and {len(deps) - 10} more") - - -if __name__ == "__main__": - main() diff --git a/docs-website/scripts/test_python_snippets.py b/docs-website/scripts/test_python_snippets.py index db600c34f5..a7a9c534ad 100755 --- a/docs-website/scripts/test_python_snippets.py +++ b/docs-website/scripts/test_python_snippets.py @@ -415,7 +415,7 @@ def main(argv: list[str] | None = None) -> int: parser.add_argument( "targets", nargs="*", - help=("Optional positional list of files or directories to scan. If omitted, --paths is used."), + help="Optional positional list of files or directories to scan. If omitted, --paths is used.", ) parser.add_argument( "--paths", diff --git a/haystack/components/audio/whisper_local.py b/haystack/components/audio/whisper_local.py index d8a4aacf7e..c57583b11a 100644 --- a/haystack/components/audio/whisper_local.py +++ b/haystack/components/audio/whisper_local.py @@ -40,7 +40,7 @@ class LocalWhisperTranscriber: [GitHub repository](https://github.com/openai/whisper). 
### Usage example - + ```python from haystack.components.audio import LocalWhisperTranscriber diff --git a/haystack/components/connectors/openapi.py b/haystack/components/connectors/openapi.py index 0d4afc561e..1435946e9c 100644 --- a/haystack/components/connectors/openapi.py +++ b/haystack/components/connectors/openapi.py @@ -24,14 +24,21 @@ class OpenAPIConnector: pass input arguments to this component. Example: + ```python from haystack.utils import Secret from haystack.components.connectors.openapi import OpenAPIConnector + serper_dev_token = Secret.from_env_var("SERPERDEV_API_KEY") + + def my_custom_config_factory(): + # Create and return a custom configuration for the OpenAPIClient + pass + connector = OpenAPIConnector( openapi_spec="https://bit.ly/serperdev_openapi", - credentials=Secret.from_env_var("SERPERDEV_API_KEY"), - service_kwargs={"config_factory": my_custom_config_factory} + credentials=serper_dev_token, + service_kwargs={"config_factory": my_custom_config_factory()} ) response = connector.run( operation_id="search", @@ -39,7 +46,6 @@ class OpenAPIConnector: ) ``` Note: - - The `parameters` argument is required for this component. - The `service_kwargs` argument is optional, it can be used to pass additional options to the OpenAPIClient. """ diff --git a/haystack/components/connectors/openapi_service.py b/haystack/components/connectors/openapi_service.py index 7612ac72a6..4ba1a777c8 100644 --- a/haystack/components/connectors/openapi_service.py +++ b/haystack/components/connectors/openapi_service.py @@ -166,15 +166,19 @@ class OpenAPIServiceConnector: with tool calling capabilities. In the example below we use the tool call payload directly, but in a real-world scenario, the tool calls would usually be generated by the Chat Generator component. - Usage example: + You need to define the `serper_token` variable with your Serper.dev API token for the example to work. 
+ This can be done through the `SERPERDEV_API_KEY` environment variable or by directly assigning the token string to the + variable in the code. + Usage example: + ```python import json import requests from haystack.components.connectors import OpenAPIServiceConnector from haystack.dataclasses import ChatMessage, ToolCall - + from haystack.utils import Secret tool_call = ToolCall( tool_name="search", @@ -182,7 +186,7 @@ class OpenAPIServiceConnector: ) message = ChatMessage.from_assistant(tool_calls=[tool_call]) - serper_token = "your_serper_dev_token" + serper_token = Secret.from_env_var("SERPERDEV_API_KEY").resolve_value() serperdev_openapi_spec = json.loads(requests.get("https://bit.ly/serper_dev_spec").text) service_connector = OpenAPIServiceConnector() result = service_connector.run( diff --git a/haystack/components/converters/azure.py b/haystack/components/converters/azure.py index 03cbd80371..7ff45d9365 100644 --- a/haystack/components/converters/azure.py +++ b/haystack/components/converters/azure.py @@ -38,7 +38,7 @@ class AzureOCRDocumentConverter: [Azure documentation](https://learn.microsoft.com/en-us/azure/ai-services/document-intelligence/quickstarts/get-started-sdks-rest-api). ### Usage example - + ```python import os from datetime import datetime diff --git a/haystack/components/converters/csv.py b/haystack/components/converters/csv.py index a255cf18e4..b825fd1b89 100644 --- a/haystack/components/converters/csv.py +++ b/haystack/components/converters/csv.py @@ -27,15 +27,18 @@ class CSVToDocument: It can attach metadata to the resulting documents. 
### Usage example - ```python from haystack.components.converters.csv import CSVToDocument from datetime import datetime + converter = CSVToDocument() - results = converter.run(sources=["sample.csv"], meta={"date_added": datetime.now().isoformat()}) + results = converter.run( + sources=["test/test_files/csv/sample_1.csv"], meta={"date_added": datetime.now().isoformat()} + ) documents = results["documents"] + print(documents[0].content) - # 'col1,col2\\nrow1,row1\\nrow2,row2\\n' + # >> 'col1,col2\\nrow1,row1\\nrow2,row2\\n' ``` """ diff --git a/haystack/components/converters/docx.py b/haystack/components/converters/docx.py index d2ec4767fb..d431c45267 100644 --- a/haystack/components/converters/docx.py +++ b/haystack/components/converters/docx.py @@ -125,14 +125,19 @@ class DOCXToDocument: This component does not preserve page breaks in the original document. Usage example: + ```python from haystack.components.converters.docx import DOCXToDocument, DOCXTableFormat, DOCXLinkFormat + from datetime import datetime converter = DOCXToDocument(table_format=DOCXTableFormat.CSV, link_format=DOCXLinkFormat.MARKDOWN) - results = converter.run(sources=["sample.docx"], meta={"date_added": datetime.now().isoformat()}) + results = converter.run( + sources=["test/test_files/docx/sample_docx.docx"], meta={"date_added": datetime.now().isoformat()} + ) documents = results["documents"] + print(documents[0].content) - # 'This is a text from the DOCX file.' + # >> 'This is a text from the DOCX file.' ``` """ diff --git a/haystack/components/converters/file_to_file_content.py b/haystack/components/converters/file_to_file_content.py index f9798b89c0..4de53e8da2 100644 --- a/haystack/components/converters/file_to_file_content.py +++ b/haystack/components/converters/file_to_file_content.py @@ -22,6 +22,7 @@ class FileToFileContent: Converts files to FileContent objects to be included in ChatMessage objects. 
### Usage example + ```python from haystack.components.converters import FileToFileContent diff --git a/haystack/components/converters/html.py b/haystack/components/converters/html.py index dd8c2885bc..0b35c8876d 100644 --- a/haystack/components/converters/html.py +++ b/haystack/components/converters/html.py @@ -27,10 +27,11 @@ class HTMLToDocument: from haystack.components.converters import HTMLToDocument converter = HTMLToDocument() - results = converter.run(sources=["path/to/sample.html"]) + results = converter.run(sources=["test/test_files/html/paul_graham_superlinear.html"]) documents = results["documents"] + print(documents[0].content) - # 'This is a text from the HTML file.' + # > 'This is a text from the HTML file.' ``` """ diff --git a/haystack/components/converters/image/document_to_image.py b/haystack/components/converters/image/document_to_image.py index f30b11b901..2fd82ef78d 100644 --- a/haystack/components/converters/image/document_to_image.py +++ b/haystack/components/converters/image/document_to_image.py @@ -34,32 +34,33 @@ class DocumentToImageContent: - For PDF files, a `page_number` key specifying which page to extract ### Usage example - ```python - from haystack import Document - from haystack.components.converters.image.document_to_image import DocumentToImageContent - - converter = DocumentToImageContent( - file_path_meta_field="file_path", - root_path="/data/files", - detail="high", - size=(800, 600) - ) - - documents = [ - Document(content="Optional description of image.jpg", meta={"file_path": "image.jpg"}), - Document(content="Text content of page 1 of doc.pdf", meta={"file_path": "doc.pdf", "page_number": 1}) - ] - - result = converter.run(documents) - image_contents = result["image_contents"] - # [ImageContent( - # base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', meta={'file_path': 'image.jpg'} - # ), - # ImageContent( - # base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', - # meta={'page_number': 1, 
'file_path': 'doc.pdf'} - # )] - ``` + + ```python + from haystack import Document + from haystack.components.converters.image.document_to_image import DocumentToImageContent + + converter = DocumentToImageContent( + file_path_meta_field="file_path", + root_path="/data/files", + detail="high", + size=(800, 600) + ) + + documents = [ + Document(content="Optional description of image.jpg", meta={"file_path": "image.jpg"}), + Document(content="Text content of page 1 of doc.pdf", meta={"file_path": "doc.pdf", "page_number": 1}) + ] + + result = converter.run(documents) + image_contents = result["image_contents"] + # [ImageContent( + # base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', meta={'file_path': 'image.jpg'} + # ), + # ImageContent( + # base64_image='/9j/4A...', mime_type='image/jpeg', detail='high', + # meta={'page_number': 1, 'file_path': 'doc.pdf'} + # )] + ``` """ def __init__( diff --git a/haystack/components/converters/markdown.py b/haystack/components/converters/markdown.py index 72a50657b0..063c7c5b3a 100644 --- a/haystack/components/converters/markdown.py +++ b/haystack/components/converters/markdown.py @@ -27,12 +27,15 @@ class MarkdownToDocument: Converts a Markdown file into a text Document. Usage example: + ```python from haystack.components.converters import MarkdownToDocument from datetime import datetime converter = MarkdownToDocument() - results = converter.run(sources=["path/to/sample.md"], meta={"date_added": datetime.now().isoformat()}) + results = converter.run( + sources=["test/test_files/markdown/sample.md"], meta={"date_added": datetime.now().isoformat()} + ) documents = results["documents"] print(documents[0].content) # 'This is a text from the markdown file.' 
diff --git a/haystack/components/converters/msg.py b/haystack/components/converters/msg.py index c4f6a730eb..aaa74586f6 100644 --- a/haystack/components/converters/msg.py +++ b/haystack/components/converters/msg.py @@ -35,7 +35,7 @@ class MSGToDocument: from datetime import datetime converter = MSGToDocument() - results = converter.run(sources=["sample.msg"], meta={"date_added": datetime.now().isoformat()}) + results = converter.run(sources=["test/test_files/msg/sample.msg"], meta={"date_added": datetime.now().isoformat()}) documents = results["documents"] attachments = results["attachments"] print(documents[0].content) diff --git a/haystack/components/converters/multi_file_converter.py b/haystack/components/converters/multi_file_converter.py index bd2db8159f..aff53890ae 100644 --- a/haystack/components/converters/multi_file_converter.py +++ b/haystack/components/converters/multi_file_converter.py @@ -55,7 +55,7 @@ class MultiFileConverter: from haystack.super_components.converters import MultiFileConverter converter = MultiFileConverter() - converter.run(sources=["test.txt", "test.pdf"], meta={}) + converter.run(sources=["test/test_files/txt/doc_1.txt", "test/test_files/pdf/sample_pdf_1.pdf"], meta={}) ``` """ diff --git a/haystack/components/converters/pdfminer.py b/haystack/components/converters/pdfminer.py index 8c6e558ae9..2033338959 100644 --- a/haystack/components/converters/pdfminer.py +++ b/haystack/components/converters/pdfminer.py @@ -31,14 +31,18 @@ class PDFMinerToDocument: Uses `pdfminer` compatible converters to convert PDF files to Documents. https://pdfminersix.readthedocs.io/en/latest/ Usage example: + ```python from haystack.components.converters.pdfminer import PDFMinerToDocument + from datetime import datetime converter = PDFMinerToDocument() - results = converter.run(sources=["sample.pdf"], meta={"date_added": datetime.now().isoformat()}) - documents = results["documents"] - print(documents[0].content) - # 'This is a text from the PDF file.' 
+ results = converter.run( + sources=["test/test_files/pdf/sample_pdf_1.pdf"], meta={"date_added": datetime.now().isoformat()} + ) + + print(results["documents"][0].content) + # >> 'This is a text from the PDF file.' ``` """ diff --git a/haystack/components/converters/pptx.py b/haystack/components/converters/pptx.py index 0941d0474a..f4d9fed086 100644 --- a/haystack/components/converters/pptx.py +++ b/haystack/components/converters/pptx.py @@ -26,14 +26,19 @@ class PPTXToDocument: Converts PPTX files to Documents. Usage example: + ```python from haystack.components.converters.pptx import PPTXToDocument + from datetime import datetime converter = PPTXToDocument() - results = converter.run(sources=["sample.pptx"], meta={"date_added": datetime.now().isoformat()}) + results = converter.run( + sources=["test/test_files/pptx/sample_pptx.pptx"], meta={"date_added": datetime.now().isoformat()} + ) documents = results["documents"] + print(documents[0].content) - # 'This is the text from the PPTX file.' + # >> 'This is the text from the PPTX file.' ``` """ diff --git a/haystack/components/converters/pypdf.py b/haystack/components/converters/pypdf.py index 8f875bc57a..45be6d01b3 100644 --- a/haystack/components/converters/pypdf.py +++ b/haystack/components/converters/pypdf.py @@ -59,12 +59,16 @@ class PyPDFToDocument: ```python from haystack.components.converters.pypdf import PyPDFToDocument + from datetime import datetime converter = PyPDFToDocument() - results = converter.run(sources=["sample.pdf"], meta={"date_added": datetime.now().isoformat()}) + results = converter.run( + sources=["test/test_files/pdf/sample_pdf_1.pdf"], meta={"date_added": datetime.now().isoformat()} + ) documents = results["documents"] + print(documents[0].content) - # 'This is a text from the PDF file.' + # >> 'This is a text from the PDF file.' 
``` """ diff --git a/haystack/components/converters/tika.py b/haystack/components/converters/tika.py index 656f6c575c..a102549e5a 100644 --- a/haystack/components/converters/tika.py +++ b/haystack/components/converters/tika.py @@ -60,8 +60,10 @@ class TikaDocumentConverter: see the [official documentation](https://github.com/apache/tika-docker/blob/main/README.md#usage). Usage example: + ```python from haystack.components.converters.tika import TikaDocumentConverter + from datetime import datetime converter = TikaDocumentConverter() results = converter.run( @@ -69,8 +71,9 @@ class TikaDocumentConverter: meta={"date_added": datetime.now().isoformat()} ) documents = results["documents"] + print(documents[0].content) - # 'This is a text from the docx file.' + # >> 'This is a text from the docx file.' ``` """ diff --git a/haystack/components/converters/txt.py b/haystack/components/converters/txt.py index f1e9de07f9..3c7502c00b 100644 --- a/haystack/components/converters/txt.py +++ b/haystack/components/converters/txt.py @@ -28,10 +28,11 @@ class TextFileToDocument: from haystack.components.converters.txt import TextFileToDocument converter = TextFileToDocument() - results = converter.run(sources=["sample.txt"]) + results = converter.run(sources=["test/test_files/txt/doc_1.txt"]) documents = results["documents"] + print(documents[0].content) - # 'This is the content from the txt file.' + # >> 'This is the content from the txt file.' 
``` """ diff --git a/haystack/components/converters/xlsx.py b/haystack/components/converters/xlsx.py index 89a505880f..370e1615f7 100644 --- a/haystack/components/converters/xlsx.py +++ b/haystack/components/converters/xlsx.py @@ -37,10 +37,13 @@ class XLSXToDocument: from datetime import datetime converter = XLSXToDocument() - results = converter.run(sources=["sample.xlsx"], meta={"date_added": datetime.now().isoformat()}) + results = converter.run( + sources=["test/test_files/xlsx/basic_tables_two_sheets.xlsx"], meta={"date_added": datetime.now().isoformat()} + ) documents = results["documents"] + print(documents[0].content) - # ",A,B\\n1,col_a,col_b\\n2,1.5,test\\n" + # >> ",A,B\\n1,col_a,col_b\\n2,1.5,test\\n" ``` """ diff --git a/haystack/components/embedders/azure_document_embedder.py b/haystack/components/embedders/azure_document_embedder.py index 39060a6c63..490a36710b 100644 --- a/haystack/components/embedders/azure_document_embedder.py +++ b/haystack/components/embedders/azure_document_embedder.py @@ -21,13 +21,12 @@ class AzureOpenAIDocumentEmbedder(OpenAIDocumentEmbedder): Calculates document embeddings using OpenAI models deployed on Azure. ### Usage example - + ```python from haystack import Document from haystack.components.embedders import AzureOpenAIDocumentEmbedder doc = Document(content="I love pizza!") - document_embedder = AzureOpenAIDocumentEmbedder() result = document_embedder.run([doc]) diff --git a/haystack/components/embedders/azure_text_embedder.py b/haystack/components/embedders/azure_text_embedder.py index 96c13b88ea..a17efcb33c 100644 --- a/haystack/components/embedders/azure_text_embedder.py +++ b/haystack/components/embedders/azure_text_embedder.py @@ -19,12 +19,11 @@ class AzureOpenAITextEmbedder(OpenAITextEmbedder): Embeds strings using OpenAI models deployed on Azure. ### Usage example - + ```python from haystack.components.embedders import AzureOpenAITextEmbedder text_to_embed = "I love pizza!" 
- text_embedder = AzureOpenAITextEmbedder() print(text_embedder.run(text_to_embed)) diff --git a/haystack/components/embedders/hugging_face_api_document_embedder.py b/haystack/components/embedders/hugging_face_api_document_embedder.py index f9bcf58be6..f572628d3f 100644 --- a/haystack/components/embedders/hugging_face_api_document_embedder.py +++ b/haystack/components/embedders/hugging_face_api_document_embedder.py @@ -36,7 +36,7 @@ class HuggingFaceAPIDocumentEmbedder: ### Usage examples #### With free serverless inference API - + ```python from haystack.components.embedders import HuggingFaceAPIDocumentEmbedder from haystack.utils import Secret @@ -55,7 +55,7 @@ class HuggingFaceAPIDocumentEmbedder: ``` #### With paid inference endpoints - + ```python from haystack.components.embedders import HuggingFaceAPIDocumentEmbedder from haystack.utils import Secret @@ -74,7 +74,7 @@ class HuggingFaceAPIDocumentEmbedder: ``` #### With self-hosted text embeddings inference - + ```python from haystack.components.embedders import HuggingFaceAPIDocumentEmbedder from haystack.dataclasses import Document diff --git a/haystack/components/embedders/hugging_face_api_text_embedder.py b/haystack/components/embedders/hugging_face_api_text_embedder.py index 6bd8bd1580..5eb45a8c73 100644 --- a/haystack/components/embedders/hugging_face_api_text_embedder.py +++ b/haystack/components/embedders/hugging_face_api_text_embedder.py @@ -29,7 +29,7 @@ class HuggingFaceAPITextEmbedder: ### Usage examples #### With free serverless inference API - + ```python from haystack.components.embedders import HuggingFaceAPITextEmbedder from haystack.utils import Secret @@ -44,7 +44,7 @@ class HuggingFaceAPITextEmbedder: ``` #### With paid inference endpoints - + ```python from haystack.components.embedders import HuggingFaceAPITextEmbedder from haystack.utils import Secret @@ -58,7 +58,7 @@ class HuggingFaceAPITextEmbedder: ``` #### With self-hosted text embeddings inference - + ```python from 
haystack.components.embedders import HuggingFaceAPITextEmbedder from haystack.utils import Secret diff --git a/haystack/components/embedders/image/sentence_transformers_doc_image_embedder.py b/haystack/components/embedders/image/sentence_transformers_doc_image_embedder.py index f233aa0523..f609a83971 100644 --- a/haystack/components/embedders/image/sentence_transformers_doc_image_embedder.py +++ b/haystack/components/embedders/image/sentence_transformers_doc_image_embedder.py @@ -32,6 +32,7 @@ class SentenceTransformersDocumentImageEmbedder: The embedding of each Document is stored in the `embedding` field of the Document. ### Usage example + ```python from haystack import Document from haystack.components.embedders.image import SentenceTransformersDocumentImageEmbedder diff --git a/haystack/components/embedders/openai_document_embedder.py b/haystack/components/embedders/openai_document_embedder.py index d5a364c092..1d3626c3c2 100644 --- a/haystack/components/embedders/openai_document_embedder.py +++ b/haystack/components/embedders/openai_document_embedder.py @@ -24,16 +24,15 @@ class OpenAIDocumentEmbedder: Computes document embeddings using OpenAI models. ### Usage example - + ```python from haystack import Document from haystack.components.embedders import OpenAIDocumentEmbedder doc = Document(content="I love pizza!") - document_embedder = OpenAIDocumentEmbedder() - result = document_embedder.run([doc]) + print(result['documents'][0].embedding) # [0.017020374536514282, -0.023255806416273117, ...] diff --git a/haystack/components/embedders/openai_text_embedder.py b/haystack/components/embedders/openai_text_embedder.py index c59b2926c3..24672bb050 100644 --- a/haystack/components/embedders/openai_text_embedder.py +++ b/haystack/components/embedders/openai_text_embedder.py @@ -21,12 +21,11 @@ class OpenAITextEmbedder: You can use it to embed user query and send it to an embedding Retriever. 
### Usage example - + ```python from haystack.components.embedders import OpenAITextEmbedder text_to_embed = "I love pizza!" - text_embedder = OpenAITextEmbedder() print(text_embedder.run(text_to_embed)) diff --git a/haystack/components/embedders/sentence_transformers_document_embedder.py b/haystack/components/embedders/sentence_transformers_document_embedder.py index d15ae11af3..252111783b 100644 --- a/haystack/components/embedders/sentence_transformers_document_embedder.py +++ b/haystack/components/embedders/sentence_transformers_document_embedder.py @@ -25,7 +25,7 @@ class SentenceTransformersDocumentEmbedder: and send them to DocumentWriter to write into a Document Store. ### Usage example: - + ```python from haystack import Document from haystack.components.embedders import SentenceTransformersDocumentEmbedder diff --git a/haystack/components/embedders/sentence_transformers_sparse_document_embedder.py b/haystack/components/embedders/sentence_transformers_sparse_document_embedder.py index 9c4f6165f1..c269851f96 100644 --- a/haystack/components/embedders/sentence_transformers_sparse_document_embedder.py +++ b/haystack/components/embedders/sentence_transformers_sparse_document_embedder.py @@ -25,7 +25,7 @@ class SentenceTransformersSparseDocumentEmbedder: and send them to DocumentWriter to write a into a Document Store. 
### Usage example: - + ```python from haystack import Document from haystack.components.embedders import SentenceTransformersSparseDocumentEmbedder diff --git a/haystack/components/embedders/sentence_transformers_sparse_text_embedder.py b/haystack/components/embedders/sentence_transformers_sparse_text_embedder.py index 429dcad9ed..31154a8c07 100644 --- a/haystack/components/embedders/sentence_transformers_sparse_text_embedder.py +++ b/haystack/components/embedders/sentence_transformers_sparse_text_embedder.py @@ -22,6 +22,7 @@ class SentenceTransformersSparseTextEmbedder: You can use it to embed user query and send it to a sparse embedding retriever. Usage example: + ```python from haystack.components.embedders import SentenceTransformersSparseTextEmbedder diff --git a/haystack/components/embedders/sentence_transformers_text_embedder.py b/haystack/components/embedders/sentence_transformers_text_embedder.py index 9ab9abcb64..3285fab5d0 100644 --- a/haystack/components/embedders/sentence_transformers_text_embedder.py +++ b/haystack/components/embedders/sentence_transformers_text_embedder.py @@ -21,6 +21,7 @@ class SentenceTransformersTextEmbedder: You can use it to embed user query and send it to an embedding retriever. 
Usage example: + ```python from haystack.components.embedders import SentenceTransformersTextEmbedder diff --git a/haystack/components/evaluators/sas_evaluator.py b/haystack/components/evaluators/sas_evaluator.py index e38cd84955..f5fcdab741 100644 --- a/haystack/components/evaluators/sas_evaluator.py +++ b/haystack/components/evaluators/sas_evaluator.py @@ -41,7 +41,7 @@ class SASEvaluator: "The Meiji Restoration in 1868 transformed Japan into a modernized world power.", ] result = evaluator.run( - ground_truths_answers=ground_truths, predicted_answers=predictions + ground_truth_answers=ground_truths, predicted_answers=predictions ) print(result["score"]) diff --git a/haystack/components/extractors/image/llm_document_content_extractor.py b/haystack/components/extractors/image/llm_document_content_extractor.py index 2bedcf89d9..7093887537 100644 --- a/haystack/components/extractors/image/llm_document_content_extractor.py +++ b/haystack/components/extractors/image/llm_document_content_extractor.py @@ -77,6 +77,7 @@ class LLMDocumentContentExtractor: Documents that fail extraction are returned in ``failed_documents`` with ``content_extraction_error`` in metadata. ### Usage example + ```python from haystack import Document from haystack.components.generators.chat import OpenAIChatGenerator @@ -91,32 +92,37 @@ class LLMDocumentContentExtractor: Return this metadata as additional key-value pairs in the same JSON object. 
\"\"\" - chat_generator = OpenAIChatGenerator() + chat_generator = OpenAIChatGenerator( + generation_kwargs={ + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "entity_extraction", + "schema": { + "type": "object", + "properties": { + "document_content": {"type": "string"}, + "author": {"type": "string"}, + "date": {"type": "string"}, + "document_type": {"type": "string"}, + "title": {"type": "string"}, + }, + "additionalProperties": False, + }, + }, + } + } + ) + extractor = LLMDocumentContentExtractor( chat_generator=chat_generator, - generation_kwargs={ - "response_format": { - "type": "json_schema", - "json_schema": { - "name": "entity_extraction", - "schema": { - "type": "object", - "properties": { - "document_content": {"type": "string"}, - "author": {"type": "string"}, - "date": {"type": "string"}, - "document_type": {"type": "string"}, - "title": {"type": "string"}, - }, - "additionalProperties": False, - }, - }, - } - } + file_path_meta_field="file_path", + raise_on_failure=False ) + documents = [ - Document(content="", meta={"file_path": "image.jpg"}), - Document(content="", meta={"file_path": "document.pdf", "page_number": 1}) + Document(content="", meta={"file_path": "/test/test_files/images/image_metadata.png"}), + Document(content="", meta={"file_path": "/test/test_files/images/apple.jpg", "page_number": 1}) ] result = extractor.run(documents=documents) updated_documents = result["documents"] diff --git a/haystack/components/extractors/llm_metadata_extractor.py b/haystack/components/extractors/llm_metadata_extractor.py index 4f24fa8109..794a6d8c5c 100644 --- a/haystack/components/extractors/llm_metadata_extractor.py +++ b/haystack/components/extractors/llm_metadata_extractor.py @@ -126,7 +126,7 @@ class LLMMetadataExtractor: extractor = LLMMetadataExtractor( prompt=NER_PROMPT, - chat_generator=generator, + chat_generator=chat_generator, expected_keys=["entities"], raise_on_failure=False, ) diff --git 
a/haystack/components/extractors/named_entity_extractor.py b/haystack/components/extractors/named_entity_extractor.py index 210afa14e8..259d0855fc 100644 --- a/haystack/components/extractors/named_entity_extractor.py +++ b/haystack/components/extractors/named_entity_extractor.py @@ -88,6 +88,7 @@ class NamedEntityExtractor: in the documents. Usage example: + ```python from haystack import Document from haystack.components.extractors.named_entity_extractor import NamedEntityExtractor diff --git a/haystack/components/generators/azure.py b/haystack/components/generators/azure.py index 2fef5d83aa..87e9b1dfba 100644 --- a/haystack/components/generators/azure.py +++ b/haystack/components/generators/azure.py @@ -33,7 +33,7 @@ class AzureOpenAIGenerator(OpenAIGenerator): ### Usage example - + ```python from haystack.components.generators import AzureOpenAIGenerator from haystack.utils import Secret diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py index 6beacbb096..ac3a0d0aeb 100644 --- a/haystack/components/generators/chat/azure.py +++ b/haystack/components/generators/chat/azure.py @@ -42,7 +42,7 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator): [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat). ### Usage example - + ```python from haystack.components.generators.chat import AzureOpenAIChatGenerator from haystack.dataclasses import ChatMessage diff --git a/haystack/components/generators/chat/azure_responses.py b/haystack/components/generators/chat/azure_responses.py index 8edae9cf51..89a2cdae0b 100644 --- a/haystack/components/generators/chat/azure_responses.py +++ b/haystack/components/generators/chat/azure_responses.py @@ -34,7 +34,7 @@ class AzureOpenAIResponsesChatGenerator(OpenAIResponsesChatGenerator): [OpenAI documentation](https://platform.openai.com/docs/api-reference/responses). 
### Usage example - + ```python from haystack.components.generators.chat import AzureOpenAIResponsesChatGenerator from haystack.dataclasses import ChatMessage diff --git a/haystack/components/generators/chat/fallback.py b/haystack/components/generators/chat/fallback.py index cdcc85f39c..fe4b58c534 100644 --- a/haystack/components/generators/chat/fallback.py +++ b/haystack/components/generators/chat/fallback.py @@ -38,7 +38,7 @@ class FallbackChatGenerator: responses, read timeout is the maximum gap between chunks. For non-streaming, it's the time limit for receiving the complete response. - Failover is automatically triggered when a generator raises any exception, including: + Fail over is automatically triggered when a generator raises any exception, including: - Timeout errors (if the generator implements and raises them) - Rate limit errors (429) - Authentication errors (401) diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index 74b9c30d55..924238f7aa 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -180,7 +180,7 @@ def _map_hf_finish_reason_to_haystack( - "eos_token": the model generated its end of sequence token - "stop_sequence": the model generated a text included in `stop_sequences` - Additionally detects tool calls from delta.tool_calls or delta.tool_call_id. + Additionally, detects tool calls from delta.tool_calls or delta.tool_call_id. :param choice: The HuggingFace ChatCompletionStreamOutputChoice object. :returns: The corresponding Haystack FinishReason or None. 
@@ -263,7 +263,7 @@ class HuggingFaceAPIChatGenerator: ### Usage examples #### With the serverless inference API (Inference Providers) - free tier available - + ```python from haystack.components.generators.chat import HuggingFaceAPIChatGenerator from haystack.dataclasses import ChatMessage @@ -287,7 +287,7 @@ class HuggingFaceAPIChatGenerator: ``` #### With the serverless inference API (Inference Providers) and text+image input - + ```python from haystack.components.generators.chat import HuggingFaceAPIChatGenerator from haystack.dataclasses import ChatMessage, ImageContent @@ -314,7 +314,7 @@ class HuggingFaceAPIChatGenerator: ``` #### With paid inference endpoints - + ```python from haystack.components.generators.chat import HuggingFaceAPIChatGenerator from haystack.dataclasses import ChatMessage @@ -332,7 +332,7 @@ class HuggingFaceAPIChatGenerator: ``` #### With self-hosted text generation inference - + ```python from haystack.components.generators.chat import HuggingFaceAPIChatGenerator from haystack.dataclasses import ChatMessage diff --git a/haystack/components/generators/chat/hugging_face_local.py b/haystack/components/generators/chat/hugging_face_local.py index 50f027ccb8..ad5edd75d9 100644 --- a/haystack/components/generators/chat/hugging_face_local.py +++ b/haystack/components/generators/chat/hugging_face_local.py @@ -95,7 +95,7 @@ class HuggingFaceLocalChatGenerator: LLMs running locally may need powerful hardware. ### Usage example - + ```python from haystack.components.generators.chat import HuggingFaceLocalChatGenerator from haystack.dataclasses import ChatMessage diff --git a/haystack/components/generators/chat/llm.py b/haystack/components/generators/chat/llm.py index b79bc74afd..5c6837279e 100644 --- a/haystack/components/generators/chat/llm.py +++ b/haystack/components/generators/chat/llm.py @@ -24,7 +24,6 @@ class LLM(Agent): without tool usage. It processes messages and returns a single response from the language model. 
### Usage examples - ```python from haystack.components.generators.chat import LLM from haystack.components.generators.chat import OpenAIChatGenerator diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 8064b99525..42de35a4a8 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -68,7 +68,6 @@ class OpenAIChatGenerator: [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat). ### Usage example - ```python from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import ChatMessage diff --git a/haystack/components/generators/hugging_face_api.py b/haystack/components/generators/hugging_face_api.py index 8d1ec7f891..9b1bc3ad1b 100644 --- a/haystack/components/generators/hugging_face_api.py +++ b/haystack/components/generators/hugging_face_api.py @@ -50,7 +50,7 @@ class HuggingFaceAPIGenerator: ### Usage examples #### With Hugging Face Inference Endpoints - + ```python from haystack.components.generators import HuggingFaceAPIGenerator from haystack.utils import Secret @@ -64,6 +64,7 @@ class HuggingFaceAPIGenerator: ``` #### With self-hosted text generation inference + ```python from haystack.components.generators import HuggingFaceAPIGenerator @@ -80,6 +81,7 @@ class HuggingFaceAPIGenerator: `text_generation` endpoint. Use the `HuggingFaceAPIChatGenerator` for generative models through the `chat_completion` endpoint. 
+ ```python from haystack.components.generators import HuggingFaceAPIGenerator from haystack.utils import Secret diff --git a/haystack/components/generators/hugging_face_local.py b/haystack/components/generators/hugging_face_local.py index 35bb18da32..e599dd4bf1 100644 --- a/haystack/components/generators/hugging_face_local.py +++ b/haystack/components/generators/hugging_face_local.py @@ -29,7 +29,7 @@ class HuggingFaceLocalGenerator: LLMs running locally may need powerful hardware. ### Usage example - + ```python from haystack.components.generators import HuggingFaceLocalGenerator diff --git a/haystack/components/generators/openai_dalle.py b/haystack/components/generators/openai_dalle.py index f70f2ba3d0..1686287e21 100644 --- a/haystack/components/generators/openai_dalle.py +++ b/haystack/components/generators/openai_dalle.py @@ -22,7 +22,6 @@ class DALLEImageGenerator: [OpenAI documentation](https://platform.openai.com/docs/api-reference/images/create). ### Usage example - ```python from haystack.components.generators import DALLEImageGenerator image_generator = DALLEImageGenerator() diff --git a/haystack/components/rankers/hugging_face_tei.py b/haystack/components/rankers/hugging_face_tei.py index 0395ef9c2c..e1ac6f84c7 100644 --- a/haystack/components/rankers/hugging_face_tei.py +++ b/haystack/components/rankers/hugging_face_tei.py @@ -36,6 +36,7 @@ class HuggingFaceTEIRanker: - [Hugging Face Inference Endpoints](https://huggingface.co/inference-endpoints) Usage example: + ```python from haystack import Document from haystack.components.rankers import HuggingFaceTEIRanker diff --git a/haystack/components/rankers/meta_field_grouping_ranker.py b/haystack/components/rankers/meta_field_grouping_ranker.py index 6ee84a4bc4..8b1208b5e4 100644 --- a/haystack/components/rankers/meta_field_grouping_ranker.py +++ b/haystack/components/rankers/meta_field_grouping_ranker.py @@ -41,18 +41,17 @@ class MetaFieldGroupingRanker: result = ranker.run(documents=docs) 
print(result["documents"]) - # [ - # Document(id=d665bbc83e52c08c3d8275bccf4f22bf2bfee21c6e77d78794627637355b8ebc, - # content: 'Java is a popular programming language', meta: {'group': '42', 'split_id': 3, 'subgroup': 'subB'}), - # Document(id=a20b326f07382b3cbf2ce156092f7c93e8788df5d48f2986957dce2adb5fe3c2, - # content: 'Python is a popular programming language', meta: {'group': '42', 'split_id': 4, 'subgroup': 'subB'}), - # Document(id=ce12919795d22f6ca214d0f161cf870993889dcb146f3bb1b3e1ffdc95be960f, - # content: 'Javascript is a popular programming language', meta: {'group': '42', 'split_id': 7, 'subgroup': 'subB'}), - # Document(id=d9fc857046c904e5cf790b3969b971b1bbdb1b3037d50a20728fdbf82991aa94, - # content: 'A chromosome is a package of DNA', meta: {'group': '314', 'split_id': 2, 'subgroup': 'subC'}), - # Document(id=6d3b7bdc13d09aa01216471eb5fb0bfdc53c5f2f3e98ad125ff6b85d3106c9a3, - # content: 'An octopus has three hearts', meta: {'group': '11', 'split_id': 2, 'subgroup': 'subD'}) - # ] + # >> [ + # >> Document(id=d665bbc83e52c08c3d8275bccf4f22bf2bfee21c6e77d78794627637355b8ebc, + # >> content: 'Java is a popular programming language', meta: {'group': '42', 'split_id': 3, 'subgroup': 'subB'}), + # >> Document(id=a20b326f07382b3cbf2ce156092f7c93e8788df5d48f2986957dce2adb5fe3c2, + # >> content: 'Python is a popular programming language', meta: {'group': '42', 'split_id': 4, 'subgroup': 'subB'}), + # >> Document(id=ce12919795d22f6ca214d0f161cf870993889dcb146f3bb1b3e1ffdc95be960f, + # >> content: 'Javascript is a popular programming language', meta: {'group': '42', 'split_id': 7, 'subgroup': 'subB'}), + # >> Document(id=d9fc857046c904e5cf790b3969b971b1bbdb1b3037d50a20728fdbf82991aa94, + # >> content: 'A chromosome is a package of DNA', meta: {'group': '314', 'split_id': 2, 'subgroup': 'subC'}), + # >> Document(id=6d3b7bdc13d09aa01216471eb5fb0bfdc53c5f2f3e98ad125ff6b85d3106c9a3, + # >> content: 'An octopus has three hearts', meta: {'group': '11', 'split_id': 2, 
'subgroup': 'subD'}) ``` """ # noqa: E501 diff --git a/haystack/components/rankers/sentence_transformers_diversity.py b/haystack/components/rankers/sentence_transformers_diversity.py index f421cd4522..e888e4c0f2 100644 --- a/haystack/components/rankers/sentence_transformers_diversity.py +++ b/haystack/components/rankers/sentence_transformers_diversity.py @@ -99,18 +99,21 @@ class SentenceTransformersDiversityRanker: if a score is present. ### Usage example + ```python from haystack import Document from haystack.components.rankers import SentenceTransformersDiversityRanker - ranker = SentenceTransformersDiversityRanker(model="sentence-transformers/all-MiniLM-L6-v2", similarity="cosine", strategy="greedy_diversity_order") + ranker = SentenceTransformersDiversityRanker( + model="sentence-transformers/all-MiniLM-L6-v2", similarity="cosine", strategy="greedy_diversity_order" + ) docs = [Document(content="Paris"), Document(content="Berlin")] query = "What is the capital of germany?" output = ranker.run(query=query, documents=docs) docs = output["documents"] ``` - """ # noqa: E501 + """ def __init__( # noqa: PLR0913 self, diff --git a/haystack/components/rankers/transformers_similarity.py b/haystack/components/rankers/transformers_similarity.py index 87c94510fe..a40d08ec05 100644 --- a/haystack/components/rankers/transformers_similarity.py +++ b/haystack/components/rankers/transformers_similarity.py @@ -35,7 +35,7 @@ class TransformersSimilarityRanker: additional features. 
### Usage example - + ```python from haystack import Document from haystack.components.rankers import TransformersSimilarityRanker diff --git a/haystack/components/routers/llm_messages_router.py b/haystack/components/routers/llm_messages_router.py index 47d2300cbe..09310d6024 100644 --- a/haystack/components/routers/llm_messages_router.py +++ b/haystack/components/routers/llm_messages_router.py @@ -20,6 +20,7 @@ class LLMMessagesRouter: This component can be used with general-purpose LLMs and with specialized LLMs for moderation like Llama Guard. ### Usage example + ```python from haystack.components.generators.chat import HuggingFaceAPIChatGenerator from haystack.components.routers.llm_messages_router import LLMMessagesRouter @@ -28,7 +29,7 @@ class LLMMessagesRouter: # initialize a Chat Generator with a generative model for moderation chat_generator = HuggingFaceAPIChatGenerator( api_type="serverless_inference_api", - api_params={"model": "meta-llama/Llama-Guard-4-12B", "provider": "groq"}, + api_params={"model": "openai/gpt-oss-safeguard-20b", "provider": "groq"}, ) router = LLMMessagesRouter(chat_generator=chat_generator, diff --git a/haystack/components/routers/transformers_text_router.py b/haystack/components/routers/transformers_text_router.py index 42b498c999..ff95c21b1c 100644 --- a/haystack/components/routers/transformers_text_router.py +++ b/haystack/components/routers/transformers_text_router.py @@ -22,7 +22,7 @@ class TransformersTextRouter: The labels are specific to each model and can be found it its description on Hugging Face. 
### Usage example - + ```python from haystack.core.pipeline import Pipeline from haystack.components.routers import TransformersTextRouter diff --git a/haystack/components/routers/zero_shot_text_router.py b/haystack/components/routers/zero_shot_text_router.py index 602c5210f2..b223829e3c 100644 --- a/haystack/components/routers/zero_shot_text_router.py +++ b/haystack/components/routers/zero_shot_text_router.py @@ -23,7 +23,7 @@ class TransformersZeroShotTextRouter: Specify the set of labels for categorization when initializing the component. ### Usage example - + ```python from haystack import Document from haystack.document_stores.in_memory import InMemoryDocumentStore diff --git a/haystack/components/validators/json_schema.py b/haystack/components/validators/json_schema.py index 04318cb5ca..7e2d2b0e2b 100644 --- a/haystack/components/validators/json_schema.py +++ b/haystack/components/validators/json_schema.py @@ -55,8 +55,7 @@ def run(self, messages: list[ChatMessage]) -> dict: p = Pipeline() - p.add_component("llm", OpenAIChatGenerator(model="gpt-4-1106-preview", - generation_kwargs={"response_format": {"type": "json_object"}})) + p.add_component("llm", OpenAIChatGenerator(generation_kwargs={"response_format": {"type": "json_object"}})) p.add_component("schema_validator", JsonSchemaValidator()) p.add_component("joiner_for_llm", BranchJoiner(list[ChatMessage])) p.add_component("message_producer", MessageProducer()) @@ -81,8 +80,8 @@ def run(self, messages: list[ChatMessage]) -> dict: print(result) # >> {'schema_validator': {'validated': [ChatMessage(_role=, # _content=[TextContent(text="\\n{\\n "name": "John",\\n "age": 30\\n}")], - # _name=None, _meta={'model': 'gpt-4-1106-preview', 'index': 0, - # 'finish_reason': 'stop', 'usage': {'completion_tokens': 17, 'prompt_tokens': 20, 'total_tokens': 37}})]}} + # _name=None, _meta={'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 17, 'prompt_tokens': 20, + # 'total_tokens': 37}})]}} ``` """ diff 
--git a/haystack/components/websearch/searchapi.py b/haystack/components/websearch/searchapi.py index 968f0d7272..71796900d4 100644 --- a/haystack/components/websearch/searchapi.py +++ b/haystack/components/websearch/searchapi.py @@ -24,11 +24,12 @@ class SearchApiWebSearch: Uses [SearchApi](https://www.searchapi.io/) to search the web for relevant documents. Usage example: + ```python from haystack.components.websearch import SearchApiWebSearch from haystack.utils import Secret - websearch = SearchApiWebSearch(top_k=10, api_key=Secret.from_token("test-api-key")) + websearch = SearchApiWebSearch(top_k=10, api_key=Secret.from_env_var("SERPERDEV_API_KEY")) results = websearch.run(query="Who is the boyfriend of Olivia Wilde?") assert results["documents"] diff --git a/haystack/components/websearch/serper_dev.py b/haystack/components/websearch/serper_dev.py index 4b4d80597f..fda18f3dd2 100644 --- a/haystack/components/websearch/serper_dev.py +++ b/haystack/components/websearch/serper_dev.py @@ -27,11 +27,14 @@ class SerperDevWebSearch: See the [Serper Dev website](https://serper.dev/) for more details. 
Usage example: + ```python from haystack.components.websearch import SerperDevWebSearch from haystack.utils import Secret - websearch = SerperDevWebSearch(top_k=10, api_key=Secret.from_token("test-api-key")) + serper_dev_api = Secret.from_env_var("SERPERDEV_API_KEY") + + websearch = SerperDevWebSearch(top_k=10, api_key=serper_dev_api) results = websearch.run(query="Who is the boyfriend of Olivia Wilde?") assert results["documents"] @@ -42,7 +45,7 @@ class SerperDevWebSearch: top_k=10, allowed_domains=["example.com"], exclude_subdomains=True, # Only results from example.com, not blog.example.com - api_key=Secret.from_token("test-api-key") + api_key=serper_dev_api ) results_filtered = websearch_filtered.run(query="search query") ``` diff --git a/haystack/core/pipeline/pipeline.py b/haystack/core/pipeline/pipeline.py index 4eab78f9d1..fe2e96df48 100644 --- a/haystack/core/pipeline/pipeline.py +++ b/haystack/core/pipeline/pipeline.py @@ -126,12 +126,13 @@ def run( # noqa: PLR0915, PLR0912, C901 Usage: ```python from haystack import Pipeline, Document - from haystack.utils import Secret - from haystack.document_stores.in_memory import InMemoryDocumentStore - from haystack.components.retrievers.in_memory import InMemoryBM25Retriever - from haystack.components.generators import OpenAIGenerator from haystack.components.builders.answer_builder import AnswerBuilder - from haystack.components.builders.prompt_builder import PromptBuilder + from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder + from haystack.components.generators.chat import OpenAIChatGenerator + from haystack.components.retrievers.in_memory import InMemoryBM25Retriever + from haystack.dataclasses import ChatMessage + from haystack.document_stores.in_memory import InMemoryDocumentStore + from haystack.utils import Secret # Write documents to InMemoryDocumentStore document_store = InMemoryDocumentStore() @@ -141,6 +142,8 @@ def run( # noqa: PLR0915, PLR0912, C901 Document(content="My 
name is Giorgio and I live in Rome.") ]) + retriever = InMemoryBM25Retriever(document_store=document_store) + prompt_template = \"\"\" Given these documents, answer the question. Documents: @@ -151,11 +154,14 @@ def run( # noqa: PLR0915, PLR0912, C901 Answer: \"\"\" - retriever = InMemoryBM25Retriever(document_store=document_store) - prompt_builder = PromptBuilder(template=prompt_template) - api_key = "your-openai-api-key" - llm = OpenAIGenerator(api_key=Secret.from_token(api_key)) + template = [ChatMessage.from_user(prompt_template)] + prompt_builder = ChatPromptBuilder( + template=template, + required_variables=["question", "documents"], + variables=["question", "documents"] + ) + llm = OpenAIChatGenerator() rag_pipeline = Pipeline() rag_pipeline.add_component("retriever", retriever) rag_pipeline.add_component("prompt_builder", prompt_builder) @@ -163,7 +169,6 @@ def run( # noqa: PLR0915, PLR0912, C901 rag_pipeline.connect("retriever", "prompt_builder.documents") rag_pipeline.connect("prompt_builder", "llm") - # Ask a question question = "Who lives in Paris?" results = rag_pipeline.run( { @@ -172,7 +177,7 @@ def run( # noqa: PLR0915, PLR0912, C901 } ) - print(results["llm"]["replies"]) + print(results["llm"]["replies"][0].text) # Jean lives in Paris ``` diff --git a/haystack/tools/component_tool.py b/haystack/tools/component_tool.py index ca4d653073..4f1a9e6645 100644 --- a/haystack/tools/component_tool.py +++ b/haystack/tools/component_tool.py @@ -58,7 +58,7 @@ class ComponentTool(Tool): Below is an example of creating a ComponentTool from an existing SerperDevWebSearch component. 
## Usage Example: - + ```python from haystack import component, Pipeline from haystack.tools import ComponentTool diff --git a/haystack/utils/requests_utils.py b/haystack/utils/requests_utils.py index c97e01c786..8d870ee676 100644 --- a/haystack/utils/requests_utils.py +++ b/haystack/utils/requests_utils.py @@ -19,6 +19,7 @@ def request_with_retry( Executes an HTTP request with a configurable exponential backoff retry on failures. Usage example: + ```python from haystack.utils import request_with_retry