From e63b3207798a1b2bc8fab4571d49008e950ff8fd Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 20 Oct 2025 16:31:15 +0200 Subject: [PATCH 1/2] docs: add pydoc configurations for Docusaurus --- .../aimlapi/pydoc/config_docusaurus.yml | 28 + .../pydoc/config_docusaurus.yml | 41 ++ .../pydoc/config_docusaurus.yml | 28 + .../anthropic/pydoc/config_docusaurus.yml | 29 + .../astra/pydoc/config_docusaurus.yml | 30 + .../pydoc/config_docusaurus.yml | 30 + integrations/chroma/chroma.md | 667 ++++++++++++++++++ .../chroma/pydoc/config_docusaurus.yml | 31 + .../cohere/pydoc/config_docusaurus.yml | 34 + .../deepeval/pydoc/config_docusaurus.yml | 32 + .../elasticsearch/pydoc/config_docusaurus.yml | 31 + .../fastembed/pydoc/config_docusaurus.yml | 32 + .../github/pydoc/config_docusaurus.yml | 33 + .../google_ai/pydoc/config_docusaurus.yml | 29 + .../google_genai/pydoc/config_docusaurus.yml | 30 + .../google_vertex/pydoc/config_docusaurus.yml | 36 + .../hanlp/pydoc/config_docusaurus.yml | 28 + integrations/jina/pydoc/config_docusaurus.yml | 32 + .../langfuse/pydoc/config_docusaurus.yml | 29 + .../llama_cpp/pydoc/config_docusaurus.yml | 28 + .../llama_stack/pydoc/config_docusaurus.yml | 28 + integrations/mcp/pydoc/config_docusaurus.yml | 29 + .../meta_llama/pydoc/config_docusaurus.yml | 28 + .../mistral/pydoc/config_docusaurus.yml | 30 + .../mongodb_atlas/pydoc/config_docusaurus.yml | 31 + .../nvidia/pydoc/config_docusaurus.yml | 33 + .../ollama/pydoc/config_docusaurus.yml | 31 + .../openrouter/pydoc/config_docusaurus.yml | 28 + .../opensearch/pydoc/config_docusaurus.yml | 32 + .../optimum/pydoc/config_docusaurus.yml | 32 + .../pgvector/pydoc/config_docusaurus.yml | 30 + .../pinecone/pydoc/config_docusaurus.yml | 29 + .../qdrant/pydoc/config_docusaurus.yml | 30 + .../ragas/pydoc/config_docusaurus.yml | 28 + .../snowflake/pydoc/config_docusaurus.yml | 28 + .../stackit/pydoc/config_docusaurus.yml | 30 + .../together_ai/pydoc/config_docusaurus.yml | 29 + 
.../unstructured/pydoc/config_docusaurus.yml | 28 + .../watsonx/pydoc/config_docusaurus.yml | 31 + .../weaviate/pydoc/config_docusaurus.yml | 32 + .../pydoc/config_docusaurus.yml | 29 + 41 files changed, 1884 insertions(+) create mode 100644 integrations/aimlapi/pydoc/config_docusaurus.yml create mode 100644 integrations/amazon_bedrock/pydoc/config_docusaurus.yml create mode 100644 integrations/amazon_sagemaker/pydoc/config_docusaurus.yml create mode 100644 integrations/anthropic/pydoc/config_docusaurus.yml create mode 100644 integrations/astra/pydoc/config_docusaurus.yml create mode 100644 integrations/azure_ai_search/pydoc/config_docusaurus.yml create mode 100644 integrations/chroma/chroma.md create mode 100644 integrations/chroma/pydoc/config_docusaurus.yml create mode 100644 integrations/cohere/pydoc/config_docusaurus.yml create mode 100644 integrations/deepeval/pydoc/config_docusaurus.yml create mode 100644 integrations/elasticsearch/pydoc/config_docusaurus.yml create mode 100644 integrations/fastembed/pydoc/config_docusaurus.yml create mode 100644 integrations/github/pydoc/config_docusaurus.yml create mode 100644 integrations/google_ai/pydoc/config_docusaurus.yml create mode 100644 integrations/google_genai/pydoc/config_docusaurus.yml create mode 100644 integrations/google_vertex/pydoc/config_docusaurus.yml create mode 100644 integrations/hanlp/pydoc/config_docusaurus.yml create mode 100644 integrations/jina/pydoc/config_docusaurus.yml create mode 100644 integrations/langfuse/pydoc/config_docusaurus.yml create mode 100644 integrations/llama_cpp/pydoc/config_docusaurus.yml create mode 100644 integrations/llama_stack/pydoc/config_docusaurus.yml create mode 100644 integrations/mcp/pydoc/config_docusaurus.yml create mode 100644 integrations/meta_llama/pydoc/config_docusaurus.yml create mode 100644 integrations/mistral/pydoc/config_docusaurus.yml create mode 100644 integrations/mongodb_atlas/pydoc/config_docusaurus.yml create mode 100644 
integrations/nvidia/pydoc/config_docusaurus.yml create mode 100644 integrations/ollama/pydoc/config_docusaurus.yml create mode 100644 integrations/openrouter/pydoc/config_docusaurus.yml create mode 100644 integrations/opensearch/pydoc/config_docusaurus.yml create mode 100644 integrations/optimum/pydoc/config_docusaurus.yml create mode 100644 integrations/pgvector/pydoc/config_docusaurus.yml create mode 100644 integrations/pinecone/pydoc/config_docusaurus.yml create mode 100644 integrations/qdrant/pydoc/config_docusaurus.yml create mode 100644 integrations/ragas/pydoc/config_docusaurus.yml create mode 100644 integrations/snowflake/pydoc/config_docusaurus.yml create mode 100644 integrations/stackit/pydoc/config_docusaurus.yml create mode 100644 integrations/together_ai/pydoc/config_docusaurus.yml create mode 100644 integrations/unstructured/pydoc/config_docusaurus.yml create mode 100644 integrations/watsonx/pydoc/config_docusaurus.yml create mode 100644 integrations/weaviate/pydoc/config_docusaurus.yml create mode 100644 integrations/weights_and_biases_weave/pydoc/config_docusaurus.yml diff --git a/integrations/aimlapi/pydoc/config_docusaurus.yml b/integrations/aimlapi/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..559e4839a6 --- /dev/null +++ b/integrations/aimlapi/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.aimlapi.chat.chat_generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: AIMLAPI integration for Haystack + id: integrations-aimlapi + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + 
filename: aimlapi.md + title: AIMLAPI + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/amazon_bedrock/pydoc/config_docusaurus.yml b/integrations/amazon_bedrock/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..86d885cf30 --- /dev/null +++ b/integrations/amazon_bedrock/pydoc/config_docusaurus.yml @@ -0,0 +1,41 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.common.amazon_bedrock.errors + - haystack_integrations.components.embedders.amazon_bedrock.document_embedder + - haystack_integrations.components.embedders.amazon_bedrock.text_embedder + - haystack_integrations.components.embedders.amazon_bedrock.document_image_embedder + - haystack_integrations.components.generators.amazon_bedrock.generator + - haystack_integrations.components.generators.amazon_bedrock.adapters + - haystack_integrations.common.amazon_bedrock.errors + - haystack_integrations.components.generators.amazon_bedrock.chat.chat_generator + - haystack_integrations.components.rankers.amazon_bedrock.ranker + - haystack_integrations.components.downloaders.s3.s3_downloader + - haystack_integrations.common.s3.utils + - haystack_integrations.common.s3.errors + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- expression: name not in ['BedrockRanker'] + type: filter +- type: smart +- type: crossref +renderer: + description: Amazon Bedrock integration for Haystack + id: integrations-amazon-bedrock + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: amazon_bedrock.md + title: Amazon Bedrock + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/amazon_sagemaker/pydoc/config_docusaurus.yml 
b/integrations/amazon_sagemaker/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..50ef46d176 --- /dev/null +++ b/integrations/amazon_sagemaker/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.amazon_sagemaker.sagemaker + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Amazon Sagemaker integration for Haystack + id: integrations-amazon-sagemaker + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: amazon_sagemaker.md + title: Amazon Sagemaker + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/anthropic/pydoc/config_docusaurus.yml b/integrations/anthropic/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..8019c7a45e --- /dev/null +++ b/integrations/anthropic/pydoc/config_docusaurus.yml @@ -0,0 +1,29 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.anthropic.generator + - haystack_integrations.components.generators.anthropic.chat.chat_generator + - haystack_integrations.components.generators.anthropic.chat.vertex_chat_generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Anthropic integration for Haystack + id: integrations-anthropic + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + descriptive_class_title: false + descriptive_module_title: 
true + filename: anthropic.md + title: Anthropic + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/astra/pydoc/config_docusaurus.yml b/integrations/astra/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..6c42cb49a8 --- /dev/null +++ b/integrations/astra/pydoc/config_docusaurus.yml @@ -0,0 +1,30 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.retrievers.astra.retriever + - haystack_integrations.document_stores.astra.document_store + - haystack_integrations.document_stores.astra.errors + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Astra integration for Haystack + id: integrations-astra + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: astra.md + title: Astra + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/azure_ai_search/pydoc/config_docusaurus.yml b/integrations/azure_ai_search/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..632ebac2d0 --- /dev/null +++ b/integrations/azure_ai_search/pydoc/config_docusaurus.yml @@ -0,0 +1,30 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.retrievers.azure_ai_search.embedding_retriever + - haystack_integrations.document_stores.azure_ai_search.document_store + - haystack_integrations.document_stores.azure_ai_search.filters + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: 
+ description: Azure AI Search integration for Haystack + id: integrations-azure_ai_search + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: azure_ai_search.md + title: Azure AI Search + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/chroma/chroma.md b/integrations/chroma/chroma.md new file mode 100644 index 0000000000..1c2f485eb6 --- /dev/null +++ b/integrations/chroma/chroma.md @@ -0,0 +1,667 @@ +--- +title: "Chroma" +id: integrations-chroma +description: "Chroma integration for Haystack" +slug: "/integrations-chroma" +--- + + + +# Module haystack\_integrations.components.retrievers.chroma.retriever + + + +## ChromaQueryTextRetriever + +A component for retrieving documents from a [Chroma database](https://docs.trychroma.com/) using the `query` API. + +Example usage: +```python +from haystack import Pipeline +from haystack.components.converters import TextFileToDocument +from haystack.components.writers import DocumentWriter + +from haystack_integrations.document_stores.chroma import ChromaDocumentStore +from haystack_integrations.components.retrievers.chroma import ChromaQueryTextRetriever + +file_paths = ... 
+ +# Chroma is used in-memory so we use the same instances in the two pipelines below +document_store = ChromaDocumentStore() + +indexing = Pipeline() +indexing.add_component("converter", TextFileToDocument()) +indexing.add_component("writer", DocumentWriter(document_store)) +indexing.connect("converter", "writer") +indexing.run({"converter": {"sources": file_paths}}) + +querying = Pipeline() +querying.add_component("retriever", ChromaQueryTextRetriever(document_store)) +results = querying.run({"retriever": {"query": "Variable declarations", "top_k": 3}}) + +for d in results["retriever"]["documents"]: + print(d.meta, d.score) +``` + + + +#### ChromaQueryTextRetriever.\_\_init\_\_ + +```python +def __init__(document_store: ChromaDocumentStore, + filters: Optional[Dict[str, Any]] = None, + top_k: int = 10, + filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE) +``` + +**Arguments**: + +- `document_store`: an instance of `ChromaDocumentStore`. +- `filters`: filters to narrow down the search space. +- `top_k`: the maximum number of documents to retrieve. +- `filter_policy`: Policy to determine how filters are applied. + + + +#### ChromaQueryTextRetriever.run + +```python +@component.output_types(documents=List[Document]) +def run(query: str, + filters: Optional[Dict[str, Any]] = None, + top_k: Optional[int] = None) -> Dict[str, Any] +``` + +Run the retriever on the given input data. + +**Arguments**: + +- `query`: The input data for the retriever. In this case, a plain-text query. +- `filters`: Filters applied to the retrieved Documents. The way runtime filters are applied depends on +the `filter_policy` chosen at retriever initialization. See init method docstring for more +details. +- `top_k`: The maximum number of documents to retrieve. +If not specified, the default value from the constructor is used. + +**Raises**: + +- `ValueError`: If the specified document store is not found or is not a `ChromaDocumentStore` instance. 
+ +**Returns**: + +A dictionary with the following keys: +- `documents`: List of documents returned by the search engine. + + + +#### ChromaQueryTextRetriever.run\_async + +```python +@component.output_types(documents=List[Document]) +async def run_async(query: str, + filters: Optional[Dict[str, Any]] = None, + top_k: Optional[int] = None) -> Dict[str, Any] +``` + +Asynchronously run the retriever on the given input data. + +Asynchronous methods are only supported for HTTP connections. + +**Arguments**: + +- `query`: The input data for the retriever. In this case, a plain-text query. +- `filters`: Filters applied to the retrieved Documents. The way runtime filters are applied depends on +the `filter_policy` chosen at retriever initialization. See init method docstring for more +details. +- `top_k`: The maximum number of documents to retrieve. +If not specified, the default value from the constructor is used. + +**Raises**: + +- `ValueError`: If the specified document store is not found or is not a `ChromaDocumentStore` instance. + +**Returns**: + +A dictionary with the following keys: +- `documents`: List of documents returned by the search engine. + + + +#### ChromaQueryTextRetriever.from\_dict + +```python +@classmethod +def from_dict(cls, data: Dict[str, Any]) -> "ChromaQueryTextRetriever" +``` + +Deserializes the component from a dictionary. + +**Arguments**: + +- `data`: Dictionary to deserialize from. + +**Returns**: + +Deserialized component. + + + +#### ChromaQueryTextRetriever.to\_dict + +```python +def to_dict() -> Dict[str, Any] +``` + +Serializes the component to a dictionary. + +**Returns**: + +Dictionary with serialized data. + + + +## ChromaEmbeddingRetriever + +A component for retrieving documents from a [Chroma database](https://docs.trychroma.com/) using embeddings. 
+ + + +#### ChromaEmbeddingRetriever.\_\_init\_\_ + +```python +def __init__(document_store: ChromaDocumentStore, + filters: Optional[Dict[str, Any]] = None, + top_k: int = 10, + filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE) +``` + +**Arguments**: + +- `document_store`: an instance of `ChromaDocumentStore`. +- `filters`: filters to narrow down the search space. +- `top_k`: the maximum number of documents to retrieve. +- `filter_policy`: Policy to determine how filters are applied. + + + +#### ChromaEmbeddingRetriever.run + +```python +@component.output_types(documents=List[Document]) +def run(query_embedding: List[float], + filters: Optional[Dict[str, Any]] = None, + top_k: Optional[int] = None) -> Dict[str, Any] +``` + +Run the retriever on the given input data. + +**Arguments**: + +- `query_embedding`: the query embeddings. +- `filters`: Filters applied to the retrieved Documents. The way runtime filters are applied depends on +the `filter_policy` chosen at retriever initialization. See init method docstring for more +details. +- `top_k`: the maximum number of documents to retrieve. +If not specified, the default value from the constructor is used. + +**Returns**: + +a dictionary with the following keys: +- `documents`: List of documents returned by the search engine. + + + +#### ChromaEmbeddingRetriever.run\_async + +```python +@component.output_types(documents=List[Document]) +async def run_async(query_embedding: List[float], + filters: Optional[Dict[str, Any]] = None, + top_k: Optional[int] = None) -> Dict[str, Any] +``` + +Asynchronously run the retriever on the given input data. + +Asynchronous methods are only supported for HTTP connections. + +**Arguments**: + +- `query_embedding`: the query embeddings. +- `filters`: Filters applied to the retrieved Documents. The way runtime filters are applied depends on +the `filter_policy` chosen at retriever initialization. See init method docstring for more +details. 
+- `top_k`: the maximum number of documents to retrieve. +If not specified, the default value from the constructor is used. + +**Returns**: + +a dictionary with the following keys: +- `documents`: List of documents returned by the search engine. + + + +#### ChromaEmbeddingRetriever.from\_dict + +```python +@classmethod +def from_dict(cls, data: Dict[str, Any]) -> "ChromaEmbeddingRetriever" +``` + +Deserializes the component from a dictionary. + +**Arguments**: + +- `data`: Dictionary to deserialize from. + +**Returns**: + +Deserialized component. + + + +#### ChromaEmbeddingRetriever.to\_dict + +```python +def to_dict() -> Dict[str, Any] +``` + +Serializes the component to a dictionary. + +**Returns**: + +Dictionary with serialized data. + + + +# Module haystack\_integrations.document\_stores.chroma.document\_store + + + +## ChromaDocumentStore + +A document store using [Chroma](https://docs.trychroma.com/) as the backend. + +We use the `collection.get` API to implement the document store protocol, +the `collection.query` API will be used in the retriever instead. + + + +#### ChromaDocumentStore.\_\_init\_\_ + +```python +def __init__(collection_name: str = "documents", + embedding_function: str = "default", + persist_path: Optional[str] = None, + host: Optional[str] = None, + port: Optional[int] = None, + distance_function: Literal["l2", "cosine", "ip"] = "l2", + metadata: Optional[dict] = None, + **embedding_function_params: Any) +``` + +Creates a new ChromaDocumentStore instance. + +It is meant to be connected to a Chroma collection. + +Note: for the component to be part of a serializable pipeline, the `__init__` +parameters must be serializable, which is why we use a registry to configure the +embedding function by passing a string. + +**Arguments**: + +- `collection_name`: the name of the collection to use in the database. +- `embedding_function`: the name of the embedding function to use to embed the query +- `persist_path`: Path for local persistent storage. 
Cannot be used in combination with `host` and `port`. +If none of `persist_path`, `host`, and `port` is specified, the database will be `in-memory`. +- `host`: The host address for the remote Chroma HTTP client connection. Cannot be used with `persist_path`. +- `port`: The port number for the remote Chroma HTTP client connection. Cannot be used with `persist_path`. +- `distance_function`: The distance metric for the embedding space. +- `"l2"` computes the Euclidean (straight-line) distance between vectors, +where smaller scores indicate more similarity. +- `"cosine"` computes the cosine similarity between vectors, +with higher scores indicating greater similarity. +- `"ip"` stands for inner product, where higher scores indicate greater similarity between vectors. +**Note**: `distance_function` can only be set during the creation of a collection. +To change the distance metric of an existing collection, consider cloning the collection. +- `metadata`: a dictionary of chromadb collection parameters passed directly to chromadb's client +method `create_collection`. If it contains the key `"hnsw:space"`, the value will take precedence over the +`distance_function` parameter above. +- `embedding_function_params`: additional parameters to pass to the embedding function. + + + +#### ChromaDocumentStore.count\_documents + +```python +def count_documents() -> int +``` + +Returns how many documents are present in the document store. + +**Returns**: + +how many documents are present in the document store. + + + +#### ChromaDocumentStore.count\_documents\_async + +```python +async def count_documents_async() -> int +``` + +Asynchronously returns how many documents are present in the document store. + +Asynchronous methods are only supported for HTTP connections. + +**Returns**: + +how many documents are present in the document store. 
+ + + +#### ChromaDocumentStore.filter\_documents + +```python +def filter_documents( + filters: Optional[Dict[str, Any]] = None) -> List[Document] +``` + +Returns the documents that match the filters provided. + +For a detailed specification of the filters, +refer to the [documentation](https://docs.haystack.deepset.ai/v2.0/docs/metadata-filtering). + +**Arguments**: + +- `filters`: the filters to apply to the document list. + +**Returns**: + +a list of Documents that match the given filters. + + + +#### ChromaDocumentStore.filter\_documents\_async + +```python +async def filter_documents_async( + filters: Optional[Dict[str, Any]] = None) -> List[Document] +``` + +Asynchronously returns the documents that match the filters provided. + +Asynchronous methods are only supported for HTTP connections. + +For a detailed specification of the filters, +refer to the [documentation](https://docs.haystack.deepset.ai/v2.0/docs/metadata-filtering). + +**Arguments**: + +- `filters`: the filters to apply to the document list. + +**Returns**: + +a list of Documents that match the given filters. + + + +#### ChromaDocumentStore.write\_documents + +```python +def write_documents(documents: List[Document], + policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> int +``` + +Writes (or overwrites) documents into the store. + +**Arguments**: + +- `documents`: A list of documents to write into the document store. +- `policy`: Not supported at the moment. + +**Raises**: + +- `ValueError`: When input is not valid. + +**Returns**: + +The number of documents written + + + +#### ChromaDocumentStore.write\_documents\_async + +```python +async def write_documents_async( + documents: List[Document], + policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> int +``` + +Asynchronously writes (or overwrites) documents into the store. + +Asynchronous methods are only supported for HTTP connections. + +**Arguments**: + +- `documents`: A list of documents to write into the document store. 
+- `policy`: Not supported at the moment. + +**Raises**: + +- `ValueError`: When input is not valid. + +**Returns**: + +The number of documents written + + + +#### ChromaDocumentStore.delete\_documents + +```python +def delete_documents(document_ids: List[str]) -> None +``` + +Deletes all documents with matching `document_ids` from the document store. + +**Arguments**: + +- `document_ids`: the document ids to delete + + + +#### ChromaDocumentStore.delete\_documents\_async + +```python +async def delete_documents_async(document_ids: List[str]) -> None +``` + +Asynchronously deletes all documents with matching `document_ids` from the document store. + +Asynchronous methods are only supported for HTTP connections. + +**Arguments**: + +- `document_ids`: the document ids to delete + + + +#### ChromaDocumentStore.search + +```python +def search(queries: List[str], + top_k: int, + filters: Optional[Dict[str, Any]] = None) -> List[List[Document]] +``` + +Search the documents in the store using the provided text queries. + +**Arguments**: + +- `queries`: the list of queries to search for. +- `top_k`: top_k documents to return for each query. +- `filters`: a dictionary of filters to apply to the search. Accepts filters in haystack format. + +**Returns**: + +matching documents for each query. + + + +#### ChromaDocumentStore.search\_async + +```python +async def search_async( + queries: List[str], + top_k: int, + filters: Optional[Dict[str, Any]] = None) -> List[List[Document]] +``` + +Asynchronously search the documents in the store using the provided text queries. + +Asynchronous methods are only supported for HTTP connections. + +**Arguments**: + +- `queries`: the list of queries to search for. +- `top_k`: top_k documents to return for each query. +- `filters`: a dictionary of filters to apply to the search. Accepts filters in haystack format. + +**Returns**: + +matching documents for each query. 
+ + + +#### ChromaDocumentStore.search\_embeddings + +```python +def search_embeddings( + query_embeddings: List[List[float]], + top_k: int, + filters: Optional[Dict[str, Any]] = None) -> List[List[Document]] +``` + +Perform vector search on the stored documents, passing the embeddings of the queries instead of their text. + +**Arguments**: + +- `query_embeddings`: a list of embeddings to use as queries. +- `top_k`: the maximum number of documents to retrieve. +- `filters`: a dictionary of filters to apply to the search. Accepts filters in haystack format. + +**Returns**: + +a list of lists of documents that match the given filters. + + + +#### ChromaDocumentStore.search\_embeddings\_async + +```python +async def search_embeddings_async( + query_embeddings: List[List[float]], + top_k: int, + filters: Optional[Dict[str, Any]] = None) -> List[List[Document]] +``` + +Asynchronously perform vector search on the stored documents, passing the embeddings of the queries instead of + +their text. + +Asynchronous methods are only supported for HTTP connections. + +**Arguments**: + +- `query_embeddings`: a list of embeddings to use as queries. +- `top_k`: the maximum number of documents to retrieve. +- `filters`: a dictionary of filters to apply to the search. Accepts filters in haystack format. + +**Returns**: + +a list of lists of documents that match the given filters. + + + +#### ChromaDocumentStore.from\_dict + +```python +@classmethod +def from_dict(cls, data: Dict[str, Any]) -> "ChromaDocumentStore" +``` + +Deserializes the component from a dictionary. + +**Arguments**: + +- `data`: Dictionary to deserialize from. + +**Returns**: + +Deserialized component. + + + +#### ChromaDocumentStore.to\_dict + +```python +def to_dict() -> Dict[str, Any] +``` + +Serializes the component to a dictionary. + +**Returns**: + +Dictionary with serialized data. 
+ + + +# Module haystack\_integrations.document\_stores.chroma.errors + + + +## ChromaDocumentStoreError + +Parent class for all ChromaDocumentStore exceptions. + + + +## ChromaDocumentStoreFilterError + +Raised when a filter is not valid for a ChromaDocumentStore. + + + +## ChromaDocumentStoreConfigError + +Raised when a configuration is not valid for a ChromaDocumentStore. + + + +# Module haystack\_integrations.document\_stores.chroma.utils + + + +#### get\_embedding\_function + +```python +def get_embedding_function(function_name: str, + **kwargs: Any) -> EmbeddingFunction +``` + +Load an embedding function by name. + +**Arguments**: + +- `function_name`: the name of the embedding function. +- `kwargs`: additional arguments to pass to the embedding function. + +**Raises**: + +- `ChromaDocumentStoreConfigError`: if the function name is invalid. + +**Returns**: + +the loaded embedding function. + diff --git a/integrations/chroma/pydoc/config_docusaurus.yml b/integrations/chroma/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..bb1644c5dc --- /dev/null +++ b/integrations/chroma/pydoc/config_docusaurus.yml @@ -0,0 +1,31 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.retrievers.chroma.retriever + - haystack_integrations.document_stores.chroma.document_store + - haystack_integrations.document_stores.chroma.errors + - haystack_integrations.document_stores.chroma.utils + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Chroma integration for Haystack + id: integrations-chroma + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: chroma.md + title: Chroma + 
type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/cohere/pydoc/config_docusaurus.yml b/integrations/cohere/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..d9e82683be --- /dev/null +++ b/integrations/cohere/pydoc/config_docusaurus.yml @@ -0,0 +1,34 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.embedders.cohere.document_embedder + - haystack_integrations.components.embedders.cohere.document_image_embedder + - haystack_integrations.components.embedders.cohere.text_embedder + - haystack_integrations.components.embedders.cohere.utils + - haystack_integrations.components.generators.cohere.generator + - haystack_integrations.components.generators.cohere.chat.chat_generator + - haystack_integrations.components.rankers.cohere.ranker + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Cohere integration for Haystack + id: integrations-cohere + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: cohere.md + title: Cohere + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/deepeval/pydoc/config_docusaurus.yml b/integrations/deepeval/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..a772da58e2 --- /dev/null +++ b/integrations/deepeval/pydoc/config_docusaurus.yml @@ -0,0 +1,32 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.evaluators.deepeval.evaluator + - haystack_integrations.components.evaluators.deepeval.metrics + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- 
do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- expression: name not in ['MetricResult', 'MetricDescriptor', 'OutputConverters', + 'InputConverters', 'METRIC_DESCRIPTORS'] + type: filter +- type: smart +- type: crossref +renderer: + description: DeepEval integration for Haystack + id: integrations-deepeval + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: deepeval.md + title: DeepEval + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/elasticsearch/pydoc/config_docusaurus.yml b/integrations/elasticsearch/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..e646ffd973 --- /dev/null +++ b/integrations/elasticsearch/pydoc/config_docusaurus.yml @@ -0,0 +1,31 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.retrievers.elasticsearch.bm25_retriever + - haystack_integrations.components.retrievers.elasticsearch.embedding_retriever + - haystack_integrations.document_stores.elasticsearch.document_store + - haystack_integrations.document_stores.elasticsearch.filters + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Elasticsearch integration for Haystack + id: integrations-elasticsearch + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: elasticsearch.md + title: Elasticsearch + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/fastembed/pydoc/config_docusaurus.yml 
b/integrations/fastembed/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..b61886d433 --- /dev/null +++ b/integrations/fastembed/pydoc/config_docusaurus.yml @@ -0,0 +1,32 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.embedders.fastembed.fastembed_document_embedder + - haystack_integrations.components.embedders.fastembed.fastembed_text_embedder + - haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder + - haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder + - haystack_integrations.components.rankers.fastembed.ranker + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: FastEmbed integration for Haystack + id: fastembed-embedders + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: fastembed.md + title: FastEmbed + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/github/pydoc/config_docusaurus.yml b/integrations/github/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..f12a092f6a --- /dev/null +++ b/integrations/github/pydoc/config_docusaurus.yml @@ -0,0 +1,33 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.connectors.github.file_editor + - haystack_integrations.components.connectors.github.issue_commenter + - haystack_integrations.components.connectors.github.issue_viewer + - haystack_integrations.components.connectors.github.pr_creator + - haystack_integrations.components.connectors.github.repo_viewer + - haystack_integrations.components.connectors.github.repo_forker + search_path: + - 
../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: GitHub integration for Haystack + id: integrations-github + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: github.md + title: GitHub + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/google_ai/pydoc/config_docusaurus.yml b/integrations/google_ai/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..c753c18bfa --- /dev/null +++ b/integrations/google_ai/pydoc/config_docusaurus.yml @@ -0,0 +1,29 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.google_ai.gemini + - haystack_integrations.components.generators.google_ai.chat.gemini + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Google AI integration for Haystack + id: integrations-google-ai + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: google_ai.md + title: Google AI + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/google_genai/pydoc/config_docusaurus.yml b/integrations/google_genai/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..f6624f517f --- /dev/null +++ b/integrations/google_genai/pydoc/config_docusaurus.yml @@ -0,0 +1,30 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - 
haystack_integrations.components.generators.google_genai.chat.chat_generator + - haystack_integrations.components.embedders.google_genai.document_embedder + - haystack_integrations.components.embedders.google_genai.text_embedder + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Google GenAI integration for Haystack + id: integrations-google-genai + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: google_genai.md + title: Google GenAI + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/google_vertex/pydoc/config_docusaurus.yml b/integrations/google_vertex/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..a98640091a --- /dev/null +++ b/integrations/google_vertex/pydoc/config_docusaurus.yml @@ -0,0 +1,36 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.google_vertex.gemini + - haystack_integrations.components.generators.google_vertex.captioner + - haystack_integrations.components.generators.google_vertex.code_generator + - haystack_integrations.components.generators.google_vertex.image_generator + - haystack_integrations.components.generators.google_vertex.question_answering + - haystack_integrations.components.generators.google_vertex.text_generator + - haystack_integrations.components.generators.google_vertex.chat.gemini + - haystack_integrations.components.embedders.google_vertex.document_embedder + - haystack_integrations.components.embedders.google_vertex.text_embedder + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + 
documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Google Vertex integration for Haystack + id: integrations-google-vertex + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: google_vertex.md + title: Google Vertex + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/hanlp/pydoc/config_docusaurus.yml b/integrations/hanlp/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..99a77f1cd8 --- /dev/null +++ b/integrations/hanlp/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.preprocessors.hanlp.chinese_document_splitter + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: HanLP integration for Haystack + id: integrations-hanlp + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: hanlp.md + title: HanLP + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/jina/pydoc/config_docusaurus.yml b/integrations/jina/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..022ed9ff49 --- /dev/null +++ b/integrations/jina/pydoc/config_docusaurus.yml @@ -0,0 +1,32 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.embedders.jina.document_embedder + - haystack_integrations.components.embedders.jina.document_image_embedder + - haystack_integrations.components.embedders.jina.text_embedder + -
haystack_integrations.components.rankers.jina.ranker + - haystack_integrations.components.connectors.jina.reader + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Jina integration for Haystack + id: integrations-jina + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: jina.md + title: Jina + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/langfuse/pydoc/config_docusaurus.yml b/integrations/langfuse/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..e532c46529 --- /dev/null +++ b/integrations/langfuse/pydoc/config_docusaurus.yml @@ -0,0 +1,29 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.connectors.langfuse.langfuse_connector + - haystack_integrations.tracing.langfuse.tracer + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Langfuse integration for Haystack + id: integrations-langfuse + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: langfuse.md + title: Langfuse + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/llama_cpp/pydoc/config_docusaurus.yml b/integrations/llama_cpp/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..4a2b4150b3 --- /dev/null +++ b/integrations/llama_cpp/pydoc/config_docusaurus.yml @@ -0,0 +1,28
@@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.llama_cpp.generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Llama.cpp integration for Haystack + id: integrations-llama-cpp + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: llama_cpp.md + title: Llama.cpp + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/llama_stack/pydoc/config_docusaurus.yml b/integrations/llama_stack/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..1df6a9fd14 --- /dev/null +++ b/integrations/llama_stack/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.llama_stack.chat.chat_generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Llama Stack integration for Haystack + id: integrations-llama-stack + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: llama_stack.md + title: Llama Stack + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/mcp/pydoc/config_docusaurus.yml b/integrations/mcp/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..aae9f71b87 --- /dev/null +++ b/integrations/mcp/pydoc/config_docusaurus.yml @@ -0,0 +1,29 @@ 
+loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.tools.mcp.mcp_tool + - haystack_integrations.tools.mcp.mcp_toolset + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: MCP integration for Haystack + id: integrations-mcp + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: mcp.md + title: MCP + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/meta_llama/pydoc/config_docusaurus.yml b/integrations/meta_llama/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..b5ed2d4f82 --- /dev/null +++ b/integrations/meta_llama/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.meta_llama.chat.chat_generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Meta Llama API integration for Haystack + id: integrations-meta-llama + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: meta_llama.md + title: Meta Llama API + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/mistral/pydoc/config_docusaurus.yml b/integrations/mistral/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..9340803a2c --- /dev/null +++ b/integrations/mistral/pydoc/config_docusaurus.yml @@ -0,0 
+1,30 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.embedders.mistral.document_embedder + - haystack_integrations.components.embedders.mistral.text_embedder + - haystack_integrations.components.generators.mistral.chat.chat_generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Mistral integration for Haystack + id: integrations-mistral + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: mistral.md + title: Mistral + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/mongodb_atlas/pydoc/config_docusaurus.yml b/integrations/mongodb_atlas/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..ffa92e09eb --- /dev/null +++ b/integrations/mongodb_atlas/pydoc/config_docusaurus.yml @@ -0,0 +1,31 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.document_stores.mongodb_atlas.document_store + - haystack_integrations.document_stores.mongodb_atlas.filters + - haystack_integrations.components.retrievers.mongodb_atlas.embedding_retriever + - haystack_integrations.components.retrievers.mongodb_atlas.full_text_retriever + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: MongoDB Atlas integration for Haystack + id: integrations-mongodb-atlas + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + 
descriptive_module_title: true + filename: mongodb_atlas.md + title: MongoDB Atlas + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/nvidia/pydoc/config_docusaurus.yml b/integrations/nvidia/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..d80c585a58 --- /dev/null +++ b/integrations/nvidia/pydoc/config_docusaurus.yml @@ -0,0 +1,33 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.embedders.nvidia.document_embedder + - haystack_integrations.components.embedders.nvidia.text_embedder + - haystack_integrations.components.embedders.nvidia.truncate + - haystack_integrations.components.generators.nvidia.generator + - haystack_integrations.components.rankers.nvidia.ranker + - haystack_integrations.components.rankers.nvidia.truncate + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Nvidia integration for Haystack + id: integrations-nvidia + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: nvidia.md + title: Nvidia + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/ollama/pydoc/config_docusaurus.yml b/integrations/ollama/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..b68a10dd65 --- /dev/null +++ b/integrations/ollama/pydoc/config_docusaurus.yml @@ -0,0 +1,31 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.ollama.generator + - haystack_integrations.components.generators.ollama.chat.chat_generator + - haystack_integrations.components.embedders.ollama.document_embedder + - 
haystack_integrations.components.embedders.ollama.text_embedder + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Ollama integration for Haystack + id: integrations-ollama + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: ollama.md + title: Ollama + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/openrouter/pydoc/config_docusaurus.yml b/integrations/openrouter/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..0535e92194 --- /dev/null +++ b/integrations/openrouter/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.openrouter.chat.chat_generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: OpenRouter integration for Haystack + id: integrations-openrouter + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: openrouter.md + title: OpenRouter + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/opensearch/pydoc/config_docusaurus.yml b/integrations/opensearch/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..7e711f6f34 --- /dev/null +++ b/integrations/opensearch/pydoc/config_docusaurus.yml @@ -0,0 +1,32 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - 
haystack_integrations.components.retrievers.opensearch.bm25_retriever + - haystack_integrations.components.retrievers.opensearch.embedding_retriever + - haystack_integrations.components.retrievers.opensearch.open_search_hybrid_retriever + - haystack_integrations.document_stores.opensearch.document_store + - haystack_integrations.document_stores.opensearch.filters + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: OpenSearch integration for Haystack + id: integrations-opensearch + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: opensearch.md + title: OpenSearch + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/optimum/pydoc/config_docusaurus.yml b/integrations/optimum/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..d96df3a070 --- /dev/null +++ b/integrations/optimum/pydoc/config_docusaurus.yml @@ -0,0 +1,32 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.embedders.optimum.optimum_document_embedder + - haystack_integrations.components.embedders.optimum.optimum_text_embedder + - haystack_integrations.components.embedders.optimum.pooling + - haystack_integrations.components.embedders.optimum.optimization + - haystack_integrations.components.embedders.optimum.quantization + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Optimum integration for Haystack + id: integrations-optimum + markdown: + 
add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: optimum.md + title: Optimum + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/pgvector/pydoc/config_docusaurus.yml b/integrations/pgvector/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..22cbfdf3f4 --- /dev/null +++ b/integrations/pgvector/pydoc/config_docusaurus.yml @@ -0,0 +1,30 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.retrievers.pgvector.embedding_retriever + - haystack_integrations.components.retrievers.pgvector.keyword_retriever + - haystack_integrations.document_stores.pgvector.document_store + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Pgvector integration for Haystack + id: integrations-pgvector + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: pgvector.md + title: Pgvector + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/pinecone/pydoc/config_docusaurus.yml b/integrations/pinecone/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..22e0f4ba69 --- /dev/null +++ b/integrations/pinecone/pydoc/config_docusaurus.yml @@ -0,0 +1,29 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.retrievers.pinecone.embedding_retriever + - haystack_integrations.document_stores.pinecone.document_store + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true 
+ expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Pinecone integration for Haystack + id: integrations-pinecone + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: pinecone.md + title: Pinecone + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/qdrant/pydoc/config_docusaurus.yml b/integrations/qdrant/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..67ee3f5ae7 --- /dev/null +++ b/integrations/qdrant/pydoc/config_docusaurus.yml @@ -0,0 +1,30 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.retrievers.qdrant.retriever + - haystack_integrations.document_stores.qdrant.document_store + - haystack_integrations.document_stores.qdrant.migrate_to_sparse + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Qdrant integration for Haystack + id: integrations-qdrant + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: qdrant.md + title: Qdrant + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/ragas/pydoc/config_docusaurus.yml b/integrations/ragas/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..69366d5d4f --- /dev/null +++ b/integrations/ragas/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.evaluators.ragas.evaluator + search_path: + - ../src + type: 
haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Ragas integration for Haystack + id: integrations-ragas + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: ragas.md + title: Ragas + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/snowflake/pydoc/config_docusaurus.yml b/integrations/snowflake/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..8dca4d4c44 --- /dev/null +++ b/integrations/snowflake/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.retrievers.snowflake.snowflake_table_retriever + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Snowflake integration for Haystack + id: integrations-snowflake + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: snowflake.md + title: Snowflake + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/stackit/pydoc/config_docusaurus.yml b/integrations/stackit/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..eaecc14e12 --- /dev/null +++ b/integrations/stackit/pydoc/config_docusaurus.yml @@ -0,0 +1,30 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.stackit.chat.chat_generator + - 
haystack_integrations.components.embedders.stackit.document_embedder + - haystack_integrations.components.embedders.stackit.text_embedder + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: STACKIT integration for Haystack + id: integrations-stackit + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: stackit.md + title: STACKIT + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/together_ai/pydoc/config_docusaurus.yml b/integrations/together_ai/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..a230db1f94 --- /dev/null +++ b/integrations/together_ai/pydoc/config_docusaurus.yml @@ -0,0 +1,29 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.together_ai.generator + - haystack_integrations.components.generators.together_ai.chat.chat_generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Together AI integration for Haystack + id: integrations-together-ai + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: together_ai.md + title: Together AI + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/unstructured/pydoc/config_docusaurus.yml b/integrations/unstructured/pydoc/config_docusaurus.yml new file mode 100644 index 
0000000000..e35b5f6bfc --- /dev/null +++ b/integrations/unstructured/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.converters.unstructured.converter + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Unstructured integration for Haystack + id: integrations-unstructured + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: unstructured.md + title: Unstructured + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/watsonx/pydoc/config_docusaurus.yml b/integrations/watsonx/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..37b0666e68 --- /dev/null +++ b/integrations/watsonx/pydoc/config_docusaurus.yml @@ -0,0 +1,31 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.watsonx.generator + - haystack_integrations.components.generators.watsonx.chat.chat_generator + - haystack_integrations.components.embedders.watsonx.document_embedder + - haystack_integrations.components.embedders.watsonx.text_embedder + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: IBM watsonx.ai integration for Haystack + id: integrations-watsonx + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: watsonx.md + 
title: IBM watsonx.ai + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/weaviate/pydoc/config_docusaurus.yml b/integrations/weaviate/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..e5ea3dac23 --- /dev/null +++ b/integrations/weaviate/pydoc/config_docusaurus.yml @@ -0,0 +1,32 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.document_stores.weaviate.auth + - haystack_integrations.document_stores.weaviate.document_store + - haystack_integrations.components.retrievers.weaviate.bm25_retriever + - haystack_integrations.components.retrievers.weaviate.embedding_retriever + - haystack_integrations.components.retrievers.weaviate.hybrid_retriever + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Weaviate integration for Haystack + id: integrations-weaviate + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: weaviate.md + title: Weaviate + type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/weights_and_biases_weave/pydoc/config_docusaurus.yml b/integrations/weights_and_biases_weave/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..2e1f250a10 --- /dev/null +++ b/integrations/weights_and_biases_weave/pydoc/config_docusaurus.yml @@ -0,0 +1,29 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.connectors.weave.weave_connector + - haystack_integrations.tracing.weave.tracer + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + 
skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: Weights & Biases integration for Haystack + id: integrations-weights-bias + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: weights_and_bias.md + title: Weights & Biases + type: haystack_pydoc_tools.renderers.DocusaurusRenderer From 72b39e3a5dbfb819721395ed841018ee9beec710 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 20 Oct 2025 17:21:00 +0200 Subject: [PATCH 2/2] rm file --- integrations/chroma/chroma.md | 667 ---------------------------------- 1 file changed, 667 deletions(-) delete mode 100644 integrations/chroma/chroma.md diff --git a/integrations/chroma/chroma.md b/integrations/chroma/chroma.md deleted file mode 100644 index 1c2f485eb6..0000000000 --- a/integrations/chroma/chroma.md +++ /dev/null @@ -1,667 +0,0 @@ ---- -title: "Chroma" -id: integrations-chroma -description: "Chroma integration for Haystack" -slug: "/integrations-chroma" ---- - - - -# Module haystack\_integrations.components.retrievers.chroma.retriever - - - -## ChromaQueryTextRetriever - -A component for retrieving documents from a [Chroma database](https://docs.trychroma.com/) using the `query` API. - -Example usage: -```python -from haystack import Pipeline -from haystack.components.converters import TextFileToDocument -from haystack.components.writers import DocumentWriter - -from haystack_integrations.document_stores.chroma import ChromaDocumentStore -from haystack_integrations.components.retrievers.chroma import ChromaQueryTextRetriever - -file_paths = ...
- -# Chroma is used in-memory so we use the same instances in the two pipelines below -document_store = ChromaDocumentStore() - -indexing = Pipeline() -indexing.add_component("converter", TextFileToDocument()) -indexing.add_component("writer", DocumentWriter(document_store)) -indexing.connect("converter", "writer") -indexing.run({"converter": {"sources": file_paths}}) - -querying = Pipeline() -querying.add_component("retriever", ChromaQueryTextRetriever(document_store)) -results = querying.run({"retriever": {"query": "Variable declarations", "top_k": 3}}) - -for d in results["retriever"]["documents"]: - print(d.meta, d.score) -``` - - - -#### ChromaQueryTextRetriever.\_\_init\_\_ - -```python -def __init__(document_store: ChromaDocumentStore, - filters: Optional[Dict[str, Any]] = None, - top_k: int = 10, - filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE) -``` - -**Arguments**: - -- `document_store`: an instance of `ChromaDocumentStore`. -- `filters`: filters to narrow down the search space. -- `top_k`: the maximum number of documents to retrieve. -- `filter_policy`: Policy to determine how filters are applied. - - - -#### ChromaQueryTextRetriever.run - -```python -@component.output_types(documents=List[Document]) -def run(query: str, - filters: Optional[Dict[str, Any]] = None, - top_k: Optional[int] = None) -> Dict[str, Any] -``` - -Run the retriever on the given input data. - -**Arguments**: - -- `query`: The input data for the retriever. In this case, a plain-text query. -- `filters`: Filters applied to the retrieved Documents. The way runtime filters are applied depends on -the `filter_policy` chosen at retriever initialization. See init method docstring for more -details. -- `top_k`: The maximum number of documents to retrieve. -If not specified, the default value from the constructor is used. - -**Raises**: - -- `ValueError`: If the specified document store is not found or is not a MemoryDocumentStore instance. 
- -**Returns**: - -A dictionary with the following keys: -- `documents`: List of documents returned by the search engine. - - - -#### ChromaQueryTextRetriever.run\_async - -```python -@component.output_types(documents=List[Document]) -async def run_async(query: str, - filters: Optional[Dict[str, Any]] = None, - top_k: Optional[int] = None) -> Dict[str, Any] -``` - -Asynchronously run the retriever on the given input data. - -Asynchronous methods are only supported for HTTP connections. - -**Arguments**: - -- `query`: The input data for the retriever. In this case, a plain-text query. -- `filters`: Filters applied to the retrieved Documents. The way runtime filters are applied depends on -the `filter_policy` chosen at retriever initialization. See init method docstring for more -details. -- `top_k`: The maximum number of documents to retrieve. -If not specified, the default value from the constructor is used. - -**Raises**: - -- `ValueError`: If the specified document store is not found or is not a MemoryDocumentStore instance. - -**Returns**: - -A dictionary with the following keys: -- `documents`: List of documents returned by the search engine. - - - -#### ChromaQueryTextRetriever.from\_dict - -```python -@classmethod -def from_dict(cls, data: Dict[str, Any]) -> "ChromaQueryTextRetriever" -``` - -Deserializes the component from a dictionary. - -**Arguments**: - -- `data`: Dictionary to deserialize from. - -**Returns**: - -Deserialized component. - - - -#### ChromaQueryTextRetriever.to\_dict - -```python -def to_dict() -> Dict[str, Any] -``` - -Serializes the component to a dictionary. - -**Returns**: - -Dictionary with serialized data. - - - -## ChromaEmbeddingRetriever - -A component for retrieving documents from a [Chroma database](https://docs.trychroma.com/) using embeddings. 
- - - -#### ChromaEmbeddingRetriever.\_\_init\_\_ - -```python -def __init__(document_store: ChromaDocumentStore, - filters: Optional[Dict[str, Any]] = None, - top_k: int = 10, - filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE) -``` - -**Arguments**: - -- `document_store`: an instance of `ChromaDocumentStore`. -- `filters`: filters to narrow down the search space. -- `top_k`: the maximum number of documents to retrieve. -- `filter_policy`: Policy to determine how filters are applied. - - - -#### ChromaEmbeddingRetriever.run - -```python -@component.output_types(documents=List[Document]) -def run(query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, - top_k: Optional[int] = None) -> Dict[str, Any] -``` - -Run the retriever on the given input data. - -**Arguments**: - -- `query_embedding`: the query embeddings. -- `filters`: Filters applied to the retrieved Documents. The way runtime filters are applied depends on -the `filter_policy` chosen at retriever initialization. See init method docstring for more -details. -- `top_k`: the maximum number of documents to retrieve. -If not specified, the default value from the constructor is used. - -**Returns**: - -a dictionary with the following keys: -- `documents`: List of documents returned by the search engine. - - - -#### ChromaEmbeddingRetriever.run\_async - -```python -@component.output_types(documents=List[Document]) -async def run_async(query_embedding: List[float], - filters: Optional[Dict[str, Any]] = None, - top_k: Optional[int] = None) -> Dict[str, Any] -``` - -Asynchronously run the retriever on the given input data. - -Asynchronous methods are only supported for HTTP connections. - -**Arguments**: - -- `query_embedding`: the query embeddings. -- `filters`: Filters applied to the retrieved Documents. The way runtime filters are applied depends on -the `filter_policy` chosen at retriever initialization. See init method docstring for more -details. 
-- `top_k`: the maximum number of documents to retrieve. -If not specified, the default value from the constructor is used. - -**Returns**: - -a dictionary with the following keys: -- `documents`: List of documents returned by the search engine. - - - -#### ChromaEmbeddingRetriever.from\_dict - -```python -@classmethod -def from_dict(cls, data: Dict[str, Any]) -> "ChromaEmbeddingRetriever" -``` - -Deserializes the component from a dictionary. - -**Arguments**: - -- `data`: Dictionary to deserialize from. - -**Returns**: - -Deserialized component. - - - -#### ChromaEmbeddingRetriever.to\_dict - -```python -def to_dict() -> Dict[str, Any] -``` - -Serializes the component to a dictionary. - -**Returns**: - -Dictionary with serialized data. - - - -# Module haystack\_integrations.document\_stores.chroma.document\_store - - - -## ChromaDocumentStore - -A document store using [Chroma](https://docs.trychroma.com/) as the backend. - -We use the `collection.get` API to implement the document store protocol, -the `collection.search` API will be used in the retriever instead. - - - -#### ChromaDocumentStore.\_\_init\_\_ - -```python -def __init__(collection_name: str = "documents", - embedding_function: str = "default", - persist_path: Optional[str] = None, - host: Optional[str] = None, - port: Optional[int] = None, - distance_function: Literal["l2", "cosine", "ip"] = "l2", - metadata: Optional[dict] = None, - **embedding_function_params: Any) -``` - -Creates a new ChromaDocumentStore instance. - -It is meant to be connected to a Chroma collection. - -Note: for the component to be part of a serializable pipeline, the __init__ -parameters must be serializable, reason why we use a registry to configure the -embedding function passing a string. - -**Arguments**: - -- `collection_name`: the name of the collection to use in the database. -- `embedding_function`: the name of the embedding function to use to embed the query -- `persist_path`: Path for local persistent storage. 
Cannot be used in combination with `host` and `port`. -If none of `persist_path`, `host`, and `port` is specified, the database will be `in-memory`. -- `host`: The host address for the remote Chroma HTTP client connection. Cannot be used with `persist_path`. -- `port`: The port number for the remote Chroma HTTP client connection. Cannot be used with `persist_path`. -- `distance_function`: The distance metric for the embedding space. -- `"l2"` computes the Euclidean (straight-line) distance between vectors, -where smaller scores indicate more similarity. -- `"cosine"` computes the cosine similarity between vectors, -with higher scores indicating greater similarity. -- `"ip"` stands for inner product, where higher scores indicate greater similarity between vectors. -**Note**: `distance_function` can only be set during the creation of a collection. -To change the distance metric of an existing collection, consider cloning the collection. -- `metadata`: a dictionary of chromadb collection parameters passed directly to chromadb's client -method `create_collection`. If it contains the key `"hnsw:space"`, the value will take precedence over the -`distance_function` parameter above. -- `embedding_function_params`: additional parameters to pass to the embedding function. - - - -#### ChromaDocumentStore.count\_documents - -```python -def count_documents() -> int -``` - -Returns how many documents are present in the document store. - -**Returns**: - -how many documents are present in the document store. - - - -#### ChromaDocumentStore.count\_documents\_async - -```python -async def count_documents_async() -> int -``` - -Asynchronously returns how many documents are present in the document store. - -Asynchronous methods are only supported for HTTP connections. - -**Returns**: - -how many documents are present in the document store. 
- - - -#### ChromaDocumentStore.filter\_documents - -```python -def filter_documents( - filters: Optional[Dict[str, Any]] = None) -> List[Document] -``` - -Returns the documents that match the filters provided. - -For a detailed specification of the filters, -refer to the [documentation](https://docs.haystack.deepset.ai/v2.0/docs/metadata-filtering). - -**Arguments**: - -- `filters`: the filters to apply to the document list. - -**Returns**: - -a list of Documents that match the given filters. - - - -#### ChromaDocumentStore.filter\_documents\_async - -```python -async def filter_documents_async( - filters: Optional[Dict[str, Any]] = None) -> List[Document] -``` - -Asynchronously returns the documents that match the filters provided. - -Asynchronous methods are only supported for HTTP connections. - -For a detailed specification of the filters, -refer to the [documentation](https://docs.haystack.deepset.ai/v2.0/docs/metadata-filtering). - -**Arguments**: - -- `filters`: the filters to apply to the document list. - -**Returns**: - -a list of Documents that match the given filters. - - - -#### ChromaDocumentStore.write\_documents - -```python -def write_documents(documents: List[Document], - policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> int -``` - -Writes (or overwrites) documents into the store. - -**Arguments**: - -- `documents`: A list of documents to write into the document store. -- `policy`: Not supported at the moment. - -**Raises**: - -- `ValueError`: When input is not valid. - -**Returns**: - -The number of documents written - - - -#### ChromaDocumentStore.write\_documents\_async - -```python -async def write_documents_async( - documents: List[Document], - policy: DuplicatePolicy = DuplicatePolicy.FAIL) -> int -``` - -Asynchronously writes (or overwrites) documents into the store. - -Asynchronous methods are only supported for HTTP connections. - -**Arguments**: - -- `documents`: A list of documents to write into the document store. 
-- `policy`: Not supported at the moment. - -**Raises**: - -- `ValueError`: When input is not valid. - -**Returns**: - -The number of documents written - - - -#### ChromaDocumentStore.delete\_documents - -```python -def delete_documents(document_ids: List[str]) -> None -``` - -Deletes all documents with a matching document_ids from the document store. - -**Arguments**: - -- `document_ids`: the document ids to delete - - - -#### ChromaDocumentStore.delete\_documents\_async - -```python -async def delete_documents_async(document_ids: List[str]) -> None -``` - -Asynchronously deletes all documents with a matching document_ids from the document store. - -Asynchronous methods are only supported for HTTP connections. - -**Arguments**: - -- `document_ids`: the document ids to delete - - - -#### ChromaDocumentStore.search - -```python -def search(queries: List[str], - top_k: int, - filters: Optional[Dict[str, Any]] = None) -> List[List[Document]] -``` - -Search the documents in the store using the provided text queries. - -**Arguments**: - -- `queries`: the list of queries to search for. -- `top_k`: top_k documents to return for each query. -- `filters`: a dictionary of filters to apply to the search. Accepts filters in haystack format. - -**Returns**: - -matching documents for each query. - - - -#### ChromaDocumentStore.search\_async - -```python -async def search_async( - queries: List[str], - top_k: int, - filters: Optional[Dict[str, Any]] = None) -> List[List[Document]] -``` - -Asynchronously search the documents in the store using the provided text queries. - -Asynchronous methods are only supported for HTTP connections. - -**Arguments**: - -- `queries`: the list of queries to search for. -- `top_k`: top_k documents to return for each query. -- `filters`: a dictionary of filters to apply to the search. Accepts filters in haystack format. - -**Returns**: - -matching documents for each query. 
- - - -#### ChromaDocumentStore.search\_embeddings - -```python -def search_embeddings( - query_embeddings: List[List[float]], - top_k: int, - filters: Optional[Dict[str, Any]] = None) -> List[List[Document]] -``` - -Perform vector search on the stored document, pass the embeddings of the queries instead of their text. - -**Arguments**: - -- `query_embeddings`: a list of embeddings to use as queries. -- `top_k`: the maximum number of documents to retrieve. -- `filters`: a dictionary of filters to apply to the search. Accepts filters in haystack format. - -**Returns**: - -a list of lists of documents that match the given filters. - - - -#### ChromaDocumentStore.search\_embeddings\_async - -```python -async def search_embeddings_async( - query_embeddings: List[List[float]], - top_k: int, - filters: Optional[Dict[str, Any]] = None) -> List[List[Document]] -``` - -Asynchronously perform vector search on the stored document, pass the embeddings of the queries instead of - -their text. - -Asynchronous methods are only supported for HTTP connections. - -**Arguments**: - -- `query_embeddings`: a list of embeddings to use as queries. -- `top_k`: the maximum number of documents to retrieve. -- `filters`: a dictionary of filters to apply to the search. Accepts filters in haystack format. - -**Returns**: - -a list of lists of documents that match the given filters. - - - -#### ChromaDocumentStore.from\_dict - -```python -@classmethod -def from_dict(cls, data: Dict[str, Any]) -> "ChromaDocumentStore" -``` - -Deserializes the component from a dictionary. - -**Arguments**: - -- `data`: Dictionary to deserialize from. - -**Returns**: - -Deserialized component. - - - -#### ChromaDocumentStore.to\_dict - -```python -def to_dict() -> Dict[str, Any] -``` - -Serializes the component to a dictionary. - -**Returns**: - -Dictionary with serialized data. 
- - - -# Module haystack\_integrations.document\_stores.chroma.errors - - - -## ChromaDocumentStoreError - -Parent class for all ChromaDocumentStore exceptions. - - - -## ChromaDocumentStoreFilterError - -Raised when a filter is not valid for a ChromaDocumentStore. - - - -## ChromaDocumentStoreConfigError - -Raised when a configuration is not valid for a ChromaDocumentStore. - - - -# Module haystack\_integrations.document\_stores.chroma.utils - - - -#### get\_embedding\_function - -```python -def get_embedding_function(function_name: str, - **kwargs: Any) -> EmbeddingFunction -``` - -Load an embedding function by name. - -**Arguments**: - -- `function_name`: the name of the embedding function. -- `kwargs`: additional arguments to pass to the embedding function. - -**Raises**: - -- `ChromaDocumentStoreConfigError`: if the function name is invalid. - -**Returns**: - -the loaded embedding function. -