From 40d246c7fafd4076025e786fbab6605f3b427c40 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Mon, 11 May 2026 12:26:46 +0200 Subject: [PATCH 1/2] docs: add FalkorDB integration documentation Add documentation pages for FalkorDBDocumentStore, FalkorDBEmbeddingRetriever, and FalkorDBCypherRetriever following the haystack-core-integrations PR #3158. Update sidebars.js to include the new pages in the navigation. Co-Authored-By: Claude Sonnet 4.6 --- .../document-stores/falkordbdocumentstore.mdx | 105 +++++++++++++ .../retrievers/falkordbcypherretriever.mdx | 145 ++++++++++++++++++ .../retrievers/falkordbembeddingretriever.mdx | 138 +++++++++++++++++ docs-website/sidebars.js | 3 + 4 files changed, 391 insertions(+) create mode 100644 docs-website/docs/document-stores/falkordbdocumentstore.mdx create mode 100644 docs-website/docs/pipeline-components/retrievers/falkordbcypherretriever.mdx create mode 100644 docs-website/docs/pipeline-components/retrievers/falkordbembeddingretriever.mdx diff --git a/docs-website/docs/document-stores/falkordbdocumentstore.mdx b/docs-website/docs/document-stores/falkordbdocumentstore.mdx new file mode 100644 index 0000000000..8cf1143a84 --- /dev/null +++ b/docs-website/docs/document-stores/falkordbdocumentstore.mdx @@ -0,0 +1,105 @@ +--- +title: "FalkorDBDocumentStore" +id: falkordbdocumentstore +slug: "/falkordbdocumentstore" +description: "Use the FalkorDB graph database with Haystack for GraphRAG workloads." +--- + +# FalkorDBDocumentStore + +Use the FalkorDB graph database with Haystack for GraphRAG workloads. + +
<div className="key-value-table"> + +| | | +| --- | --- | +| API reference | [FalkorDB](/reference/integrations-falkordb) | +| GitHub link | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/falkordb | + +</div>
+ +FalkorDB is a high-performance graph database optimized for GraphRAG workloads. The `FalkorDBDocumentStore` stores documents as graph nodes and supports native vector search — no APOC is required. Documents and their `meta` fields are stored flat on each node, and all bulk writes use `UNWIND` + `MERGE` for safe OpenCypher upserts. + +For more information, see the [FalkorDB documentation](https://docs.falkordb.com/). + +## Installation + +Run FalkorDB with Docker: + +```shell +docker run -d -p 6379:6379 falkordb/falkordb:latest +``` + +Install the Haystack integration: + +```shell +pip install falkordb-haystack +``` + +## Usage + +Initialize the document store and write documents: + +```python +from haystack import Document +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + embedding_dim=768, + recreate_graph=True, +) + +document_store.write_documents( + [ + Document( + content="There are over 7,000 languages spoken around the world today.", + ), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + ), + ], +) +print(document_store.count_documents()) +``` + +To learn more about the initialization parameters, see the [API docs](/reference/integrations-falkordb#falkordbdocumentstore). + +To compute real embeddings for your documents, use a Document Embedder such as the [`SentenceTransformersDocumentEmbedder`](../pipeline-components/embedders/sentencetransformersdocumentembedder.mdx). 
+ +### Authentication + +To connect to a password-protected FalkorDB instance, pass the password via `Secret`: + +```python +from haystack.utils import Secret +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + password=Secret.from_env_var("FALKORDB_PASSWORD"), +) +``` + +### Similarity Functions + +`FalkorDBDocumentStore` supports two similarity functions for vector search: + +- `"cosine"` (default): cosine similarity, best for normalized embeddings. +- `"euclidean"`: Euclidean distance, useful when embedding magnitude matters. + +```python +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + embedding_dim=768, + similarity="euclidean", +) +``` + +### Supported Retrievers + +- [`FalkorDBEmbeddingRetriever`](../pipeline-components/retrievers/falkordbembeddingretriever.mdx): Retrieves documents from the `FalkorDBDocumentStore` based on vector similarity using FalkorDB's native vector index. +- [`FalkorDBCypherRetriever`](../pipeline-components/retrievers/falkordbcypherretriever.mdx): Retrieves documents by executing arbitrary OpenCypher queries, enabling graph traversal and multi-hop queries for GraphRAG pipelines. diff --git a/docs-website/docs/pipeline-components/retrievers/falkordbcypherretriever.mdx b/docs-website/docs/pipeline-components/retrievers/falkordbcypherretriever.mdx new file mode 100644 index 0000000000..336821f040 --- /dev/null +++ b/docs-website/docs/pipeline-components/retrievers/falkordbcypherretriever.mdx @@ -0,0 +1,145 @@ +--- +title: "FalkorDBCypherRetriever" +id: falkordbcypherretriever +slug: "/falkordbcypherretriever" +description: "A Retriever that executes arbitrary OpenCypher queries against a FalkorDB Document Store." +--- + +# FalkorDBCypherRetriever + +A Retriever that executes arbitrary OpenCypher queries against a FalkorDB Document Store. + +
<div className="key-value-table"> + +| | | +| --- | --- | +| **Most common position in a pipeline** | After a query-building component and before a [`PromptBuilder`](../builders/promptbuilder.mdx) in a GraphRAG pipeline | +| **Mandatory init variables** | `document_store`: An instance of a [FalkorDBDocumentStore](../../document-stores/falkordbdocumentstore.mdx) | +| **Mandatory run variables** | `query`: An OpenCypher query string (or set `custom_cypher_query` at init) | +| **Output variables** | `documents`: A list of documents | +| **API reference** | [FalkorDB](/reference/integrations-falkordb) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/falkordb | +| **Package name** | `falkordb-haystack` | + +</div>
+ +## Overview + +The `FalkorDBCypherRetriever` executes arbitrary OpenCypher queries against a `FalkorDBDocumentStore`, making it suitable for graph traversal and multi-hop queries in GraphRAG pipelines. The query must return nodes or dictionaries that map to Haystack `Document` fields. + +A `custom_cypher_query` can be set at initialization and optionally overridden at runtime by passing `query` to `run()`. Use parameterized queries (`$param_name` in Cypher, passed via `parameters`) rather than string interpolation to avoid injection vulnerabilities. + +:::warning[Security] +Raw Cypher queries must only come from trusted sources. Never pass unsanitized user input directly in query strings. Use `parameters` instead. +::: + +## Installation + +```shell +pip install falkordb-haystack +``` + +Ensure FalkorDB is running, for example via Docker: + +```shell +docker run -d -p 6379:6379 falkordb/falkordb:latest +``` + +## Usage + +### On its own + +```python +from haystack import Document +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore +from haystack_integrations.components.retrievers.falkordb import FalkorDBCypherRetriever + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + recreate_graph=True, +) +document_store.write_documents( + [ + Document( + content="There are over 7,000 languages spoken around the world today.", + meta={"topic": "linguistics"}, + ), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + meta={"topic": "biology"}, + ), + ], +) + +retriever = FalkorDBCypherRetriever( + document_store=document_store, + custom_cypher_query="MATCH (d:Document {topic: $topic}) RETURN d", +) +result = retriever.run(parameters={"topic": "linguistics"}) +print(result["documents"][0].content) +``` + +### In a pipeline + +```python +from haystack import Document, Pipeline +from haystack.components.builders import ChatPromptBuilder +from haystack.components.generators.chat import 
HuggingFaceLocalChatGenerator +from haystack.dataclasses import ChatMessage +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore +from haystack_integrations.components.retrievers.falkordb import FalkorDBCypherRetriever + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + recreate_graph=True, +) +document_store.write_documents( + [ + Document( + content="There are over 7,000 languages spoken around the world today.", + meta={"topic": "linguistics"}, + ), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + meta={"topic": "biology"}, + ), + ], +) + +prompt_template = [ + ChatMessage.from_user( + """Given these documents, answer the question. +Documents: +{% for doc in documents %} + {{ doc.content }} +{% endfor %} +Question: {{ question }}""", + ), +] + +pipeline = Pipeline() +pipeline.add_component( + "retriever", + FalkorDBCypherRetriever( + document_store=document_store, + custom_cypher_query="MATCH (d:Document {topic: $topic}) RETURN d", + ), +) +pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_template)) +pipeline.add_component( + "llm", + HuggingFaceLocalChatGenerator(model="HuggingFaceTB/SmolLM2-135M-Instruct"), +) +pipeline.connect("retriever.documents", "prompt_builder.documents") +pipeline.connect("prompt_builder.prompt", "llm.messages") + +result = pipeline.run( + { + "retriever": {"parameters": {"topic": "linguistics"}}, + "prompt_builder": {"question": "How many languages are there?"}, + }, +) +print(result["llm"]["replies"][0].text) +``` diff --git a/docs-website/docs/pipeline-components/retrievers/falkordbembeddingretriever.mdx b/docs-website/docs/pipeline-components/retrievers/falkordbembeddingretriever.mdx new file mode 100644 index 0000000000..f44e59092d --- /dev/null +++ b/docs-website/docs/pipeline-components/retrievers/falkordbembeddingretriever.mdx @@ -0,0 +1,138 @@ +--- +title: "FalkorDBEmbeddingRetriever" +id: 
falkordbembeddingretriever +slug: "/falkordbembeddingretriever" +description: "An embedding-based Retriever compatible with the FalkorDB Document Store." +--- + +# FalkorDBEmbeddingRetriever + +An embedding-based Retriever compatible with the FalkorDB Document Store. + +
<div className="key-value-table"> + +| | | +| --- | --- | +| **Most common position in a pipeline** | 1. After a Text Embedder and before a [`PromptBuilder`](../builders/promptbuilder.mdx) in a RAG pipeline <br /> <br />2. The last component in a semantic search pipeline | +| **Mandatory init variables** | `document_store`: An instance of a [FalkorDBDocumentStore](../../document-stores/falkordbdocumentstore.mdx) | +| **Mandatory run variables** | `query_embedding`: A vector representing the query (a list of floats) | +| **Output variables** | `documents`: A list of documents | +| **API reference** | [FalkorDB](/reference/integrations-falkordb) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/falkordb | +| **Package name** | `falkordb-haystack` | + +</div>
+ +## Overview + +The `FalkorDBEmbeddingRetriever` retrieves documents from a `FalkorDBDocumentStore` using FalkorDB's native vector index. It compares the query embedding with document embeddings and returns the most similar documents. + +In addition to `query_embedding`, the retriever accepts optional `filters` to narrow the search space and `top_k` to limit the number of results. + +The embedding dimension and similarity function are configured on the `FalkorDBDocumentStore` at initialization time. + +## Installation + +```shell +pip install falkordb-haystack +``` + +Ensure FalkorDB is running, for example via Docker: + +```shell +docker run -d -p 6379:6379 falkordb/falkordb:latest +``` + +## Usage + +### On its own + +```python +from haystack import Document +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore +from haystack_integrations.components.retrievers.falkordb import ( + FalkorDBEmbeddingRetriever, +) + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + embedding_dim=3, + recreate_graph=True, +) +document_store.write_documents( + [ + Document( + content="There are over 7,000 languages spoken around the world today.", + embedding=[0.1, 0.2, 0.3], + ), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + embedding=[0.8, 0.1, 0.5], + ), + ], +) + +retriever = FalkorDBEmbeddingRetriever(document_store=document_store, top_k=1) +result = retriever.run(query_embedding=[0.1, 0.2, 0.3]) +print(result["documents"][0].content) +``` + +### In a pipeline + +```python +from haystack import Document, Pipeline +from haystack.document_stores.types import DuplicatePolicy +from haystack.components.embedders import ( + SentenceTransformersDocumentEmbedder, + SentenceTransformersTextEmbedder, +) +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore +from haystack_integrations.components.retrievers.falkordb import ( + FalkorDBEmbeddingRetriever, +) + 
+document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + embedding_dim=384, + recreate_graph=True, +) + +documents = [ + Document(content="There are over 7,000 languages spoken around the world today."), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + ), + Document( + content="Bioluminescent waves can be seen in the Maldives and Puerto Rico.", + ), +] + +document_embedder = SentenceTransformersDocumentEmbedder( + model="sentence-transformers/all-MiniLM-L6-v2", +) +document_embedder.warm_up() +documents_with_embeddings = document_embedder.run(documents) + +document_store.write_documents( + documents_with_embeddings["documents"], + policy=DuplicatePolicy.OVERWRITE, +) + +query_pipeline = Pipeline() +query_pipeline.add_component( + "text_embedder", + SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), +) +query_pipeline.add_component( + "retriever", + FalkorDBEmbeddingRetriever(document_store=document_store, top_k=3), +) +query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding") + +result = query_pipeline.run( + {"text_embedder": {"text": "How many languages are there?"}}, +) +print(result["retriever"]["documents"][0].content) +``` diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 4c85e94737..757478e2be 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -115,6 +115,7 @@ export default { }, 'document-stores/elasticsearch-document-store', 'document-stores/faissdocumentstore', + 'document-stores/falkordbdocumentstore', { type: 'link', label: 'LanceDBDocumentStore', @@ -543,6 +544,8 @@ export default { 'pipeline-components/retrievers/elasticsearchbm25retriever', 'pipeline-components/retrievers/elasticsearchembeddingretriever', 'pipeline-components/retrievers/faissembeddingretriever', + 'pipeline-components/retrievers/falkordbcypherretriever', + 'pipeline-components/retrievers/falkordbembeddingretriever', 
'pipeline-components/retrievers/filterretriever', 'pipeline-components/retrievers/inmemorybm25retriever', 'pipeline-components/retrievers/inmemoryembeddingretriever', From 7dbbc3379cfa7d7bb1edffe8c9e1977649c00971 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Mon, 11 May 2026 12:27:58 +0200 Subject: [PATCH 2/2] docs: add FalkorDB docs to versioned_docs/version-2.28 Co-Authored-By: Claude Sonnet 4.6 --- .../document-stores/falkordbdocumentstore.mdx | 105 +++++++++++++ .../retrievers/falkordbcypherretriever.mdx | 145 ++++++++++++++++++ .../retrievers/falkordbembeddingretriever.mdx | 138 +++++++++++++++++ .../version-2.28-sidebars.json | 3 + 4 files changed, 391 insertions(+) create mode 100644 docs-website/versioned_docs/version-2.28/document-stores/falkordbdocumentstore.mdx create mode 100644 docs-website/versioned_docs/version-2.28/pipeline-components/retrievers/falkordbcypherretriever.mdx create mode 100644 docs-website/versioned_docs/version-2.28/pipeline-components/retrievers/falkordbembeddingretriever.mdx diff --git a/docs-website/versioned_docs/version-2.28/document-stores/falkordbdocumentstore.mdx b/docs-website/versioned_docs/version-2.28/document-stores/falkordbdocumentstore.mdx new file mode 100644 index 0000000000..8cf1143a84 --- /dev/null +++ b/docs-website/versioned_docs/version-2.28/document-stores/falkordbdocumentstore.mdx @@ -0,0 +1,105 @@ +--- +title: "FalkorDBDocumentStore" +id: falkordbdocumentstore +slug: "/falkordbdocumentstore" +description: "Use the FalkorDB graph database with Haystack for GraphRAG workloads." +--- + +# FalkorDBDocumentStore + +Use the FalkorDB graph database with Haystack for GraphRAG workloads. + +
<div className="key-value-table"> + +| | | +| --- | --- | +| API reference | [FalkorDB](/reference/integrations-falkordb) | +| GitHub link | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/falkordb | + +</div>
+ +FalkorDB is a high-performance graph database optimized for GraphRAG workloads. The `FalkorDBDocumentStore` stores documents as graph nodes and supports native vector search — no APOC is required. Documents and their `meta` fields are stored flat on each node, and all bulk writes use `UNWIND` + `MERGE` for safe OpenCypher upserts. + +For more information, see the [FalkorDB documentation](https://docs.falkordb.com/). + +## Installation + +Run FalkorDB with Docker: + +```shell +docker run -d -p 6379:6379 falkordb/falkordb:latest +``` + +Install the Haystack integration: + +```shell +pip install falkordb-haystack +``` + +## Usage + +Initialize the document store and write documents: + +```python +from haystack import Document +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + embedding_dim=768, + recreate_graph=True, +) + +document_store.write_documents( + [ + Document( + content="There are over 7,000 languages spoken around the world today.", + ), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + ), + ], +) +print(document_store.count_documents()) +``` + +To learn more about the initialization parameters, see the [API docs](/reference/integrations-falkordb#falkordbdocumentstore). + +To compute real embeddings for your documents, use a Document Embedder such as the [`SentenceTransformersDocumentEmbedder`](../pipeline-components/embedders/sentencetransformersdocumentembedder.mdx). 
+ +### Authentication + +To connect to a password-protected FalkorDB instance, pass the password via `Secret`: + +```python +from haystack.utils import Secret +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + password=Secret.from_env_var("FALKORDB_PASSWORD"), +) +``` + +### Similarity Functions + +`FalkorDBDocumentStore` supports two similarity functions for vector search: + +- `"cosine"` (default): cosine similarity, best for normalized embeddings. +- `"euclidean"`: Euclidean distance, useful when embedding magnitude matters. + +```python +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + embedding_dim=768, + similarity="euclidean", +) +``` + +### Supported Retrievers + +- [`FalkorDBEmbeddingRetriever`](../pipeline-components/retrievers/falkordbembeddingretriever.mdx): Retrieves documents from the `FalkorDBDocumentStore` based on vector similarity using FalkorDB's native vector index. +- [`FalkorDBCypherRetriever`](../pipeline-components/retrievers/falkordbcypherretriever.mdx): Retrieves documents by executing arbitrary OpenCypher queries, enabling graph traversal and multi-hop queries for GraphRAG pipelines. diff --git a/docs-website/versioned_docs/version-2.28/pipeline-components/retrievers/falkordbcypherretriever.mdx b/docs-website/versioned_docs/version-2.28/pipeline-components/retrievers/falkordbcypherretriever.mdx new file mode 100644 index 0000000000..336821f040 --- /dev/null +++ b/docs-website/versioned_docs/version-2.28/pipeline-components/retrievers/falkordbcypherretriever.mdx @@ -0,0 +1,145 @@ +--- +title: "FalkorDBCypherRetriever" +id: falkordbcypherretriever +slug: "/falkordbcypherretriever" +description: "A Retriever that executes arbitrary OpenCypher queries against a FalkorDB Document Store." 
+--- + +# FalkorDBCypherRetriever + +A Retriever that executes arbitrary OpenCypher queries against a FalkorDB Document Store. + +
<div className="key-value-table"> + +| | | +| --- | --- | +| **Most common position in a pipeline** | After a query-building component and before a [`PromptBuilder`](../builders/promptbuilder.mdx) in a GraphRAG pipeline | +| **Mandatory init variables** | `document_store`: An instance of a [FalkorDBDocumentStore](../../document-stores/falkordbdocumentstore.mdx) | +| **Mandatory run variables** | `query`: An OpenCypher query string (or set `custom_cypher_query` at init) | +| **Output variables** | `documents`: A list of documents | +| **API reference** | [FalkorDB](/reference/integrations-falkordb) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/falkordb | +| **Package name** | `falkordb-haystack` | + +</div>
+ +## Overview + +The `FalkorDBCypherRetriever` executes arbitrary OpenCypher queries against a `FalkorDBDocumentStore`, making it suitable for graph traversal and multi-hop queries in GraphRAG pipelines. The query must return nodes or dictionaries that map to Haystack `Document` fields. + +A `custom_cypher_query` can be set at initialization and optionally overridden at runtime by passing `query` to `run()`. Use parameterized queries (`$param_name` in Cypher, passed via `parameters`) rather than string interpolation to avoid injection vulnerabilities. + +:::warning[Security] +Raw Cypher queries must only come from trusted sources. Never pass unsanitized user input directly in query strings. Use `parameters` instead. +::: + +## Installation + +```shell +pip install falkordb-haystack +``` + +Ensure FalkorDB is running, for example via Docker: + +```shell +docker run -d -p 6379:6379 falkordb/falkordb:latest +``` + +## Usage + +### On its own + +```python +from haystack import Document +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore +from haystack_integrations.components.retrievers.falkordb import FalkorDBCypherRetriever + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + recreate_graph=True, +) +document_store.write_documents( + [ + Document( + content="There are over 7,000 languages spoken around the world today.", + meta={"topic": "linguistics"}, + ), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + meta={"topic": "biology"}, + ), + ], +) + +retriever = FalkorDBCypherRetriever( + document_store=document_store, + custom_cypher_query="MATCH (d:Document {topic: $topic}) RETURN d", +) +result = retriever.run(parameters={"topic": "linguistics"}) +print(result["documents"][0].content) +``` + +### In a pipeline + +```python +from haystack import Document, Pipeline +from haystack.components.builders import ChatPromptBuilder +from haystack.components.generators.chat import 
HuggingFaceLocalChatGenerator +from haystack.dataclasses import ChatMessage +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore +from haystack_integrations.components.retrievers.falkordb import FalkorDBCypherRetriever + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + recreate_graph=True, +) +document_store.write_documents( + [ + Document( + content="There are over 7,000 languages spoken around the world today.", + meta={"topic": "linguistics"}, + ), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + meta={"topic": "biology"}, + ), + ], +) + +prompt_template = [ + ChatMessage.from_user( + """Given these documents, answer the question. +Documents: +{% for doc in documents %} + {{ doc.content }} +{% endfor %} +Question: {{ question }}""", + ), +] + +pipeline = Pipeline() +pipeline.add_component( + "retriever", + FalkorDBCypherRetriever( + document_store=document_store, + custom_cypher_query="MATCH (d:Document {topic: $topic}) RETURN d", + ), +) +pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_template)) +pipeline.add_component( + "llm", + HuggingFaceLocalChatGenerator(model="HuggingFaceTB/SmolLM2-135M-Instruct"), +) +pipeline.connect("retriever.documents", "prompt_builder.documents") +pipeline.connect("prompt_builder.prompt", "llm.messages") + +result = pipeline.run( + { + "retriever": {"parameters": {"topic": "linguistics"}}, + "prompt_builder": {"question": "How many languages are there?"}, + }, +) +print(result["llm"]["replies"][0].text) +``` diff --git a/docs-website/versioned_docs/version-2.28/pipeline-components/retrievers/falkordbembeddingretriever.mdx b/docs-website/versioned_docs/version-2.28/pipeline-components/retrievers/falkordbembeddingretriever.mdx new file mode 100644 index 0000000000..f44e59092d --- /dev/null +++ b/docs-website/versioned_docs/version-2.28/pipeline-components/retrievers/falkordbembeddingretriever.mdx @@ -0,0 
+1,138 @@ +--- +title: "FalkorDBEmbeddingRetriever" +id: falkordbembeddingretriever +slug: "/falkordbembeddingretriever" +description: "An embedding-based Retriever compatible with the FalkorDB Document Store." +--- + +# FalkorDBEmbeddingRetriever + +An embedding-based Retriever compatible with the FalkorDB Document Store. + +
<div className="key-value-table"> + +| | | +| --- | --- | +| **Most common position in a pipeline** | 1. After a Text Embedder and before a [`PromptBuilder`](../builders/promptbuilder.mdx) in a RAG pipeline <br /> <br />2. The last component in a semantic search pipeline | +| **Mandatory init variables** | `document_store`: An instance of a [FalkorDBDocumentStore](../../document-stores/falkordbdocumentstore.mdx) | +| **Mandatory run variables** | `query_embedding`: A vector representing the query (a list of floats) | +| **Output variables** | `documents`: A list of documents | +| **API reference** | [FalkorDB](/reference/integrations-falkordb) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/falkordb | +| **Package name** | `falkordb-haystack` | + +</div>
+ +## Overview + +The `FalkorDBEmbeddingRetriever` retrieves documents from a `FalkorDBDocumentStore` using FalkorDB's native vector index. It compares the query embedding with document embeddings and returns the most similar documents. + +In addition to `query_embedding`, the retriever accepts optional `filters` to narrow the search space and `top_k` to limit the number of results. + +The embedding dimension and similarity function are configured on the `FalkorDBDocumentStore` at initialization time. + +## Installation + +```shell +pip install falkordb-haystack +``` + +Ensure FalkorDB is running, for example via Docker: + +```shell +docker run -d -p 6379:6379 falkordb/falkordb:latest +``` + +## Usage + +### On its own + +```python +from haystack import Document +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore +from haystack_integrations.components.retrievers.falkordb import ( + FalkorDBEmbeddingRetriever, +) + +document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + embedding_dim=3, + recreate_graph=True, +) +document_store.write_documents( + [ + Document( + content="There are over 7,000 languages spoken around the world today.", + embedding=[0.1, 0.2, 0.3], + ), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + embedding=[0.8, 0.1, 0.5], + ), + ], +) + +retriever = FalkorDBEmbeddingRetriever(document_store=document_store, top_k=1) +result = retriever.run(query_embedding=[0.1, 0.2, 0.3]) +print(result["documents"][0].content) +``` + +### In a pipeline + +```python +from haystack import Document, Pipeline +from haystack.document_stores.types import DuplicatePolicy +from haystack.components.embedders import ( + SentenceTransformersDocumentEmbedder, + SentenceTransformersTextEmbedder, +) +from haystack_integrations.document_stores.falkordb import FalkorDBDocumentStore +from haystack_integrations.components.retrievers.falkordb import ( + FalkorDBEmbeddingRetriever, +) + 
+document_store = FalkorDBDocumentStore( + host="localhost", + port=6379, + embedding_dim=384, + recreate_graph=True, +) + +documents = [ + Document(content="There are over 7,000 languages spoken around the world today."), + Document( + content="Elephants have been observed to recognize themselves in mirrors.", + ), + Document( + content="Bioluminescent waves can be seen in the Maldives and Puerto Rico.", + ), +] + +document_embedder = SentenceTransformersDocumentEmbedder( + model="sentence-transformers/all-MiniLM-L6-v2", +) +document_embedder.warm_up() +documents_with_embeddings = document_embedder.run(documents) + +document_store.write_documents( + documents_with_embeddings["documents"], + policy=DuplicatePolicy.OVERWRITE, +) + +query_pipeline = Pipeline() +query_pipeline.add_component( + "text_embedder", + SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), +) +query_pipeline.add_component( + "retriever", + FalkorDBEmbeddingRetriever(document_store=document_store, top_k=3), +) +query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding") + +result = query_pipeline.run( + {"text_embedder": {"text": "How many languages are there?"}}, +) +print(result["retriever"]["documents"][0].content) +``` diff --git a/docs-website/versioned_sidebars/version-2.28-sidebars.json b/docs-website/versioned_sidebars/version-2.28-sidebars.json index a77106eef3..7a1b68e316 100644 --- a/docs-website/versioned_sidebars/version-2.28-sidebars.json +++ b/docs-website/versioned_sidebars/version-2.28-sidebars.json @@ -109,6 +109,7 @@ }, "document-stores/elasticsearch-document-store", "document-stores/faissdocumentstore", + "document-stores/falkordbdocumentstore", { "type": "link", "label": "LanceDBDocumentStore", @@ -529,6 +530,8 @@ "pipeline-components/retrievers/elasticsearchbm25retriever", "pipeline-components/retrievers/elasticsearchembeddingretriever", "pipeline-components/retrievers/faissembeddingretriever", + 
"pipeline-components/retrievers/falkordbcypherretriever", + "pipeline-components/retrievers/falkordbembeddingretriever", "pipeline-components/retrievers/filterretriever", "pipeline-components/retrievers/inmemorybm25retriever", "pipeline-components/retrievers/inmemoryembeddingretriever",