From 65f804a681d0cc48df14326aad8bb8e63c7cde0a Mon Sep 17 00:00:00 2001
From: lvca <lvca@users.noreply.github.com>
Date: Sat, 28 Feb 2026 18:02:38 -0500
Subject: [PATCH 01/16] feat: add ArcadeDB document store integration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add ArcadeDBDocumentStore and ArcadeDBEmbeddingRetriever for Haystack 2.x.

ArcadeDB is an open-source multi-model database that combines document
storage, HNSW vector search (LSM_VECTOR), and SQL metadata filtering
in a single engine. This integration connects via the HTTP/JSON API
using only the requests library — no special drivers needed.

Components:
- ArcadeDBDocumentStore: full DocumentStore protocol (count, filter,
  write, delete) with automatic schema/index initialization
- ArcadeDBEmbeddingRetriever: pipeline component for vector similarity
  retrieval with FilterPolicy support
- Filter conversion: Haystack filter dicts → ArcadeDB SQL WHERE clauses
- Document converters: Haystack Document ↔ ArcadeDB record mapping

Includes CI workflow with ArcadeDB Docker service, unit tests for
filter conversion, and integration tests for all DocumentStore operations.
---
 .github/labeler.yml                           |   5 +
 .github/workflows/arcadedb.yml                |  82 ++++
 README.md                                     |   1 +
 integrations/arcadedb/LICENSE.txt             | 190 +++++++++
 integrations/arcadedb/README.md               |  85 ++++
 .../arcadedb/examples/embedding_retrieval.py  |  87 ++++
 .../arcadedb/pydoc/config_docusaurus.yml      |  14 +
 integrations/arcadedb/pyproject.toml          | 117 +++++
 .../retrievers/arcadedb/__init__.py           |   7 +
 .../arcadedb/embedding_retriever.py           | 104 +++++
 .../components/retrievers/py.typed            |   0
 .../document_stores/arcadedb/__init__.py      |   7 +
 .../document_stores/arcadedb/converters.py    |  38 ++
 .../arcadedb/document_store.py                | 400 ++++++++++++++++++
 .../document_stores/arcadedb/filters.py       | 106 +++++
 .../document_stores/py.typed                  |   0
 integrations/arcadedb/tests/__init__.py       |   0
 .../arcadedb/tests/test_document_store.py     | 160 +++++++
 integrations/arcadedb/tests/test_filters.py   |  95 +++++
 19 files changed, 1498 insertions(+)
 create mode 100644 .github/workflows/arcadedb.yml
 create mode 100644 integrations/arcadedb/LICENSE.txt
 create mode 100644 integrations/arcadedb/README.md
 create mode 100644 integrations/arcadedb/examples/embedding_retrieval.py
 create mode 100644 integrations/arcadedb/pydoc/config_docusaurus.yml
 create mode 100644 integrations/arcadedb/pyproject.toml
 create mode 100644 integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/__init__.py
 create mode 100644 integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
 create mode 100644 integrations/arcadedb/src/haystack_integrations/components/retrievers/py.typed
 create mode 100644 integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/__init__.py
 create mode 100644 integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/converters.py
 create mode 100644 integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
 create mode 100644 integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
 create mode 100644 integrations/arcadedb/src/haystack_integrations/document_stores/py.typed
 create mode 100644 integrations/arcadedb/tests/__init__.py
 create mode 100644 integrations/arcadedb/tests/test_document_store.py
 create mode 100644 integrations/arcadedb/tests/test_filters.py

diff --git a/.github/labeler.yml b/.github/labeler.yml
index 30181024e7..a4add7d750 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -4,6 +4,11 @@ integration:aimlapi:
       - any-glob-to-any-file: "integrations/aimlapi/**/*"
       - any-glob-to-any-file: ".github/workflows/aimlapi.yml"
 
+integration:arcadedb:
+  - changed-files:
+      - any-glob-to-any-file: "integrations/arcadedb/**/*"
+      - any-glob-to-any-file: ".github/workflows/arcadedb.yml"
+
 integration:amazon-bedrock:
   - changed-files:
       - any-glob-to-any-file: "integrations/amazon_bedrock/**/*"
diff --git a/.github/workflows/arcadedb.yml b/.github/workflows/arcadedb.yml
new file mode 100644
index 0000000000..28b57526f9
--- /dev/null
+++ b/.github/workflows/arcadedb.yml
@@ -0,0 +1,82 @@
+# This workflow comes from https://github.com/ofek/hatch-mypyc
+# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
+name: Test / arcadedb
+
+on:
+  schedule:
+    - cron: "0 0 * * *"
+  pull_request:
+    paths:
+      - "integrations/arcadedb/**"
+      - "!integrations/arcadedb/*.md"
+      - ".github/workflows/arcadedb.yml"
+
+concurrency:
+  group: arcadedb-${{ github.head_ref }}
+  cancel-in-progress: true
+
+env:
+  PYTHONUNBUFFERED: "1"
+  FORCE_COLOR: "1"
+  ARCADEDB_USERNAME: "root"
+  ARCADEDB_PASSWORD: "arcadedb"
+
+defaults:
+  run:
+    working-directory: integrations/arcadedb
+
+jobs:
+  run:
+    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.10", "3.13"]
+    services:
+      arcadedb:
+        image: arcadedata/arcadedb:latest
+        env:
+          JAVA_OPTS: "-Darcadedb.server.rootPassword=arcadedb"
+        ports:
+          - 2480:2480
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Hatch
+        run: pip install hatch "virtualenv<21.0.0"
+
+      - name: Lint
+        if: matrix.python-version == '3.10' && runner.os == 'Linux'
+        run: hatch run fmt-check && hatch run test:types
+
+      - name: Run tests
+        run: hatch run test:cov-retry
+
+      - name: Run unit tests with lowest direct dependencies
+        run: |
+          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
+          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
+          hatch run test:unit
+
+      - name: Nightly - run unit tests with Haystack main branch
+        if: github.event_name == 'schedule'
+        run: |
+          hatch env prune
+          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
+          hatch run test:unit
+
+      - name: Send event to Datadog for nightly failures
+        if: failure() && github.event_name == 'schedule'
+        uses: ./.github/actions/send_failure
+        with:
+          title: |
+            Core integrations nightly tests failure: ${{ github.workflow }}
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
diff --git a/README.md b/README.md
index 560e659cc6..267300f92c 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
 | [amazon-bedrock-haystack](integrations/amazon_bedrock/)                 | Embedder, Generator, Ranker, Downloader | [![PyPI - Version](https://img.shields.io/pypi/v/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack)                         | [![Test / amazon_bedrock](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml)                         |
 | [amazon-sagemaker-haystack](integrations/amazon_sagemaker/)             | Generator                   | [![PyPI - Version](https://img.shields.io/pypi/v/amazon-sagemaker-haystack.svg)](https://pypi.org/project/amazon-sagemaker-haystack)                     | [![Test / amazon_sagemaker](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_sagemaker.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_sagemaker.yml)                   |
 | [anthropic-haystack](integrations/anthropic/)                           | Generator                   | [![PyPI - Version](https://img.shields.io/pypi/v/anthropic-haystack.svg)](https://pypi.org/project/anthropic-haystack)                                   | [![Test / anthropic](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/anthropic.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/anthropic.yml)                                        |
+| [arcadedb-haystack](integrations/arcadedb/)                             | Document Store              | [![PyPI - Version](https://img.shields.io/pypi/v/arcadedb-haystack.svg)](https://pypi.org/project/arcadedb-haystack)                                     | [![Test / arcadedb](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/arcadedb.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/arcadedb.yml)                                           |
 | [astra-haystack](integrations/astra/)                                   | Document Store              | [![PyPI - Version](https://img.shields.io/pypi/v/astra-haystack.svg)](https://pypi.org/project/astra-haystack)                                           | [![Test / astra](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/astra.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/astra.yml)                                                    |
 | [azure-ai-search-haystack](integrations/azure_ai_search/)               | Document Store              | [![PyPI - Version](https://img.shields.io/pypi/v/azure-ai-search-haystack.svg)](https://pypi.org/project/azure-ai-search-haystack)                       | [![Test / azure-ai-search](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/azure_ai_search.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/azure_ai_search.yml)                      |
 | [azure-doc-intelligence-haystack](integrations/azure_doc_intelligence/) | Converter                   | [![PyPI - Version](https://img.shields.io/pypi/v/azure-doc-intelligence-haystack.svg)](https://pypi.org/project/azure-doc-intelligence-haystack)         | [![Test / azure_doc_intelligence](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/azure_doc_intelligence.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/azure_doc_intelligence.yml) |
diff --git a/integrations/arcadedb/LICENSE.txt b/integrations/arcadedb/LICENSE.txt
new file mode 100644
index 0000000000..0fa7906e3a
--- /dev/null
+++ b/integrations/arcadedb/LICENSE.txt
@@ -0,0 +1,190 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to the Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by the Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding any notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2025 ArcadeData Ltd
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/integrations/arcadedb/README.md b/integrations/arcadedb/README.md
new file mode 100644
index 0000000000..0ee93f0722
--- /dev/null
+++ b/integrations/arcadedb/README.md
@@ -0,0 +1,85 @@
+# arcadedb-haystack
+
+[![PyPI - Version](https://img.shields.io/pypi/v/arcadedb-haystack.svg)](https://pypi.org/project/arcadedb-haystack)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/arcadedb-haystack.svg)](https://pypi.org/project/arcadedb-haystack)
+[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE.txt)
+
+**[ArcadeDB](https://arcadedb.com)** integration for [Haystack](https://haystack.deepset.ai/) 2.x.
+
+ArcadeDB is an open-source multi-model database that combines document storage, HNSW vector search, and SQL metadata filtering in a single engine. This integration provides `ArcadeDBDocumentStore` and `ArcadeDBEmbeddingRetriever`, which connect to ArcadeDB via its HTTP/JSON API using only the `requests` library -- no special drivers needed.
+
+## Installation
+
+```bash
+pip install arcadedb-haystack
+```
+
+## Usage
+
+Start ArcadeDB and export the credentials that the document store reads from the environment by default:
+
+```bash
+docker run -d -p 2480:2480 \
+    -e JAVA_OPTS="-Darcadedb.server.rootPassword=arcadedb" \
+    arcadedata/arcadedb:latest
+
+export ARCADEDB_USERNAME=root
+export ARCADEDB_PASSWORD=arcadedb
+```
+
+### Document Store
+
+```python
+from haystack import Document
+from haystack.document_stores.types import DuplicatePolicy
+from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
+
+store = ArcadeDBDocumentStore(
+    database="myproject",
+    embedding_dimension=768,
+)
+
+docs = [
+    Document(
+        content="ArcadeDB supports graphs, documents, and vectors.",
+        embedding=[0.1] * 768,
+        meta={"source": "docs", "category": "database"},
+    )
+]
+store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
+store.filter_documents(
+    filters={"field": "meta.category", "operator": "==", "value": "database"}
+)
+```
+
+### Pipeline with Embedding Retriever
+
+```python
+from haystack import Pipeline
+from haystack_integrations.components.retrievers.arcadedb import ArcadeDBEmbeddingRetriever
+from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
+
+store = ArcadeDBDocumentStore(database="myproject", embedding_dimension=768)
+pipeline = Pipeline()
+pipeline.add_component("retriever", ArcadeDBEmbeddingRetriever(document_store=store, top_k=10))
+
+result = pipeline.run({"retriever": {"query_embedding": [0.1] * 768}})
+```
+
+## Configuration
+
+| Parameter | Default | Description |
+|---|---|---|
+| `url` | `http://localhost:2480` | ArcadeDB HTTP endpoint |
+| `database` | `haystack` | Database name |
+| `username` | env `ARCADEDB_USERNAME` | HTTP Basic Auth username |
+| `password` | env `ARCADEDB_PASSWORD` | HTTP Basic Auth password |
+| `type_name` | `Document` | Vertex type name |
+| `embedding_dimension` | `768` | Vector dimension for HNSW index |
+| `similarity_function` | `cosine` | `cosine`, `euclidean`, or `dot` |
+| `recreate_type` | `False` | Drop and recreate type on init |
+| `create_database` | `True` | Create database if it doesn't exist |
+
+## License
+
+`arcadedb-haystack` is distributed under the terms of the [Apache-2.0](LICENSE.txt) license.
diff --git a/integrations/arcadedb/examples/embedding_retrieval.py b/integrations/arcadedb/examples/embedding_retrieval.py
new file mode 100644
index 0000000000..ffdbad3acd
--- /dev/null
+++ b/integrations/arcadedb/examples/embedding_retrieval.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Example: Embedding retrieval with ArcadeDB + Haystack.
+
+Prerequisites:
+    docker run -d -p 2480:2480 \
+        -e JAVA_OPTS="-Darcadedb.server.rootPassword=arcadedb" \
+        arcadedata/arcadedb:latest
+
+    pip install arcadedb-haystack
+
+Usage:
+    export ARCADEDB_USERNAME=root
+    export ARCADEDB_PASSWORD=arcadedb
+    python examples/embedding_retrieval.py
+"""
+
+from haystack import Document, Pipeline
+from haystack.document_stores.types import DuplicatePolicy
+
+from haystack_integrations.components.retrievers.arcadedb import ArcadeDBEmbeddingRetriever
+from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
+
+# --- 1. Create the document store (credentials: see the exports under "Usage" above) ---
+store = ArcadeDBDocumentStore(
+    url="http://localhost:2480",
+    database="haystack_example",
+    embedding_dimension=4,  # tiny dimension so the demo embeddings below can be written by hand
+    similarity_function="cosine",
+    recreate_type=True,  # drop and recreate the type on init
+)
+
+# --- 2. Write some documents (hand-made 4-d embeddings, matching embedding_dimension above) ---
+documents = [
+    Document(
+        content="ArcadeDB is a multi-model database supporting graphs, documents, key-value, time-series, and vectors.",
+        embedding=[1.0, 0.0, 0.0, 0.0],
+        meta={"category": "database", "source": "docs"},
+    ),
+    Document(
+        content="Haystack is an open-source framework for building RAG pipelines.",
+        embedding=[0.0, 1.0, 0.0, 0.0],
+        meta={"category": "framework", "source": "docs"},
+    ),
+    Document(
+        content="HNSW (Hierarchical Navigable Small World) enables fast approximate nearest neighbor search.",
+        embedding=[0.5, 0.5, 0.0, 0.0],
+        meta={"category": "algorithm", "source": "paper"},
+    ),
+    Document(
+        content="Vector databases store high-dimensional embeddings for semantic search.",
+        embedding=[0.8, 0.2, 0.0, 0.0],
+        meta={"category": "database", "source": "blog"},
+    ),
+]
+
+written = store.write_documents(documents, policy=DuplicatePolicy.OVERWRITE)
+print(f"Wrote {written} documents")
+print(f"Total documents: {store.count_documents()}")
+
+# --- 3. Build a retrieval pipeline with the retriever as its only component ---
+pipeline = Pipeline()
+pipeline.add_component("retriever", ArcadeDBEmbeddingRetriever(document_store=store, top_k=3))
+
+# --- 4. Run a similarity search ---
+query_embedding = [0.9, 0.1, 0.0, 0.0]  # by cosine, nearest to the "ArcadeDB" and "Vector databases" documents
+result = pipeline.run({"retriever": {"query_embedding": query_embedding}})
+
+print("\n--- Top 3 results ---")
+for doc in result["retriever"]["documents"]:
+    print(f"  score={doc.score:.4f}  category={doc.meta.get('category')}  content={doc.content[:80]}...")
+
+# --- 5. Same query, restricted by a metadata filter to the 'database' category ---
+result_filtered = pipeline.run({
+    "retriever": {
+        "query_embedding": query_embedding,
+        "filters": {"field": "meta.category", "operator": "==", "value": "database"},
+    }
+})
+
+print("\n--- Filtered (category=database) ---")
+for doc in result_filtered["retriever"]["documents"]:
+    print(f"  score={doc.score:.4f}  content={doc.content[:80]}...")
diff --git a/integrations/arcadedb/pydoc/config_docusaurus.yml b/integrations/arcadedb/pydoc/config_docusaurus.yml
new file mode 100644
index 0000000000..e3cb005d73
--- /dev/null
+++ b/integrations/arcadedb/pydoc/config_docusaurus.yml
@@ -0,0 +1,14 @@
+loaders:
+  - modules:
+      - haystack_integrations.components.retrievers.arcadedb.embedding_retriever
+      - haystack_integrations.document_stores.arcadedb.document_store
+    search_path: [../src]
+processors:
+  - type: filter
+    documented_only: true
+    skip_empty_modules: true
+renderer:
+  description: ArcadeDB integration for Haystack
+  id: integrations-arcadedb
+  filename: arcadedb.md
+  title: ArcadeDB
diff --git a/integrations/arcadedb/pyproject.toml b/integrations/arcadedb/pyproject.toml
new file mode 100644
index 0000000000..c546f3dd87
--- /dev/null
+++ b/integrations/arcadedb/pyproject.toml
@@ -0,0 +1,117 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+
+[project]
+name = "arcadedb-haystack"
+dynamic = ["version"]
+description = "An integration of ArcadeDB with Haystack — document storage + HNSW vector search + SQL filtering"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "Apache-2.0"
+keywords = ["arcadedb", "haystack", "vector-search", "document-store", "rag"]
+authors = [{ name = "ArcadeData Ltd", email = "info@arcadedb.com" }]
+classifiers = [
+  "License :: OSI Approved :: Apache Software License",
+  "Development Status :: 4 - Beta",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+  "Programming Language :: Python :: Implementation :: CPython",
+  "Programming Language :: Python :: Implementation :: PyPy",
+]
+dependencies = [
+  "haystack-ai>=2.9.0",
+  "requests",
+]
+
+[project.urls]
+Source = "https://github.com/deepset-ai/haystack-core-integrations"
+Documentation = "https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/arcadedb/README.md"
+Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/haystack_integrations"]
+
+[tool.hatch.version]
+source = "vcs"
+tag-pattern = 'integrations\/arcadedb-v(?P<version>.*)'
+
+[tool.hatch.version.raw-options]
+root = "../.."
+git_describe_command = 'git describe --tags --match="integrations/arcadedb-v[0-9]*"'
+
+[tool.hatch.envs.default]
+installer = "uv"
+dependencies = ["haystack-pydoc-tools", "ruff"]
+
+[tool.hatch.envs.default.scripts]
+docs = ["haystack-pydoc pydoc/config_docusaurus.yml"]
+fmt = "ruff check --fix {args}; ruff format {args}"
+fmt-check = "ruff check {args} && ruff format --check {args}"
+
+[tool.hatch.envs.test]
+dependencies = [
+    "pytest",
+    "pytest-cov",
+    "pytest-rerunfailures",
+    "mypy",
+    "pip",
+]
+
+[tool.hatch.envs.test.scripts]
+unit = 'pytest -m "not integration" {args:tests}'
+integration = 'pytest -m "integration" {args:tests}'
+all = 'pytest {args:tests}'
+cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}'
+types = "mypy -p haystack_integrations.document_stores.arcadedb -p haystack_integrations.components.retrievers.arcadedb {args}"
+
+[tool.mypy]
+install_types = true
+non_interactive = true
+check_untyped_defs = true
+disallow_incomplete_defs = true
+
+[[tool.mypy.overrides]]
+module = ["requests.*"]
+ignore_missing_imports = true
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.lint]
+select = [
+  "A", "ARG", "B", "C", "DTZ", "E", "EM", "F", "FBT", "I", "ICN",
+  "ISC", "N", "PLC", "PLE", "PLR", "PLW", "Q", "RUF", "S", "T",
+  "TID", "UP", "W", "YTT",
+]
+ignore = [
+  "B027", "FBT003", "S105", "S106", "S107",
+  "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
+  "B008", "S101",
+]
+
+[tool.ruff.lint.isort]
+known-first-party = ["haystack_integrations"]
+
+[tool.ruff.lint.flake8-tidy-imports]
+ban-relative-imports = "parents"
+
+[tool.ruff.lint.per-file-ignores]
+"tests/**/*" = ["PLR2004", "S101", "TID252"]
+"examples/**/*" = ["T201"]
+
+[tool.coverage.run]
+source = ["haystack_integrations"]
+branch = true
+parallel = false
+
+[tool.coverage.report]
+omit = ["*/tests/*", "*/__init__.py"]
+show_missing = true
+exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
+
+[tool.pytest.ini_options]
+markers = ["integration: integration tests"]
diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/__init__.py b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/__init__.py
new file mode 100644
index 0000000000..eb4a7bfbe5
--- /dev/null
+++ b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from haystack_integrations.components.retrievers.arcadedb.embedding_retriever import ArcadeDBEmbeddingRetriever
+
+__all__ = ["ArcadeDBEmbeddingRetriever"]
diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
new file mode 100644
index 0000000000..1624055eb9
--- /dev/null
+++ b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
@@ -0,0 +1,104 @@
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""ArcadeDB Embedding Retriever for Haystack 2.x pipelines."""
+
+from typing import Any
+
+from haystack import Document, component, default_from_dict, default_to_dict
+from haystack.document_stores.types import FilterPolicy
+
+from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
+
+
+@component
+class ArcadeDBEmbeddingRetriever:
+    """
+    Retrieve documents from ArcadeDB using vector similarity (LSM_VECTOR / HNSW index).
+
+    Usage example:
+
+    ```python
+    from haystack_integrations.components.retrievers.arcadedb import ArcadeDBEmbeddingRetriever
+    from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
+
+    store = ArcadeDBDocumentStore(database="mydb")
+    retriever = ArcadeDBEmbeddingRetriever(document_store=store, top_k=5)
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        document_store: ArcadeDBDocumentStore,
+        filters: dict[str, Any] | None = None,
+        top_k: int = 10,
+        filter_policy: FilterPolicy = FilterPolicy.REPLACE,
+    ):
+        """
+        Create an ArcadeDBEmbeddingRetriever.
+
+        :param document_store: An instance of ``ArcadeDBDocumentStore``.
+        :param filters: Default filters applied to every retrieval call.
+        :param top_k: Default maximum number of documents to return.
+        :param filter_policy: ``REPLACE`` lets runtime filters override the defaults, ``MERGE`` AND-combines both.
+        """
+        self._document_store = document_store
+        self._filters = filters
+        self._top_k = top_k
+        self._filter_policy = filter_policy
+
+    @component.output_types(documents=list[Document])
+    def run(
+        self,
+        query_embedding: list[float],
+        filters: dict[str, Any] | None = None,
+        top_k: int | None = None,
+    ) -> dict[str, list[Document]]:
+        """
+        Retrieve documents by vector similarity.
+
+        :param query_embedding: The embedding vector to search with.
+        :param filters: Runtime filters; combined with the default filters according to ``filter_policy``.
+        :param top_k: Maximum number of documents to return; ``None`` falls back to the constructor value.
+        :returns: A dict with key ``"documents"`` containing the retrieved documents.
+        """
+        effective_top_k = top_k if top_k is not None else self._top_k
+
+        # An empty filter dict carries no condition; wrapping it in an AND node would make
+        # the filter converter raise, so MERGE only combines two non-empty filters.
+        effective_filters: dict[str, Any] | None
+        if self._filter_policy == FilterPolicy.MERGE and filters and self._filters:
+            effective_filters = {"operator": "AND", "conditions": [self._filters, filters]}
+        elif self._filter_policy == FilterPolicy.REPLACE and filters is not None:
+            effective_filters = filters
+        else:
+            effective_filters = filters or self._filters
+
+        documents = self._document_store._embedding_retrieval(
+            query_embedding=query_embedding,
+            filters=effective_filters,
+            top_k=effective_top_k,
+        )
+        return {"documents": documents}
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize this retriever, including its document store, to a dictionary."""
+        return default_to_dict(
+            self,
+            document_store=self._document_store.to_dict(),
+            filters=self._filters,
+            top_k=self._top_k,
+            filter_policy=self._filter_policy.value,
+        )
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "ArcadeDBEmbeddingRetriever":
+        """Deserialize a retriever from ``to_dict`` output; nested values in ``data`` are revived in place."""
+        init_params = data.get("init_parameters", {})
+        if "document_store" in init_params:
+            init_params["document_store"] = ArcadeDBDocumentStore.from_dict(init_params["document_store"])
+        if "filter_policy" in init_params:
+            init_params["filter_policy"] = FilterPolicy(init_params["filter_policy"])
+        return default_from_dict(cls, data)
diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/py.typed b/integrations/arcadedb/src/haystack_integrations/components/retrievers/py.typed
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/__init__.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/__init__.py
new file mode 100644
index 0000000000..3676df644a
--- /dev/null
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from haystack_integrations.document_stores.arcadedb.document_store import ArcadeDBDocumentStore
+
+__all__ = ["ArcadeDBDocumentStore"]
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/converters.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/converters.py
new file mode 100644
index 0000000000..ccafefae70
--- /dev/null
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/converters.py
@@ -0,0 +1,38 @@
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Convert between Haystack Documents and ArcadeDB records."""
+
+from typing import Any
+
+from haystack import Document
+
+
+def _from_haystack_to_arcadedb(documents: list[Document]) -> list[dict[str, Any]]:
+    """
+    Map each Haystack Document onto a plain dict for an ArcadeDB INSERT.
+
+    Only ``id``, ``content``, ``embedding`` and ``meta`` are copied; any other
+    Document attribute is left out of the record.
+
+    :param documents: Documents to convert.
+    :returns: One dict per document, in input order.
+    """
+    copied_fields = ("id", "content", "embedding", "meta")
+    return [{name: getattr(doc, name) for name in copied_fields} for doc in documents]
+
+
+def _from_arcadedb_to_haystack(records: list[dict[str, Any]]) -> list[Document]:
+    """
+    Build Haystack Documents from ArcadeDB query result rows.
+
+    ``id`` is required in every row; ``content``, ``embedding`` and ``score`` fall back
+    to ``None`` when absent, and a missing or empty ``meta`` becomes ``{}``.
+    """
+
+    def _to_document(row: dict[str, Any]) -> Document:
+        optional = {name: row.get(name) for name in ("content", "embedding", "score")}
+        return Document(id=row["id"], meta=row.get("meta") or {}, **optional)
+
+    return [_to_document(row) for row in records]
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
new file mode 100644
index 0000000000..1b2a1e3596
--- /dev/null
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -0,0 +1,400 @@
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""ArcadeDB DocumentStore for Haystack 2.x — document storage + vector search via HTTP/JSON API."""
+
+import logging
+from typing import Any, ClassVar
+
+import requests
+from haystack import Document, default_from_dict, default_to_dict
+from haystack.document_stores.errors import DuplicateDocumentError
+from haystack.document_stores.types import DuplicatePolicy
+from haystack.utils import Secret
+
+from haystack_integrations.document_stores.arcadedb.converters import (
+    _from_arcadedb_to_haystack,
+    _from_haystack_to_arcadedb,
+)
+from haystack_integrations.document_stores.arcadedb.filters import _convert_filters
+
+logger = logging.getLogger(__name__)
+
+
+class ArcadeDBDocumentStore:
+    """
+    An ArcadeDB-backed DocumentStore for Haystack 2.x.
+
+    Uses ArcadeDB's HTTP/JSON API for all operations — no special drivers required.
+    Supports HNSW vector search (LSM_VECTOR) and SQL metadata filtering.
+
+    Usage example:
+
+    ```python
+    from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
+
+    store = ArcadeDBDocumentStore(
+        url="http://localhost:2480",
+        database="haystack",
+        embedding_dimension=768,
+    )
+    ```
+    """
+
+    # Map user-facing similarity names to ArcadeDB LSM_VECTOR metric keywords
+    _SIMILARITY_MAP: ClassVar[dict[str, str]] = {
+        "cosine": "COSINE",
+        "euclidean": "EUCLIDEAN",
+        "dot": "DOT_PRODUCT",
+    }
+
+    def __init__(
+        self,
+        *,
+        url: str = "http://localhost:2480",
+        database: str = "haystack",
+        username: Secret = Secret.from_env_var("ARCADEDB_USERNAME", strict=False),  # noqa: B008
+        password: Secret = Secret.from_env_var("ARCADEDB_PASSWORD", strict=False),  # noqa: B008
+        type_name: str = "Document",
+        embedding_dimension: int = 768,
+        similarity_function: str = "cosine",
+        recreate_type: bool = False,
+        create_database: bool = True,
+    ):
+        """
+        Configure the store; database and schema setup are deferred until the first operation.
+
+        :param url: Base URL of the ArcadeDB HTTP API; trailing slashes are stripped.
+        :param database: Name of the database that commands are sent to.
+        :param username: Secret with the HTTP Basic Auth user (default: ``ARCADEDB_USERNAME`` env var).
+        :param password: Secret with the HTTP Basic Auth password (default: ``ARCADEDB_PASSWORD`` env var).
+        :param type_name: Name of the vertex type that holds the documents.
+        :param embedding_dimension: Number of dimensions declared on the vector index.
+        :param similarity_function: Distance metric: ``"cosine"``, ``"euclidean"``, or ``"dot"``.
+        :param recreate_type: If ``True``, the type is dropped and re-created during initialization.
+        :param create_database: If ``True``, a ``CREATE DATABASE`` is attempted during initialization.
+        """
+        # One HTTP session is reused for all requests; the flag flips once schema setup has run.
+        self._session = requests.Session()
+        self._initialized = False
+        self._url = url.rstrip("/")
+        self._database = database
+        self._type_name = type_name
+        self._embedding_dimension = embedding_dimension
+        self._similarity_function = similarity_function
+        self._recreate_type = recreate_type
+        self._create_database = create_database
+        self._username = username
+        self._password = password
+
+    # ------------------------------------------------------------------
+    # Serialization (Haystack pipeline export/import)
+    # ------------------------------------------------------------------
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the constructor arguments of this store into a dictionary."""
+        init_parameters = {
+            "url": self._url,
+            "database": self._database,
+            "username": self._username.to_dict() if self._username else None,
+            "password": self._password.to_dict() if self._password else None,
+            "type_name": self._type_name,
+            "embedding_dimension": self._embedding_dimension,
+            "similarity_function": self._similarity_function,
+            "recreate_type": self._recreate_type,
+            "create_database": self._create_database,
+        }
+        return default_to_dict(self, **init_parameters)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "ArcadeDBDocumentStore":
+        """Rebuild a store from ``to_dict`` output, reviving serialized secrets inside ``data`` in place."""
+        params = data.get("init_parameters", {})
+        serialized = {name: params[name] for name in ("username", "password") if params.get(name) is not None}
+        for name, secret in serialized.items():
+            params[name] = Secret.from_dict(secret)
+        return default_from_dict(cls, data)
+
+    # ------------------------------------------------------------------
+    # HTTP helpers
+    # ------------------------------------------------------------------
+
+    def _auth(self) -> tuple[str, str] | None:
+        """Return ``(user, password)`` for HTTP Basic Auth, or ``None`` unless both resolve to non-empty values."""
+        secrets = (self._username, self._password)
+        user, pwd = (secret.resolve_value() if secret else None for secret in secrets)
+        # Half a credential pair is unusable, so no auth tuple is produced at all.
+        return (user, pwd) if user and pwd else None
+
+    def _command(self, sql: str, *, positional_params: list[Any] | None = None) -> list[dict[str, Any]]:
+        """Run one SQL statement on the configured database; return its rows or raise RuntimeError on HTTP >= 400."""
+        request_body: dict[str, Any] = {"language": "sql", "command": sql}
+        if positional_params:
+            request_body["params"] = positional_params
+
+        endpoint = f"{self._url}/api/v1/command/{self._database}"
+        response = self._session.post(endpoint, json=request_body, auth=self._auth())
+        if response.status_code < 400:
+            # A reply without a "result" key is treated as an empty result set.
+            return response.json().get("result", [])
+
+        msg = f"ArcadeDB command failed ({response.status_code}): {response.text}"
+        raise RuntimeError(msg)
+
+    def _server_command(self, command: str) -> dict[str, Any]:
+        """Send a server-level command (e.g. CREATE DATABASE) and return the decoded JSON reply."""
+        response = self._session.post(f"{self._url}/api/v1/server", json={"command": command}, auth=self._auth())
+        if response.status_code < 400:
+            return response.json()
+        # Any status of 400 or above is surfaced together with the response body.
+        msg = f"ArcadeDB server command failed ({response.status_code}): {response.text}"
+        raise RuntimeError(msg)
+
+    def _ensure_initialized(self) -> None:
+        """Create the database, vertex type, properties and indexes on first use; a no-op once completed."""
+        if self._initialized:
+            return
+
+        # 1. Optionally create the database (best effort: a failure is only logged)
+        if self._create_database:
+            try:
+                self._server_command(f"CREATE DATABASE {self._database}")
+                logger.info("Created database '%s'", self._database)
+            except RuntimeError as err:
+                logger.debug("Database '%s' already exists or cannot be created: %s", self._database, err)
+
+        # 2. Optionally drop the existing type. The statement already says IF EXISTS, so a
+        #    failure is not the harmless "nothing to drop" case and is reported, not swallowed.
+        if self._recreate_type:
+            try:
+                self._command(f"DROP TYPE `{self._type_name}` IF EXISTS UNSAFE")
+            except RuntimeError as err:
+                logger.warning("Could not drop type '%s': %s", self._type_name, err)
+
+        # 3. Create vertex type + properties (errors here propagate to the caller)
+        self._command(f"CREATE VERTEX TYPE `{self._type_name}` IF NOT EXISTS")
+        properties = {"id": "STRING", "content": "STRING", "embedding": "ARRAY_OF_FLOATS", "meta": "MAP"}
+        for name, kind in properties.items():
+            self._command(f"CREATE PROPERTY `{self._type_name}`.{name} IF NOT EXISTS {kind}")
+
+        # 4. Unique index on id
+        try:
+            self._command(f"CREATE INDEX ON `{self._type_name}` (id) UNIQUE")
+        except RuntimeError as err:
+            logger.debug("Unique index on id was not created (it may already exist): %s", err)
+
+        # 5. LSM_VECTOR index on embedding (HNSW-based, ACID-compliant)
+        metric = self._SIMILARITY_MAP.get(self._similarity_function)
+        if metric is None:
+            metric = "COSINE"
+            logger.warning("Unknown similarity_function '%s'; using COSINE", self._similarity_function)
+        try:
+            self._command(
+                f"CREATE INDEX IF NOT EXISTS ON `{self._type_name}` (embedding) LSM_VECTOR "
+                f"METADATA {{ dimensions: {self._embedding_dimension}, similarity: '{metric}' }}"
+            )
+        except RuntimeError as err:
+            logger.debug("Vector index on embedding was not created (it may already exist): %s", err)
+
+        # Reached only if the type and property statements above succeeded.
+        self._initialized = True
+        summary = (self._database, self._type_name, self._embedding_dimension, metric)
+        logger.info("ArcadeDBDocumentStore initialized: database=%s, type=%s, dim=%d, metric=%s", *summary)
+
+    # ------------------------------------------------------------------
+    # DocumentStore protocol
+    # ------------------------------------------------------------------
+
+    def count_documents(self) -> int:
+        """Count the records of the document type; an empty query result counts as 0."""
+        self._ensure_initialized()
+        result = self._command(f"SELECT count(*) AS cnt FROM `{self._type_name}`")
+        # The total is read from the ``cnt`` column of the first row, defaulting to 0.
+        first_row = result[0] if result else {}
+        return int(first_row.get("cnt", 0))
+
+    def filter_documents(self, filters: dict[str, Any] | None = None) -> list[Document]:
+        """
+        Fetch every stored document that satisfies ``filters``.
+
+        Without filters (``None`` or an empty dict) all documents of the type are returned.
+
+        :param filters: Haystack filter dictionary, translated into a SQL ``WHERE`` clause.
+        :returns: The matching documents, converted from the result rows.
+        :raises ValueError: If the filter dictionary is malformed (e.g. a node without ``operator``).
+        """
+        self._ensure_initialized()
+
+        condition = _convert_filters(filters)
+        # An empty condition string means "no restriction", so the WHERE part is omitted.
+        suffix = f" WHERE {condition}" if condition else ""
+        rows = self._command(f"SELECT * FROM `{self._type_name}`{suffix}")
+        return _from_arcadedb_to_haystack(rows)
+
+    def write_documents(
+        self,
+        documents: list[Document],
+        policy: DuplicatePolicy = DuplicatePolicy.NONE,
+    ) -> int:
+        """
+        Store the documents one at a time, resolving ID clashes according to ``policy``.
+
+        - ``OVERWRITE``: update the record with the same ID; insert when nothing was updated.
+        - ``SKIP``: leave an existing record untouched and do not count the document.
+        - any other policy (including ``NONE``): raise as soon as an existing ID is found.
+
+        :param documents: List of Haystack Documents to write.
+        :param policy: How to handle duplicate document IDs.
+        :returns: Number of documents written (updated or inserted).
+        :raises DuplicateDocumentError: On an existing ID under a policy other than
+            ``OVERWRITE``/``SKIP``; earlier writes of the same call are not undone by this method.
+        """
+        self._ensure_initialized()
+        if not documents:
+            return 0
+
+        records = _from_haystack_to_arcadedb(documents)
+        written = 0
+
+        for record in records:
+            # Values are inlined as SQL literals; a missing embedding/meta becomes an empty list/map.
+            embedding_sql = str(record["embedding"]) if record["embedding"] else "[]"
+            meta_sql = _map_literal(record["meta"]) if record["meta"] else "{}"
+            id_sql = _sql_str(record["id"])
+
+            if policy == DuplicatePolicy.OVERWRITE:
+                # Update first; a reported count of 0 (or no result row) means nothing was updated.
+                update_sql = (
+                    f"UPDATE `{self._type_name}` SET "
+                    f"content = {_sql_str(record['content'])}, "
+                    f"embedding = {embedding_sql}, "
+                    f"meta = {meta_sql} "
+                    f"WHERE id = {id_sql}"
+                )
+                outcome = self._command(update_sql)
+                must_insert = not outcome or int(outcome[0].get("count", 0)) == 0
+            else:
+                # SKIP and the raising policies share the same existence probe.
+                found = self._command(f"SELECT id FROM `{self._type_name}` WHERE id = {id_sql}")
+                if found and policy == DuplicatePolicy.SKIP:
+                    continue
+                if found:
+                    msg = f"Document with id '{record['id']}' already exists."
+                    raise DuplicateDocumentError(msg)
+                must_insert = True
+
+            # Every document that reaches this point counts as written, updated or inserted.
+            if must_insert:
+                self._insert_record(record, embedding_sql, meta_sql)
+            written += 1
+
+        return written
+
+    def _insert_record(self, record: dict[str, Any], embedding_str: str, meta_str: str) -> None:
+        """Insert one record; ``embedding_str`` and ``meta_str`` are inlined as given, so they must be SQL literals."""
+        fields = {
+            "id": _sql_str(record["id"]),
+            "content": _sql_str(record["content"]),
+            "embedding": embedding_str,
+            "meta": meta_str,
+        }
+        self._command(f"INSERT INTO `{self._type_name}` SET " + ", ".join(f"{k} = {v}" for k, v in fields.items()))
+
+    def delete_documents(self, document_ids: list[str]) -> None:
+        """
+        Remove the documents whose IDs are listed, using a single ``DELETE`` statement.
+
+        :param document_ids: IDs to delete; with an empty list no ``DELETE`` is issued.
+        """
+        self._ensure_initialized()
+        if document_ids:
+            # IDs are quoted and escaped one by one before being joined into the IN list.
+            quoted_ids = [_sql_str(doc_id) for doc_id in document_ids]
+            self._command(f"DELETE FROM `{self._type_name}` WHERE id IN [{', '.join(quoted_ids)}]")
+
+    # ------------------------------------------------------------------
+    # Retrieval (called by Retriever components)
+    # ------------------------------------------------------------------
+
+    def _embedding_retrieval(
+        self,
+        query_embedding: list[float],
+        *,
+        filters: dict[str, Any] | None = None,
+        top_k: int = 10,
+    ) -> list[Document]:
+        """
+        Retrieve documents by vector similarity using ArcadeDB's LSM_VECTOR index.
+
+        Filters are applied *after* the neighbor search, so fewer than ``top_k``
+        documents may come back when some neighbors do not match.
+
+        :param query_embedding: The embedding vector to search with.
+        :param filters: Optional metadata filters (applied as post-filter).
+        :param top_k: Maximum number of documents to return.
+        :returns: Neighbors in the order returned by ArcadeDB, each scored as ``1 - distance``.
+        :raises ValueError: If ``filters`` cannot be converted to SQL.
+        """
+        self._ensure_initialized()
+        # Convert first so that a malformed filter fails before the vector query is sent.
+        where = _convert_filters(filters)
+
+        # vectorNeighbors returns a single row with a "neighbors" list of {record, distance}
+        sql = (
+            f"SELECT vectorNeighbors('{self._type_name}[embedding]', "
+            f"{query_embedding!s}, {top_k}) AS neighbors"
+        )
+        rows = self._command(sql)
+        if not rows or not rows[0].get("neighbors"):
+            return []
+
+        documents = []
+        for neighbor in rows[0]["neighbors"]:
+            record = neighbor.get("record", {})
+            documents.append(
+                Document(
+                    id=record.get("id", ""),
+                    content=record.get("content"),
+                    meta=record.get("meta") or {},
+                    score=1.0 - neighbor.get("distance", 0.0),
+                )
+            )
+        if not where:
+            return documents
+
+        # Post-filter: ask only about the candidate IDs instead of fetching every matching
+        # ID of the type, which keeps the query bounded by top_k rather than by table size.
+        candidates = ", ".join(_sql_str(doc.id) for doc in documents)
+        rows = self._command(f"SELECT id FROM `{self._type_name}` WHERE id IN [{candidates}] AND ({where})")
+        matching_ids = {row["id"] for row in rows}
+        return [doc for doc in documents if doc.id in matching_ids]
+
+
+def _sql_str(value: str | None) -> str:
+    """Render ``value`` as a single-quoted SQL string literal, or ``NULL`` for ``None``."""
+    if value is not None:
+        # Backslashes are doubled first so the ones added for quotes are not doubled again.
+        return "'" + value.replace("\\", "\\\\").replace("'", "\\'") + "'"
+    return "NULL"
+
+
+def _map_literal(meta: dict[str, Any]) -> str:
+    """Build an ArcadeDB MAP literal; keys are escaped, lists and nested dicts are rendered recursively."""
+
+    def _literal(value: Any) -> str:
+        if value is None:
+            return "NULL"
+        if isinstance(value, bool):  # before the numeric check: bool is an int subclass
+            return "true" if value else "false"
+        if isinstance(value, (int, float)):
+            return str(value)
+        if isinstance(value, list):
+            return "[" + ", ".join(_literal(item) for item in value) + "]"
+        if isinstance(value, dict):
+            return _map_literal(value)
+        return _sql_str(value if isinstance(value, str) else str(value))
+
+    escaped_keys = (str(key).replace("\\", "\\\\").replace('"', '\\"') for key in meta)
+    pairs = (f'"{key}": {_literal(value)}' for key, value in zip(escaped_keys, meta.values()))
+    return "{" + ", ".join(pairs) + "}"
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
new file mode 100644
index 0000000000..52a7285989
--- /dev/null
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
@@ -0,0 +1,106 @@
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Convert Haystack filter dictionaries to ArcadeDB SQL WHERE clauses."""
+
+from typing import Any
+
+
+def _convert_filters(filters: dict[str, Any] | None) -> str:
+    """
+    Translate a Haystack filter dictionary into the body of an ArcadeDB SQL WHERE clause.
+
+    Comparison (==, !=, >, >=, <, <=, in, not in) and logical (AND, OR, NOT) operators
+    are handled; ``None`` or an empty dictionary yields an empty string, i.e. no restriction.
+
+    :returns: The SQL condition, without the ``WHERE`` keyword.
+    """
+    return _parse_condition(filters) if filters else ""
+
+
+def _parse_condition(condition: dict[str, Any]) -> str:
+    """
+    Convert one filter node (logical or comparison) into SQL, recursing into child nodes.
+
+    Children that convert to an empty string are dropped, and a logical node left without
+    children yields ``""``. ``NOT`` negates the conjunction of all of its conditions.
+
+    :raises ValueError: If ``operator`` is missing, or ``field`` is missing on a comparison.
+    """
+    operator = condition.get("operator")
+    if not operator:
+        msg = f"Missing 'operator' in filter condition: {condition}"
+        raise ValueError(msg)
+
+    logical = operator.upper()
+    if logical in ("AND", "OR", "NOT"):
+        children = condition.get("conditions", [])
+        parts = [sql for sql in map(_parse_condition, children) if sql]
+        if not parts:
+            return ""
+
+        # NOT has no joiner of its own: following Haystack's filter semantics, its
+        # children are AND-ed and the conjunction is negated as a whole.
+        joiner = " OR " if logical == "OR" else " AND "
+        combined = parts[0] if len(parts) == 1 else f"({joiner.join(parts)})"
+        if logical != "NOT":
+            return combined
+        return f"NOT {combined}" if len(parts) > 1 else f"NOT ({combined})"
+
+    field = condition.get("field")
+    if not field:
+        msg = f"Missing 'field' in filter condition: {condition}"
+        raise ValueError(msg)
+
+    # Comparison operators are matched case-sensitively, so the original spelling is passed on.
+    return _comparison_to_sql(field, operator, condition.get("value"))
+
+
+def _comparison_to_sql(field: str, operator: str, value: Any) -> str:
+    """
+    Render a single comparison as SQL.
+
+    ``== None`` / ``!= None`` become ``IS NULL`` / ``IS NOT NULL``; ``in`` and ``not in``
+    produce a bracketed list literal and require a list, tuple or set as value.
+
+    :param field: Field reference, inserted into the SQL as given.
+    :param operator: One of ``==``, ``!=``, ``>``, ``>=``, ``<``, ``<=``, ``in``, ``not in``.
+    :param value: Right-hand side of the comparison.
+    :raises ValueError: For an unsupported operator, or a non-collection ``in``/``not in`` value.
+    """
+    # NULL is tested with IS [NOT] NULL rather than with = / <>.
+    if value is None and operator in ("==", "!="):
+        return f"{field} IS NULL" if operator == "==" else f"{field} IS NOT NULL"
+
+    # The remaining binary comparisons differ only in the SQL operator token.
+    sql_operators = {"==": "=", "!=": "<>", ">": ">", ">=": ">=", "<": "<", "<=": "<="}
+    if operator in sql_operators:
+        return f"{field} {sql_operators[operator]} {_sql_value(value)}"
+
+    if operator in ("in", "not in"):
+        # A bare string would be iterated character by character and None would fail
+        # with a TypeError, so anything but a real collection is rejected up front.
+        if not isinstance(value, (list, tuple, set, frozenset)):
+            msg = f"Operator '{operator}' requires a list of values for field '{field}', got: {value!r}"
+            raise ValueError(msg)
+
+        values = ", ".join(_sql_value(item) for item in value)
+        return f"{field} {operator.upper()} [{values}]"
+
+    msg = f"Unsupported filter operator: {operator}"
+    raise ValueError(msg)
+
+
+def _sql_value(value: Any) -> str:
+    """Format a Python value as an ArcadeDB SQL literal; string content is always escaped."""
+    if value is None:
+        return "NULL"
+    if isinstance(value, bool):  # before the numeric check: bool is an int subclass
+        return "true" if value else "false"
+    if isinstance(value, (int, float)):
+        return str(value)
+    # Anything else is rendered through its string form. Backslashes are doubled before
+    # quotes are escaped, so a trailing backslash cannot swallow the closing quote.
+    text = value if isinstance(value, str) else str(value)
+    return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/py.typed b/integrations/arcadedb/src/haystack_integrations/document_stores/py.typed
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/integrations/arcadedb/tests/__init__.py b/integrations/arcadedb/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/integrations/arcadedb/tests/test_document_store.py b/integrations/arcadedb/tests/test_document_store.py
new file mode 100644
index 0000000000..85e737dd4f
--- /dev/null
+++ b/integrations/arcadedb/tests/test_document_store.py
@@ -0,0 +1,160 @@
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import dataclasses
+import os
+
+import pytest
+from haystack import Document
+from haystack.document_stores.errors import DuplicateDocumentError
+from haystack.document_stores.types import DuplicatePolicy
+
+from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
+
+ARCADEDB_URL = os.getenv("ARCADEDB_URL", "http://localhost:2480")
+
+
+@pytest.fixture()
+def document_store():
+    """Provide a store on the test database that is configured to recreate its document type."""
+    settings = {
+        "database": "haystack_test",
+        "embedding_dimension": 4,
+        "recreate_type": True,
+    }
+    return ArcadeDBDocumentStore(url=ARCADEDB_URL, **settings)
+
+
+def _sample_docs(n: int = 3, dim: int = 4) -> list[Document]:
+    """
+    Build ``n`` documents numbered from 0.
+
+    Document ``i`` has the constant embedding ``[float(i)] * dim`` and the metadata
+    ``{"category": "test", "priority": i}``.
+    """
+    return [
+        Document(content=f"Document number {i}", embedding=[float(i)] * dim, meta={"category": "test", "priority": i})
+        for i in range(n)
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Unit tests (no ArcadeDB required)
+# ---------------------------------------------------------------------------
+
+
+class TestSerialization:
+    """Round-trip checks that need no running ArcadeDB instance."""
+
+    def test_to_dict_from_dict(self):
+        """A store rebuilt from its own ``to_dict`` output keeps URL, database and dimension."""
+        original = ArcadeDBDocumentStore(url="http://localhost:2480", database="test_db", embedding_dimension=4)
+
+        serialized = original.to_dict()
+        restored = ArcadeDBDocumentStore.from_dict(serialized)
+
+        for attribute in ("_database", "_embedding_dimension", "_url"):
+            assert getattr(restored, attribute) == getattr(original, attribute)
+
+
+# ---------------------------------------------------------------------------
+# Integration tests (require a running ArcadeDB instance)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.integration
+class TestArcadeDBDocumentStoreIntegration:
+    """End-to-end checks against a running ArcadeDB; each test gets a store that recreates its type."""
+
+    def test_count_empty(self, document_store):
+        assert document_store.count_documents() == 0
+
+    def test_count_after_write(self, document_store):
+        document_store.write_documents(_sample_docs(5), policy=DuplicatePolicy.OVERWRITE)
+        assert document_store.count_documents() == 5
+
+    def test_write_and_read(self, document_store):
+        docs = _sample_docs(2)
+        written = document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
+        assert written == 2
+
+        # Reading back must return exactly the documents that were written, not just as many.
+        all_docs = document_store.filter_documents()
+        assert sorted(d.id for d in all_docs) == sorted(d.id for d in docs)
+
+    def test_write_overwrite(self, document_store):
+        docs = _sample_docs(1)
+        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
+
+        updated = dataclasses.replace(docs[0], content="Updated content")
+        document_store.write_documents([updated], policy=DuplicatePolicy.OVERWRITE)
+
+        all_docs = document_store.filter_documents()
+        assert len(all_docs) == 1
+        assert all_docs[0].content == "Updated content"
+
+    def test_write_skip(self, document_store):
+        docs = _sample_docs(1)
+        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
+
+        written = document_store.write_documents(docs, policy=DuplicatePolicy.SKIP)
+        assert written == 0
+        assert document_store.count_documents() == 1
+
+    def test_write_duplicate_raises(self, document_store):
+        docs = _sample_docs(1)
+        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
+
+        with pytest.raises(DuplicateDocumentError):
+            document_store.write_documents(docs, policy=DuplicatePolicy.NONE)
+        # The rejected write must not have added anything.
+        assert document_store.count_documents() == 1
+
+    def test_delete(self, document_store):
+        docs = _sample_docs(3)
+        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
+
+        document_store.delete_documents([docs[0].id, docs[1].id])
+
+        # Only the document that was not listed may survive.
+        remaining = document_store.filter_documents()
+        assert [d.id for d in remaining] == [docs[2].id]
+
+    def test_filter_equality(self, document_store):
+        document_store.write_documents(_sample_docs(3), policy=DuplicatePolicy.OVERWRITE)
+
+        result = document_store.filter_documents(
+            filters={"field": "meta.category", "operator": "==", "value": "test"}
+        )
+        assert len(result) == 3
+
+    def test_filter_comparison(self, document_store):
+        document_store.write_documents(_sample_docs(5), policy=DuplicatePolicy.OVERWRITE)
+
+        result = document_store.filter_documents(
+            filters={"field": "meta.priority", "operator": ">", "value": 2}
+        )
+        assert sorted(d.meta["priority"] for d in result) == [3, 4]
+
+    def test_filter_and(self, document_store):
+        document_store.write_documents(_sample_docs(5), policy=DuplicatePolicy.OVERWRITE)
+
+        result = document_store.filter_documents(
+            filters={
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.category", "operator": "==", "value": "test"},
+                    {"field": "meta.priority", "operator": ">=", "value": 3},
+                ],
+            }
+        )
+        assert sorted(d.meta["priority"] for d in result) == [3, 4]
+
+    def test_embedding_retrieval(self, document_store):
+        document_store.write_documents(_sample_docs(5, dim=4), policy=DuplicatePolicy.OVERWRITE)
+
+        results = document_store._embedding_retrieval(query_embedding=[4.0, 4.0, 4.0, 4.0], top_k=3)
+        # At least one neighbor is expected, and never more than top_k.
+        assert 0 < len(results) <= 3
+        assert all(doc.score is not None for doc in results)
diff --git a/integrations/arcadedb/tests/test_filters.py b/integrations/arcadedb/tests/test_filters.py
new file mode 100644
index 0000000000..e68e1b135b
--- /dev/null
+++ b/integrations/arcadedb/tests/test_filters.py
@@ -0,0 +1,95 @@
+# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Unit tests for filter conversion (no ArcadeDB instance required)."""
+
+import pytest
+
+from haystack_integrations.document_stores.arcadedb.filters import _convert_filters
+
+
+class TestFilterConversion:
+    def test_none_returns_empty(self):
+        assert _convert_filters(None) == ""
+
+    def test_equality(self):
+        result = _convert_filters({"field": "meta.name", "operator": "==", "value": "alice"})
+        assert result == "meta.name = 'alice'"
+
+    def test_equality_null(self):
+        result = _convert_filters({"field": "meta.name", "operator": "==", "value": None})
+        assert result == "meta.name IS NULL"
+
+    def test_not_equal(self):
+        result = _convert_filters({"field": "meta.name", "operator": "!=", "value": "bob"})
+        assert result == "meta.name <> 'bob'"
+
+    def test_not_equal_null(self):
+        result = _convert_filters({"field": "meta.name", "operator": "!=", "value": None})
+        assert result == "meta.name IS NOT NULL"
+
+    def test_greater_than(self):
+        result = _convert_filters({"field": "meta.score", "operator": ">", "value": 5})
+        assert result == "meta.score > 5"
+
+    def test_in_operator(self):
+        result = _convert_filters({"field": "meta.tag", "operator": "in", "value": ["a", "b"]})
+        assert result == "meta.tag IN ['a', 'b']"
+
+    def test_not_in_operator(self):
+        result = _convert_filters({"field": "meta.tag", "operator": "not in", "value": ["x"]})
+        assert result == "meta.tag NOT IN ['x']"
+
+    def test_and(self):
+        result = _convert_filters({
+            "operator": "AND",
+            "conditions": [
+                {"field": "meta.a", "operator": "==", "value": 1},
+                {"field": "meta.b", "operator": ">", "value": 2},
+            ],
+        })
+        assert result == "(meta.a = 1 AND meta.b > 2)"
+
+    def test_or(self):
+        result = _convert_filters({
+            "operator": "OR",
+            "conditions": [
+                {"field": "meta.x", "operator": "==", "value": "yes"},
+                {"field": "meta.y", "operator": "==", "value": "no"},
+            ],
+        })
+        assert result == "(meta.x = 'yes' OR meta.y = 'no')"
+
+    def test_not(self):
+        result = _convert_filters({
+            "operator": "NOT",
+            "conditions": [
+                {"field": "meta.deleted", "operator": "==", "value": True},
+            ],
+        })
+        assert result == "NOT (meta.deleted = true)"
+
+    def test_nested(self):
+        result = _convert_filters({
+            "operator": "AND",
+            "conditions": [
+                {"field": "meta.a", "operator": "==", "value": 1},
+                {
+                    "operator": "OR",
+                    "conditions": [
+                        {"field": "meta.b", "operator": "==", "value": 2},
+                        {"field": "meta.c", "operator": "==", "value": 3},
+                    ],
+                },
+            ],
+        })
+        assert result == "(meta.a = 1 AND (meta.b = 2 OR meta.c = 3))"
+
+    def test_missing_operator_raises(self):
+        with pytest.raises(ValueError):
+            _convert_filters({"field": "x", "value": 1})
+
+    def test_missing_field_raises(self):
+        with pytest.raises(ValueError):
+            _convert_filters({"operator": "==", "value": 1})

From 951a1b52352ab99943fe74a342a2846edd188e88 Mon Sep 17 00:00:00 2001
From: lvca <lvca@users.noreply.github.com>
Date: Sat, 28 Feb 2026 18:33:43 -0500
Subject: [PATCH 02/16] fix: resolve ruff lint errors and set requests minimum
 version

- Remove unused noqa: B008 directives (B008 already in ignore list)
- Use HTTPStatus.BAD_REQUEST instead of magic value 400 (PLR2004)
- Add S608 to ruff ignore (SQL string construction is intentional for
  ArcadeDB HTTP/JSON API with proper value escaping)
- Set requests>=2.28.0 minimum to ensure Python 3.13 compatibility
  (older versions use removed cgi module)
---
 integrations/arcadedb/pyproject.toml                     | 4 +++-
 .../document_stores/arcadedb/document_store.py           | 9 +++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/integrations/arcadedb/pyproject.toml b/integrations/arcadedb/pyproject.toml
index c546f3dd87..8538deba04 100644
--- a/integrations/arcadedb/pyproject.toml
+++ b/integrations/arcadedb/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
 ]
 dependencies = [
   "haystack-ai>=2.9.0",
-  "requests",
+  "requests>=2.28.0",
 ]
 
 [project.urls]
@@ -91,6 +91,8 @@ ignore = [
   "B027", "FBT003", "S105", "S106", "S107",
   "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
   "B008", "S101",
+  # SQL string construction is intentional — ArcadeDB uses HTTP/JSON API with value escaping
+  "S608",
 ]
 
 [tool.ruff.lint.isort]
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
index 1b2a1e3596..584339c26e 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -5,6 +5,7 @@
 """ArcadeDB DocumentStore for Haystack 2.x — document storage + vector search via HTTP/JSON API."""
 
 import logging
+from http import HTTPStatus
 from typing import Any, ClassVar
 
 import requests
@@ -54,8 +55,8 @@ def __init__(
         *,
         url: str = "http://localhost:2480",
         database: str = "haystack",
-        username: Secret = Secret.from_env_var("ARCADEDB_USERNAME", strict=False),  # noqa: B008
-        password: Secret = Secret.from_env_var("ARCADEDB_PASSWORD", strict=False),  # noqa: B008
+        username: Secret = Secret.from_env_var("ARCADEDB_USERNAME", strict=False),
+        password: Secret = Secret.from_env_var("ARCADEDB_PASSWORD", strict=False),
         type_name: str = "Document",
         embedding_dimension: int = 768,
         similarity_function: str = "cosine",
@@ -135,7 +136,7 @@ def _command(self, sql: str, *, positional_params: list[Any] | None = None) -> l
             payload["params"] = positional_params
 
         resp = self._session.post(url, json=payload, auth=self._auth())
-        if resp.status_code >= 400:
+        if resp.status_code >= HTTPStatus.BAD_REQUEST:
             msg = f"ArcadeDB command failed ({resp.status_code}): {resp.text}"
             raise RuntimeError(msg)
 
@@ -146,7 +147,7 @@ def _server_command(self, command: str) -> dict[str, Any]:
         """Execute a server-level command (e.g. CREATE DATABASE)."""
         url = f"{self._url}/api/v1/server"
         resp = self._session.post(url, json={"command": command}, auth=self._auth())
-        if resp.status_code >= 400:
+        if resp.status_code >= HTTPStatus.BAD_REQUEST:
             msg = f"ArcadeDB server command failed ({resp.status_code}): {resp.text}"
             raise RuntimeError(msg)
         return resp.json()

From e031eb4b9d6461b1fe4ca24b6a093965b59e1036 Mon Sep 17 00:00:00 2001
From: lvca <lvca@users.noreply.github.com>
Date: Sat, 28 Feb 2026 18:36:55 -0500
Subject: [PATCH 03/16] style: apply ruff format to all source files

---
 .../arcadedb/examples/embedding_retrieval.py  | 12 +--
 .../arcadedb/document_store.py                | 17 +----
 .../arcadedb/tests/test_document_store.py     | 12 +--
 integrations/arcadedb/tests/test_filters.py   | 74 ++++++++++---------
 4 files changed, 55 insertions(+), 60 deletions(-)

diff --git a/integrations/arcadedb/examples/embedding_retrieval.py b/integrations/arcadedb/examples/embedding_retrieval.py
index ffdbad3acd..26033a7609 100644
--- a/integrations/arcadedb/examples/embedding_retrieval.py
+++ b/integrations/arcadedb/examples/embedding_retrieval.py
@@ -75,12 +75,14 @@
     print(f"  score={doc.score:.4f}  category={doc.meta.get('category')}  content={doc.content[:80]}...")
 
 # --- 5. Filter retrieval (only 'database' category) ---
-result_filtered = pipeline.run({
-    "retriever": {
-        "query_embedding": query_embedding,
-        "filters": {"field": "meta.category", "operator": "==", "value": "database"},
+result_filtered = pipeline.run(
+    {
+        "retriever": {
+            "query_embedding": query_embedding,
+            "filters": {"field": "meta.category", "operator": "==", "value": "database"},
+        }
     }
-})
+)
 
 print("\n--- Filtered (category=database) ---")
 for doc in result_filtered["retriever"]["documents"]:
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
index 584339c26e..7cb71acff3 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -271,9 +271,7 @@ def write_documents(
                 written += 1
 
             elif policy == DuplicatePolicy.SKIP:
-                existing = self._command(
-                    f"SELECT id FROM `{self._type_name}` WHERE id = {_sql_str(record['id'])}"
-                )
+                existing = self._command(f"SELECT id FROM `{self._type_name}` WHERE id = {_sql_str(record['id'])}")
                 if existing:
                     continue
                 self._insert_record(record, embedding_str, meta_str)
@@ -281,9 +279,7 @@ def write_documents(
 
             else:
                 # DuplicatePolicy.NONE — raise on duplicate
-                existing = self._command(
-                    f"SELECT id FROM `{self._type_name}` WHERE id = {_sql_str(record['id'])}"
-                )
+                existing = self._command(f"SELECT id FROM `{self._type_name}` WHERE id = {_sql_str(record['id'])}")
                 if existing:
                     msg = f"Document with id '{record['id']}' already exists."
                     raise DuplicateDocumentError(msg)
@@ -337,10 +333,7 @@ def _embedding_retrieval(
         embedding_str = str(query_embedding)
 
         # vectorNeighbors returns a single row with a "neighbors" list of {record, distance}
-        sql = (
-            f"SELECT vectorNeighbors('{self._type_name}[embedding]', "
-            f"{embedding_str}, {top_k}) AS neighbors"
-        )
+        sql = f"SELECT vectorNeighbors('{self._type_name}[embedding]', {embedding_str}, {top_k}) AS neighbors"
         rows = self._command(sql)
         if not rows or not rows[0].get("neighbors"):
             return []
@@ -364,9 +357,7 @@ def _embedding_retrieval(
 
         # Post-filter by metadata if specified
         if where and filters:
-            filtered_ids = {
-                r["id"] for r in self._command(f"SELECT id FROM `{self._type_name}` WHERE {where}")
-            }
+            filtered_ids = {r["id"] for r in self._command(f"SELECT id FROM `{self._type_name}` WHERE {where}")}
             documents = [d for d in documents if d.id in filtered_ids]
 
         return documents
diff --git a/integrations/arcadedb/tests/test_document_store.py b/integrations/arcadedb/tests/test_document_store.py
index 85e737dd4f..a8bdbde32d 100644
--- a/integrations/arcadedb/tests/test_document_store.py
+++ b/integrations/arcadedb/tests/test_document_store.py
@@ -120,18 +120,14 @@ def test_filter_equality(self, document_store):
         docs = _sample_docs(3)
         document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
 
-        result = document_store.filter_documents(
-            filters={"field": "meta.category", "operator": "==", "value": "test"}
-        )
+        result = document_store.filter_documents(filters={"field": "meta.category", "operator": "==", "value": "test"})
         assert len(result) == 3
 
     def test_filter_comparison(self, document_store):
         docs = _sample_docs(5)
         document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
 
-        result = document_store.filter_documents(
-            filters={"field": "meta.priority", "operator": ">", "value": 2}
-        )
+        result = document_store.filter_documents(filters={"field": "meta.priority", "operator": ">", "value": 2})
         assert len(result) == 2  # priority 3 and 4
 
     def test_filter_and(self, document_store):
@@ -153,8 +149,6 @@ def test_embedding_retrieval(self, document_store):
         docs = _sample_docs(5, dim=4)
         document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
 
-        results = document_store._embedding_retrieval(
-            query_embedding=[4.0, 4.0, 4.0, 4.0], top_k=3
-        )
+        results = document_store._embedding_retrieval(query_embedding=[4.0, 4.0, 4.0, 4.0], top_k=3)
         assert len(results) <= 3
         assert results[0].score is not None
diff --git a/integrations/arcadedb/tests/test_filters.py b/integrations/arcadedb/tests/test_filters.py
index e68e1b135b..7beb75006f 100644
--- a/integrations/arcadedb/tests/test_filters.py
+++ b/integrations/arcadedb/tests/test_filters.py
@@ -42,48 +42,56 @@ def test_not_in_operator(self):
         assert result == "meta.tag NOT IN ['x']"
 
     def test_and(self):
-        result = _convert_filters({
-            "operator": "AND",
-            "conditions": [
-                {"field": "meta.a", "operator": "==", "value": 1},
-                {"field": "meta.b", "operator": ">", "value": 2},
-            ],
-        })
+        result = _convert_filters(
+            {
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.a", "operator": "==", "value": 1},
+                    {"field": "meta.b", "operator": ">", "value": 2},
+                ],
+            }
+        )
         assert result == "(meta.a = 1 AND meta.b > 2)"
 
     def test_or(self):
-        result = _convert_filters({
-            "operator": "OR",
-            "conditions": [
-                {"field": "meta.x", "operator": "==", "value": "yes"},
-                {"field": "meta.y", "operator": "==", "value": "no"},
-            ],
-        })
+        result = _convert_filters(
+            {
+                "operator": "OR",
+                "conditions": [
+                    {"field": "meta.x", "operator": "==", "value": "yes"},
+                    {"field": "meta.y", "operator": "==", "value": "no"},
+                ],
+            }
+        )
         assert result == "(meta.x = 'yes' OR meta.y = 'no')"
 
     def test_not(self):
-        result = _convert_filters({
-            "operator": "NOT",
-            "conditions": [
-                {"field": "meta.deleted", "operator": "==", "value": True},
-            ],
-        })
+        result = _convert_filters(
+            {
+                "operator": "NOT",
+                "conditions": [
+                    {"field": "meta.deleted", "operator": "==", "value": True},
+                ],
+            }
+        )
         assert result == "NOT (meta.deleted = true)"
 
     def test_nested(self):
-        result = _convert_filters({
-            "operator": "AND",
-            "conditions": [
-                {"field": "meta.a", "operator": "==", "value": 1},
-                {
-                    "operator": "OR",
-                    "conditions": [
-                        {"field": "meta.b", "operator": "==", "value": 2},
-                        {"field": "meta.c", "operator": "==", "value": 3},
-                    ],
-                },
-            ],
-        })
+        result = _convert_filters(
+            {
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.a", "operator": "==", "value": 1},
+                    {
+                        "operator": "OR",
+                        "conditions": [
+                            {"field": "meta.b", "operator": "==", "value": 2},
+                            {"field": "meta.c", "operator": "==", "value": 3},
+                        ],
+                    },
+                ],
+            }
+        )
         assert result == "(meta.a = 1 AND (meta.b = 2 OR meta.c = 3))"
 
     def test_missing_operator_raises(self):

From 2f1f2a6bae49505c37cc6fdbb8b7bd550bc03606 Mon Sep 17 00:00:00 2001
From: lvca <lvca@users.noreply.github.com>
Date: Sat, 28 Feb 2026 18:41:06 -0500
Subject: [PATCH 04/16] fix: add type annotation to resolve mypy assignment
 error

---
 .../components/retrievers/arcadedb/embedding_retriever.py        | 1 +
 1 file changed, 1 insertion(+)

diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
index 1624055eb9..58555ac471 100644
--- a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
+++ b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
@@ -66,6 +66,7 @@ def run(
         """
         effective_top_k = top_k if top_k is not None else self._top_k
 
+        effective_filters: dict[str, Any] | None
         if self._filter_policy == FilterPolicy.REPLACE and filters is not None:
             effective_filters = filters
         elif self._filter_policy == FilterPolicy.MERGE and filters is not None and self._filters is not None:

From cd3c8a47310838ac17d1430cbe430bcb2722c9f6 Mon Sep 17 00:00:00 2001
From: Julian Risch <julianrisch@gmx.de>
Date: Mon, 2 Mar 2026 13:14:59 +0100
Subject: [PATCH 05/16] Apply suggestions from code review

---
 .github/labeler.yml                           |  5 ++
 integrations/arcadedb/README.md               | 80 ++-----------------
 integrations/arcadedb/pyproject.toml          |  2 +-
 .../arcadedb/embedding_retriever.py           | 37 ++++++++-
 .../arcadedb/document_store.py                | 26 +++++-
 5 files changed, 67 insertions(+), 83 deletions(-)

diff --git a/.github/labeler.yml b/.github/labeler.yml
index a4add7d750..e0b9185e8c 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -24,6 +24,11 @@ integration:anthropic:
       - any-glob-to-any-file: "integrations/anthropic/**/*"
       - any-glob-to-any-file: ".github/workflows/anthropic.yml"
 
+integration:arcadedb:
+  - changed-files:
+      - any-glob-to-any-file: "integrations/arcadedb/**/*"
+      - any-glob-to-any-file: ".github/workflows/arcadedb.yml"
+
 integration:astra:
   - changed-files:
       - any-glob-to-any-file: "integrations/astra/**/*"
diff --git a/integrations/arcadedb/README.md b/integrations/arcadedb/README.md
index 0ee93f0722..eb0ac897ca 100644
--- a/integrations/arcadedb/README.md
+++ b/integrations/arcadedb/README.md
@@ -4,82 +4,12 @@
 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/arcadedb-haystack.svg)](https://pypi.org/project/arcadedb-haystack)
 [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE.txt)
 
-**[ArcadeDB](https://arcadedb.com)** integration for [Haystack](https://haystack.deepset.ai/) 2.x.
+- [Integration page](https://haystack.deepset.ai/integrations/arcadedb)
+- [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/arcadedb/CHANGELOG.md)
 
-ArcadeDB is an open-source multi-model database that combines document storage, HNSW vector search, and SQL metadata filtering in a single engine. This integration provides a `DocumentStore` and `EmbeddingRetriever` that connect to ArcadeDB via its HTTP/JSON API using only the `requests` library -- no special drivers needed.
+---
 
-## Installation
+## Contributing
 
-```bash
-pip install arcadedb-haystack
-```
+Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
 
-## Usage
-
-Start ArcadeDB:
-
-```bash
-docker run -d -p 2480:2480 \
-    -e JAVA_OPTS="-Darcadedb.server.rootPassword=arcadedb" \
-    arcadedata/arcadedb:latest
-
-export ARCADEDB_USERNAME=root
-export ARCADEDB_PASSWORD=arcadedb
-```
-
-### Document Store
-
-```python
-from haystack import Document
-from haystack.document_stores.types import DuplicatePolicy
-from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
-
-store = ArcadeDBDocumentStore(
-    database="myproject",
-    embedding_dimension=768,
-)
-
-docs = [
-    Document(
-        content="ArcadeDB supports graphs, documents, and vectors.",
-        embedding=[0.1] * 768,
-        meta={"source": "docs", "category": "database"},
-    )
-]
-store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-store.filter_documents(
-    filters={"field": "meta.category", "operator": "==", "value": "database"}
-)
-```
-
-### Pipeline with Embedding Retriever
-
-```python
-from haystack import Pipeline
-from haystack_integrations.components.retrievers.arcadedb import ArcadeDBEmbeddingRetriever
-from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
-
-store = ArcadeDBDocumentStore(database="myproject", embedding_dimension=768)
-pipeline = Pipeline()
-pipeline.add_component("retriever", ArcadeDBEmbeddingRetriever(document_store=store, top_k=10))
-
-result = pipeline.run({"retriever": {"query_embedding": [0.1] * 768}})
-```
-
-## Configuration
-
-| Parameter | Default | Description |
-|---|---|---|
-| `url` | `http://localhost:2480` | ArcadeDB HTTP endpoint |
-| `database` | `haystack` | Database name |
-| `username` | env `ARCADEDB_USERNAME` | HTTP Basic Auth username |
-| `password` | env `ARCADEDB_PASSWORD` | HTTP Basic Auth password |
-| `type_name` | `Document` | Vertex type name |
-| `embedding_dimension` | `768` | Vector dimension for HNSW index |
-| `similarity_function` | `cosine` | `cosine`, `euclidean`, or `dot` |
-| `recreate_type` | `False` | Drop and recreate type on init |
-| `create_database` | `True` | Create database if it doesn't exist |
-
-## License
-
-`arcadedb-haystack` is distributed under the terms of the [Apache-2.0](LICENSE.txt) license.
diff --git a/integrations/arcadedb/pyproject.toml b/integrations/arcadedb/pyproject.toml
index 8538deba04..d79b975003 100644
--- a/integrations/arcadedb/pyproject.toml
+++ b/integrations/arcadedb/pyproject.toml
@@ -10,7 +10,7 @@ readme = "README.md"
 requires-python = ">=3.10"
 license = "Apache-2.0"
 keywords = ["arcadedb", "haystack", "vector-search", "document-store", "rag"]
-authors = [{ name = "ArcadeData Ltd", email = "info@arcadedb.com" }]
+authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }, { name = "ArcadeData Ltd", email = "info@arcadedb.com" }]
 classifiers = [
   "License :: OSI Approved :: Apache Software License",
   "Development Status :: 4 - Beta",
diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
index 58555ac471..4181205a18 100644
--- a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
+++ b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
@@ -20,11 +20,29 @@ class ArcadeDBEmbeddingRetriever:
     Usage example:
 
     ```python
+    from haystack import Document
+    from haystack.components.embedders import SentenceTransformersTextEmbedder
     from haystack_integrations.components.retrievers.arcadedb import ArcadeDBEmbeddingRetriever
     from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
 
     store = ArcadeDBDocumentStore(database="mydb")
     retriever = ArcadeDBEmbeddingRetriever(document_store=store, top_k=5)
+    
+    # Add documents to DocumentStore
+    documents = [
+        Document(text="My name is Carla and I live in Berlin"),
+        Document(text="My name is Paul and I live in New York"),
+        Document(text="My name is Silvano and I live in Matera"),
+        Document(text="My name is Usagi Tsukino and I live in Tokyo"),
+    ]
+    document_store.write_documents(documents)
+    
+    embedder = SentenceTransformersTextEmbedder()
+    query_embeddings = embedder.run("Who lives in Berlin?")["embedding"]
+
+    result = retriever.run(query_embedding=query_embeddings)
+    for doc in result["documents"]:
+        print(doc.content)
     ```
     """
 
@@ -62,7 +80,8 @@ def run(
         :param query_embedding: The embedding vector to search with.
         :param filters: Optional filters to narrow results.
         :param top_k: Maximum number of documents to return.
-        :returns: A dict with key ``"documents"`` containing the retrieved documents.
+        :returns: A dictionary with the following keys:
+            - `documents`: List of `Document`s most similar to the given `query_embedding`
         """
         effective_top_k = top_k if top_k is not None else self._top_k
 
@@ -85,7 +104,12 @@ def run(
         return {"documents": documents}
 
     def to_dict(self) -> dict[str, Any]:
-        """Serialize this retriever to a dictionary."""
+        """
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
+        """
         return default_to_dict(
             self,
             document_store=self._document_store.to_dict(),
@@ -96,7 +120,14 @@ def to_dict(self) -> dict[str, Any]:
 
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> "ArcadeDBEmbeddingRetriever":
-        """Deserialize an ArcadeDBEmbeddingRetriever from a dictionary."""
+        """
+        Deserializes the component from a dictionary.
+
+        :param data:
+            Dictionary to deserialize from.
+        :returns:
+            Deserialized component.
+        """
         init_params = data.get("init_parameters", {})
         if "document_store" in init_params:
             init_params["document_store"] = ArcadeDBDocumentStore.from_dict(init_params["document_store"])
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
index 7cb71acff3..aade562b19 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -33,13 +33,18 @@ class ArcadeDBDocumentStore:
     Usage example:
 
     ```python
+    from haystack.dataclasses.document import Document
     from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
 
-    store = ArcadeDBDocumentStore(
+    document_store = ArcadeDBDocumentStore(
         url="http://localhost:2480",
         database="haystack",
         embedding_dimension=768,
     )
+    document_store.write_documents([
+        Document(content="This is first", embedding=[0.0] * 768),
+        Document(content="This is second", embedding=[0.1] * 768),
+    ])
     ```
     """
 
@@ -94,7 +99,11 @@ def __init__(
     # ------------------------------------------------------------------
 
     def to_dict(self) -> dict[str, Any]:
-        """Serialize this store to a dictionary."""
+        """
+        Serializes the DocumentStore to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         return default_to_dict(
             self,
             url=self._url,
@@ -110,7 +119,13 @@ def to_dict(self) -> dict[str, Any]:
 
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> "ArcadeDBDocumentStore":
-        """Deserialize an ArcadeDBDocumentStore from a dictionary."""
+        """
+        Deserializes the DocumentStore from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized DocumentStore.
         init_params = data.get("init_parameters", {})
         for key in ("username", "password"):
             if init_params.get(key) is not None:
@@ -208,7 +223,10 @@ def _ensure_initialized(self) -> None:
     # ------------------------------------------------------------------
 
     def count_documents(self) -> int:
-        """Return the number of documents stored."""
+        Returns how many documents are present in the document store.
+
+        :returns:
+            Number of documents in the document store.
         self._ensure_initialized()
         rows = self._command(f"SELECT count(*) AS cnt FROM `{self._type_name}`")
         if rows:

From 9922db947c82c9b6793d765f0ebf26e8ec3b2f79 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 2 Mar 2026 13:21:04 +0100
Subject: [PATCH 06/16] format and fix docstrings

---
 .../components/retrievers/arcadedb/embedding_retriever.py     | 4 ++--
 .../document_stores/arcadedb/document_store.py                | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
index 4181205a18..8fc7bee800 100644
--- a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
+++ b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
@@ -27,7 +27,7 @@ class ArcadeDBEmbeddingRetriever:
 
     store = ArcadeDBDocumentStore(database="mydb")
     retriever = ArcadeDBEmbeddingRetriever(document_store=store, top_k=5)
-    
+
     # Add documents to DocumentStore
     documents = [
         Document(text="My name is Carla and I live in Berlin"),
@@ -36,7 +36,7 @@ class ArcadeDBEmbeddingRetriever:
         Document(text="My name is Usagi Tsukino and I live in Tokyo"),
     ]
     document_store.write_documents(documents)
-    
+
     embedder = SentenceTransformersTextEmbedder()
     query_embeddings = embedder.run("Who lives in Berlin?")["embedding"]
 
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
index aade562b19..27a2d831e4 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -104,6 +104,7 @@ def to_dict(self) -> dict[str, Any]:
 
         :returns:
             Dictionary with serialized data.
+        """
         return default_to_dict(
             self,
             url=self._url,
@@ -126,6 +127,7 @@ def from_dict(cls, data: dict[str, Any]) -> "ArcadeDBDocumentStore":
             The dictionary to deserialize from.
         :returns:
             The deserialized DocumentStore.
+        """
         init_params = data.get("init_parameters", {})
         for key in ("username", "password"):
             if init_params.get(key) is not None:
@@ -223,10 +225,12 @@ def _ensure_initialized(self) -> None:
     # ------------------------------------------------------------------
 
     def count_documents(self) -> int:
+        """
         Returns how many documents are present in the document store.
 
         :returns:
             Number of documents in the document store.
+        """
         self._ensure_initialized()
         rows = self._command(f"SELECT count(*) AS cnt FROM `{self._type_name}`")
         if rows:

From 6827d0f65c239c686bfdcf04e962fe4b5db98615 Mon Sep 17 00:00:00 2001
From: Julian Risch <julianrisch@gmx.de>
Date: Mon, 2 Mar 2026 13:24:55 +0100
Subject: [PATCH 07/16] Update
 integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py

---
 .../components/retrievers/arcadedb/embedding_retriever.py       | 2 --
 1 file changed, 2 deletions(-)

diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
index 8fc7bee800..ab829a5d21 100644
--- a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
+++ b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
@@ -2,8 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-"""ArcadeDB Embedding Retriever for Haystack 2.x pipelines."""
-
 from typing import Any
 
 from haystack import Document, component, default_from_dict, default_to_dict

From fc8a7a80ad91de91501c93d76dfb93dff6378b1c Mon Sep 17 00:00:00 2001
From: Julian Risch <julianrisch@gmx.de>
Date: Mon, 2 Mar 2026 13:25:08 +0100
Subject: [PATCH 08/16] Update .github/labeler.yml

---
 .github/labeler.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/labeler.yml b/.github/labeler.yml
index e0b9185e8c..20cb6edf9f 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -4,11 +4,6 @@ integration:aimlapi:
       - any-glob-to-any-file: "integrations/aimlapi/**/*"
       - any-glob-to-any-file: ".github/workflows/aimlapi.yml"
 
-integration:arcadedb:
-  - changed-files:
-      - any-glob-to-any-file: "integrations/arcadedb/**/*"
-      - any-glob-to-any-file: ".github/workflows/arcadedb.yml"
-
 integration:amazon-bedrock:
   - changed-files:
       - any-glob-to-any-file: "integrations/amazon_bedrock/**/*"

From 031932f699b0d09331cb9295d67e7775ed9af6e6 Mon Sep 17 00:00:00 2001
From: Julian Risch <julianrisch@gmx.de>
Date: Mon, 2 Mar 2026 13:25:16 +0100
Subject: [PATCH 09/16] Update
 integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py

---
 .../document_stores/arcadedb/document_store.py                | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
index 27a2d831e4..41f6849c13 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -94,10 +94,6 @@ def __init__(
         self._session = requests.Session()
         self._initialized = False
 
-    # ------------------------------------------------------------------
-    # Serialization (Haystack pipeline export/import)
-    # ------------------------------------------------------------------
-
     def to_dict(self) -> dict[str, Any]:
         """
         Serializes the DocumentStore to a dictionary.

From 5b29fe107f00c96baf782db17f9529e7ec877988 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 2 Mar 2026 13:38:14 +0100
Subject: [PATCH 10/16] update license for consistency

---
 integrations/arcadedb/LICENSE.txt                               | 2 +-
 integrations/arcadedb/examples/embedding_retrieval.py           | 2 +-
 .../components/retrievers/arcadedb/__init__.py                  | 2 +-
 .../components/retrievers/arcadedb/embedding_retriever.py       | 2 +-
 .../haystack_integrations/document_stores/arcadedb/__init__.py  | 2 +-
 .../document_stores/arcadedb/converters.py                      | 2 +-
 .../document_stores/arcadedb/document_store.py                  | 2 +-
 .../haystack_integrations/document_stores/arcadedb/filters.py   | 2 +-
 integrations/arcadedb/tests/test_document_store.py              | 2 +-
 integrations/arcadedb/tests/test_filters.py                     | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/integrations/arcadedb/LICENSE.txt b/integrations/arcadedb/LICENSE.txt
index 0fa7906e3a..1c8582b372 100644
--- a/integrations/arcadedb/LICENSE.txt
+++ b/integrations/arcadedb/LICENSE.txt
@@ -175,7 +175,7 @@
 
    END OF TERMS AND CONDITIONS
 
-   Copyright 2025 ArcadeData Ltd
+   Copyright 2023-present deepset GmbH
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/integrations/arcadedb/examples/embedding_retrieval.py b/integrations/arcadedb/examples/embedding_retrieval.py
index 26033a7609..cf539f65b9 100644
--- a/integrations/arcadedb/examples/embedding_retrieval.py
+++ b/integrations/arcadedb/examples/embedding_retrieval.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/__init__.py b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/__init__.py
index eb4a7bfbe5..da774b7d53 100644
--- a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/__init__.py
+++ b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/__init__.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
index ab829a5d21..ba36a58e4c 100644
--- a/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
+++ b/integrations/arcadedb/src/haystack_integrations/components/retrievers/arcadedb/embedding_retriever.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/__init__.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/__init__.py
index 3676df644a..d72ad983c9 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/__init__.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/__init__.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/converters.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/converters.py
index ccafefae70..61be17a587 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/converters.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/converters.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
index 41f6849c13..c2a3b4e28c 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
index 52a7285989..850e444541 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/arcadedb/tests/test_document_store.py b/integrations/arcadedb/tests/test_document_store.py
index a8bdbde32d..e9e7f2d40c 100644
--- a/integrations/arcadedb/tests/test_document_store.py
+++ b/integrations/arcadedb/tests/test_document_store.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/arcadedb/tests/test_filters.py b/integrations/arcadedb/tests/test_filters.py
index 7beb75006f..3423007c08 100644
--- a/integrations/arcadedb/tests/test_filters.py
+++ b/integrations/arcadedb/tests/test_filters.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2025-present ArcadeData Ltd <info@arcadedb.com>
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 

From 3498a7a39b6a7f75648ff30ea1572c8ecbb67c81 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 2 Mar 2026 13:55:51 +0100
Subject: [PATCH 11/16] use mixin DocumentStore tests and unify error handling

---
 .../arcadedb/document_store.py                |  16 +-
 .../document_stores/arcadedb/filters.py       |  33 ++--
 .../arcadedb/tests/test_document_store.py     | 141 +++++++-----------
 3 files changed, 94 insertions(+), 96 deletions(-)

diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
index c2a3b4e28c..2c99528d8b 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -12,6 +12,7 @@
 from haystack import Document, default_from_dict, default_to_dict
 from haystack.document_stores.errors import DuplicateDocumentError
 from haystack.document_stores.types import DuplicatePolicy
+from haystack.errors import FilterError
 from haystack.utils import Secret
 
 from haystack_integrations.document_stores.arcadedb.converters import (
@@ -244,7 +245,10 @@ def filter_documents(
         :returns: List of matching documents.
         """
         self._ensure_initialized()
-        where = _convert_filters(filters)
+        try:
+            where = _convert_filters(filters)
+        except ValueError as e:
+            raise FilterError(str(e)) from e
         sql = f"SELECT * FROM `{self._type_name}`"
         if where:
             sql += f" WHERE {where}"
@@ -264,6 +268,11 @@ def write_documents(
         :returns: Number of documents written.
         """
         self._ensure_initialized()
+        if not isinstance(documents, list):
+            raise ValueError("documents must be a list of Document objects")
+        for doc in documents:
+            if not isinstance(doc, Document):
+                raise ValueError("documents must be a list of Document objects")
         if not documents:
             return 0
 
@@ -271,7 +280,10 @@ def write_documents(
         written = 0
 
         for record in records:
-            embedding_str = str(record["embedding"]) if record["embedding"] else "[]"
+            emb = record["embedding"]
+            if emb is None or not isinstance(emb, list) or len(emb) != self._embedding_dimension:
+                emb = [0.0] * self._embedding_dimension
+            embedding_str = str(emb)
             meta_str = _map_literal(record["meta"]) if record["meta"] else "{}"
 
             if policy == DuplicatePolicy.OVERWRITE:
diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
index 850e444541..d089898a58 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/filters.py
@@ -28,6 +28,9 @@ def _parse_condition(condition: dict[str, Any]) -> str:
     operator_upper = operator.upper()
 
     if operator_upper in ("AND", "OR"):
+        if "conditions" not in condition:
+            msg = f"Missing 'conditions' in filter: {condition}"
+            raise ValueError(msg)
         conditions = condition.get("conditions", [])
         if not conditions:
             return ""
@@ -53,6 +56,9 @@ def _parse_condition(condition: dict[str, Any]) -> str:
     if not field:
         msg = f"Missing 'field' in filter condition: {condition}"
         raise ValueError(msg)
+    if "value" not in condition:
+        msg = f"Missing 'value' in filter condition: {condition}"
+        raise ValueError(msg)
 
     return _comparison_to_sql(field, operator, value)
 
@@ -68,23 +74,28 @@ def _comparison_to_sql(field: str, operator: str, value: Any) -> str:
             return f"{field} IS NOT NULL"
         return f"{field} <> {_sql_value(value)}"
 
-    if operator == ">":
-        return f"{field} > {_sql_value(value)}"
-
-    if operator == ">=":
-        return f"{field} >= {_sql_value(value)}"
-
-    if operator == "<":
-        return f"{field} < {_sql_value(value)}"
-
-    if operator == "<=":
-        return f"{field} <= {_sql_value(value)}"
+    if operator in (">", ">=", "<", "<="):
+        if value is None:
+            return "1 = 0"
+        if isinstance(value, list):
+            msg = "Comparison operators require numeric or datetime values, not list"
+            raise ValueError(msg)
+        if isinstance(value, str) and "T" not in value:
+            msg = "Comparison operators require numeric or datetime (ISO) values, not plain string"
+            raise ValueError(msg)
+        return f"{field} {operator} {_sql_value(value)}"
 
     if operator == "in":
+        if not isinstance(value, list):
+            msg = "Operator 'in' requires value to be a list"
+            raise ValueError(msg)
         values = ", ".join(_sql_value(v) for v in value)
         return f"{field} IN [{values}]"
 
     if operator == "not in":
+        if not isinstance(value, list):
+            msg = "Operator 'not in' requires value to be a list"
+            raise ValueError(msg)
         values = ", ".join(_sql_value(v) for v in value)
         return f"{field} NOT IN [{values}]"
 
diff --git a/integrations/arcadedb/tests/test_document_store.py b/integrations/arcadedb/tests/test_document_store.py
index e9e7f2d40c..11c3ed3820 100644
--- a/integrations/arcadedb/tests/test_document_store.py
+++ b/integrations/arcadedb/tests/test_document_store.py
@@ -9,23 +9,13 @@
 from haystack import Document
 from haystack.document_stores.errors import DuplicateDocumentError
 from haystack.document_stores.types import DuplicatePolicy
+from haystack.testing.document_store import DocumentStoreBaseTests
 
 from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
 
 ARCADEDB_URL = os.getenv("ARCADEDB_URL", "http://localhost:2480")
 
 
-@pytest.fixture()
-def document_store():
-    store = ArcadeDBDocumentStore(
-        url=ARCADEDB_URL,
-        database="haystack_test",
-        embedding_dimension=4,
-        recreate_type=True,
-    )
-    return store
-
-
 def _sample_docs(n: int = 3, dim: int = 4) -> list[Document]:
     docs = []
     for i in range(n):
@@ -64,24 +54,49 @@ def test_to_dict_from_dict(self):
 
 
 @pytest.mark.integration
-class TestArcadeDBDocumentStoreIntegration:
-    def test_count_empty(self, document_store):
-        assert document_store.count_documents() == 0
-
-    def test_count_after_write(self, document_store):
-        docs = _sample_docs(5)
-        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-        assert document_store.count_documents() == 5
-
-    def test_write_and_read(self, document_store):
-        docs = _sample_docs(2)
-        written = document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-        assert written == 2
-
-        all_docs = document_store.filter_documents()
-        assert len(all_docs) == 2
+class TestArcadeDBDocumentStore(DocumentStoreBaseTests):
+    """
+    Run Haystack DocumentStore mixin tests against ArcadeDBDocumentStore.
+
+    Base tests cover: count_documents, delete_documents, filter_documents, write_documents.
+    ArcadeDB does not implement delete_all_documents, delete_by_filter, or update_by_filter,
+    so DocumentStoreBaseTests (not Extended) is used.
+    """
+
+    @pytest.fixture
+    def document_store(self, document_store: ArcadeDBDocumentStore) -> ArcadeDBDocumentStore:
+        """Override to provide ArcadeDB document store from conftest."""
+        yield document_store
+
+    def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
+        """
+        Compare document lists for tests. Clear score (filter_documents does not set it;
+        embedding_retrieval does). Compare embeddings approximately for float round-trip.
+        Documents written without embeddings are stored as all-zero vectors; treat them as None for comparison.
+        """
+        assert len(received) == len(expected)
+        received = sorted(received, key=lambda x: x.id)
+        expected = sorted(expected, key=lambda x: x.id)
+        for received_doc, expected_doc in zip(received, expected, strict=True):
+            received_doc.score = None
+            if expected_doc.embedding is None:
+                received_doc.embedding = None
+            elif received_doc.embedding is None:
+                assert expected_doc.embedding is None
+            else:
+                assert received_doc.embedding == pytest.approx(expected_doc.embedding)
+            received_doc.embedding, expected_doc.embedding = None, None
+            assert received_doc == expected_doc
+
+    def test_write_documents(self, document_store: ArcadeDBDocumentStore):
+        """Override mixin: test default write_documents and duplicate fail behaviour."""
+        docs = [Document(id="1")]
+        assert document_store.write_documents(docs) == 1
+        with pytest.raises(DuplicateDocumentError):
+            document_store.write_documents(docs, policy=DuplicatePolicy.FAIL)
 
-    def test_write_overwrite(self, document_store):
+    def test_write_overwrite(self, document_store: ArcadeDBDocumentStore):
+        """ArcadeDB-specific: overwrite updates content."""
         docs = _sample_docs(1)
         document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
 
@@ -92,63 +107,23 @@ def test_write_overwrite(self, document_store):
         assert len(all_docs) == 1
         assert all_docs[0].content == "Updated content"
 
-    def test_write_skip(self, document_store):
-        docs = _sample_docs(1)
-        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-
-        written = document_store.write_documents(docs, policy=DuplicatePolicy.SKIP)
-        assert written == 0
-        assert document_store.count_documents() == 1
-
-    def test_write_duplicate_raises(self, document_store):
-        docs = _sample_docs(1)
-        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-
-        with pytest.raises(DuplicateDocumentError):
-            document_store.write_documents(docs, policy=DuplicatePolicy.NONE)
-
-    def test_delete(self, document_store):
-        docs = _sample_docs(3)
-        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-
-        ids_to_delete = [docs[0].id, docs[1].id]
-        document_store.delete_documents(ids_to_delete)
-
-        assert document_store.count_documents() == 1
-
-    def test_filter_equality(self, document_store):
-        docs = _sample_docs(3)
-        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-
-        result = document_store.filter_documents(filters={"field": "meta.category", "operator": "==", "value": "test"})
-        assert len(result) == 3
-
-    def test_filter_comparison(self, document_store):
-        docs = _sample_docs(5)
-        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-
-        result = document_store.filter_documents(filters={"field": "meta.priority", "operator": ">", "value": 2})
-        assert len(result) == 2  # priority 3 and 4
-
-    def test_filter_and(self, document_store):
-        docs = _sample_docs(5)
+    def test_embedding_retrieval(self, document_store: ArcadeDBDocumentStore):
+        """ArcadeDB-specific: vector search via _embedding_retrieval."""
+        # Use store's embedding_dimension (768 from conftest); create small test docs
+        dim = document_store._embedding_dimension
+        docs = [
+            Document(
+                content=f"Document number {i}",
+                embedding=[float(i)] * dim,
+                meta={"category": "test", "priority": i},
+            )
+            for i in range(5)
+        ]
         document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
 
-        result = document_store.filter_documents(
-            filters={
-                "operator": "AND",
-                "conditions": [
-                    {"field": "meta.category", "operator": "==", "value": "test"},
-                    {"field": "meta.priority", "operator": ">=", "value": 3},
-                ],
-            }
+        results = document_store._embedding_retrieval(
+            query_embedding=[4.0] * dim,
+            top_k=3,
         )
-        assert len(result) == 2
-
-    def test_embedding_retrieval(self, document_store):
-        docs = _sample_docs(5, dim=4)
-        document_store.write_documents(docs, policy=DuplicatePolicy.OVERWRITE)
-
-        results = document_store._embedding_retrieval(query_embedding=[4.0, 4.0, 4.0, 4.0], top_k=3)
         assert len(results) <= 3
         assert results[0].score is not None

From a535440ef3894c930e2691cb6e546a3fe11d27ae Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 2 Mar 2026 13:58:20 +0100
Subject: [PATCH 12/16] reuse variable in raise ValueError calls

---
 .../document_stores/arcadedb/document_store.py               | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
index 2c99528d8b..fb5941a169 100644
--- a/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
+++ b/integrations/arcadedb/src/haystack_integrations/document_stores/arcadedb/document_store.py
@@ -268,11 +268,12 @@ def write_documents(
         :returns: Number of documents written.
         """
         self._ensure_initialized()
+        msg = "documents must be a list of Document objects"
         if not isinstance(documents, list):
-            raise ValueError("documents must be a list of Document objects")
+            raise ValueError(msg)
         for doc in documents:
             if not isinstance(doc, Document):
-                raise ValueError("documents must be a list of Document objects")
+                raise ValueError(msg)
         if not documents:
             return 0
 

From 36d08ddeb172fed8132bee9e37bb482b7442123f Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 2 Mar 2026 14:01:06 +0100
Subject: [PATCH 13/16] add conftest

---
 integrations/arcadedb/tests/conftest.py | 28 +++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 integrations/arcadedb/tests/conftest.py

diff --git a/integrations/arcadedb/tests/conftest.py b/integrations/arcadedb/tests/conftest.py
new file mode 100644
index 0000000000..93da0a28a0
--- /dev/null
+++ b/integrations/arcadedb/tests/conftest.py
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: 2025-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Pytest fixtures for ArcadeDB integration tests."""
+
+import os
+
+import pytest
+
+from haystack_integrations.document_stores.arcadedb import ArcadeDBDocumentStore
+
+ARCADEDB_URL = os.getenv("ARCADEDB_URL", "http://localhost:2480")
+
+
+@pytest.fixture
+def document_store():
+    """
+    ArcadeDB document store instance for integration tests.
+
+    """
+    store = ArcadeDBDocumentStore(
+        url=ARCADEDB_URL,
+        database="haystack_test",
+        embedding_dimension=768,
+        recreate_type=True,
+    )
+    return store

From 4d8b624361d7a54e068d10d275b51293fffa77e6 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 2 Mar 2026 14:08:26 +0100
Subject: [PATCH 14/16] use action secret for ArcadeDB

---
 .github/workflows/arcadedb.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/arcadedb.yml b/.github/workflows/arcadedb.yml
index 28b57526f9..81d37c33d6 100644
--- a/.github/workflows/arcadedb.yml
+++ b/.github/workflows/arcadedb.yml
@@ -19,7 +19,7 @@ env:
   PYTHONUNBUFFERED: "1"
   FORCE_COLOR: "1"
   ARCADEDB_USERNAME: "root"
-  ARCADEDB_PASSWORD: "arcadedb"
+  ARCADEDB_PASSWORD: ${{ secrets.ARCADEDB_PASSWORD }}
 
 defaults:
   run:
@@ -38,7 +38,7 @@ jobs:
       arcadedb:
         image: arcadedata/arcadedb:latest
         env:
-          JAVA_OPTS: "-Darcadedb.server.rootPassword=arcadedb"
+          JAVA_OPTS: "-Darcadedb.server.rootPassword=${{ secrets.ARCADEDB_PASSWORD }}"
         ports:
           - 2480:2480
 

From b379a1444c252a8be0f4e564262ff9d2703a09c4 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 2 Mar 2026 14:19:18 +0100
Subject: [PATCH 15/16] wait for ArcadeDB service to start

---
 .github/workflows/arcadedb.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/.github/workflows/arcadedb.yml b/.github/workflows/arcadedb.yml
index 81d37c33d6..288c97cd0f 100644
--- a/.github/workflows/arcadedb.yml
+++ b/.github/workflows/arcadedb.yml
@@ -53,6 +53,20 @@ jobs:
       - name: Install Hatch
         run: pip install hatch "virtualenv<21.0.0"
 
+      - name: Wait for ArcadeDB
+        run: |
+          timeout=60
+          until [ $timeout -le 0 ] || curl -sSf -u "$ARCADEDB_USERNAME:$ARCADEDB_PASSWORD" http://localhost:2480/api/v1/server > /dev/null; do
+            echo "Waiting for ArcadeDB service to start... ($timeout s left)"
+            sleep 5
+            timeout=$((timeout - 5))
+          done
+          if [ $timeout -le 0 ]; then
+            echo "Timed out waiting for ArcadeDB service to start."
+            exit 1
+          fi
+          echo "ArcadeDB is ready."
+
       - name: Lint
         if: matrix.python-version == '3.10' && runner.os == 'Linux'
         run: hatch run fmt-check && hatch run test:types

From dea7a7e287e5d0ebd3d5a6737fac8ad832549dfe Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Mon, 2 Mar 2026 14:27:32 +0100
Subject: [PATCH 16/16] use default ARCADEDB_PASSWORD in forks

---
 .github/workflows/arcadedb.yml                 | 18 +++---------------
 .../arcadedb/tests/test_document_store.py      | 14 ++++----------
 2 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/arcadedb.yml b/.github/workflows/arcadedb.yml
index 288c97cd0f..58c3cb0f31 100644
--- a/.github/workflows/arcadedb.yml
+++ b/.github/workflows/arcadedb.yml
@@ -19,6 +19,7 @@ env:
   PYTHONUNBUFFERED: "1"
   FORCE_COLOR: "1"
   ARCADEDB_USERNAME: "root"
+  # Only set in main repo (secrets not passed to fork workflows); integration tests skip when unset
   ARCADEDB_PASSWORD: ${{ secrets.ARCADEDB_PASSWORD }}
 
 defaults:
@@ -38,7 +39,8 @@ jobs:
       arcadedb:
         image: arcadedata/arcadedb:latest
         env:
-          JAVA_OPTS: "-Darcadedb.server.rootPassword=${{ secrets.ARCADEDB_PASSWORD }}"
+          # Default password so container starts in forks; main repo uses secret
+          JAVA_OPTS: "-Darcadedb.server.rootPassword=${{ secrets.ARCADEDB_PASSWORD || 'arcadedb' }}"
         ports:
           - 2480:2480
 
@@ -53,20 +55,6 @@ jobs:
       - name: Install Hatch
         run: pip install hatch "virtualenv<21.0.0"
 
-      - name: Wait for ArcadeDB
-        run: |
-          timeout=60
-          until [ $timeout -le 0 ] || curl -sSf -u "$ARCADEDB_USERNAME:$ARCADEDB_PASSWORD" http://localhost:2480/api/v1/server > /dev/null; do
-            echo "Waiting for ArcadeDB service to start... ($timeout s left)"
-            sleep 5
-            timeout=$((timeout - 5))
-          done
-          if [ $timeout -le 0 ]; then
-            echo "Timed out waiting for ArcadeDB service to start."
-            exit 1
-          fi
-          echo "ArcadeDB is ready."
-
       - name: Lint
         if: matrix.python-version == '3.10' && runner.os == 'Linux'
         run: hatch run fmt-check && hatch run test:types
diff --git a/integrations/arcadedb/tests/test_document_store.py b/integrations/arcadedb/tests/test_document_store.py
index 11c3ed3820..fb0f0ca4a8 100644
--- a/integrations/arcadedb/tests/test_document_store.py
+++ b/integrations/arcadedb/tests/test_document_store.py
@@ -29,11 +29,6 @@ def _sample_docs(n: int = 3, dim: int = 4) -> list[Document]:
     return docs
 
 
-# ---------------------------------------------------------------------------
-# Unit tests (no ArcadeDB required)
-# ---------------------------------------------------------------------------
-
-
 class TestSerialization:
     def test_to_dict_from_dict(self):
         store = ArcadeDBDocumentStore(
@@ -48,11 +43,10 @@ def test_to_dict_from_dict(self):
         assert restored._url == store._url
 
 
-# ---------------------------------------------------------------------------
-# Integration tests (require a running ArcadeDB instance)
-# ---------------------------------------------------------------------------
-
-
+@pytest.mark.skipif(
+    not os.environ.get("ARCADEDB_PASSWORD"),
+    reason="Set ARCADEDB_PASSWORD (e.g. via repo secret in CI) to run integration tests.",
+)
 @pytest.mark.integration
 class TestArcadeDBDocumentStore(DocumentStoreBaseTests):
     """