Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file added integrations/oracle/README.md
Empty file.
14 changes: 14 additions & 0 deletions integrations/oracle/pydoc/config_docusaurus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Configuration file passed to the haystack-pydoc command to build the Oracle integration API reference.
loaders:
# Python modules whose documentation is loaded.
- modules:
- haystack_integrations.components.retrievers.oracle.embedding_retriever
- haystack_integrations.document_stores.oracle.document_store
# Module lookup path, relative to this config file's directory.
search_path: [../src]
processors:
# Filter step: keep documented members only and skip modules left empty.
- type: filter
documented_only: true
skip_empty_modules: true
# Metadata and output filename of the rendered page.
renderer:
description: Oracle AI Vector Search integration for Haystack
id: integrations-oracle
filename: oracle.md
title: Oracle AI Vector Search
90 changes: 90 additions & 0 deletions integrations/oracle/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Packaging, tooling and test configuration for the oracle-haystack distribution.
[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "oracle-haystack"
# The version is not hard-coded here; it is resolved via [tool.hatch.version] below.
dynamic = ["version"]
description = "Oracle AI Vector Search DocumentStore integration for Haystack"
readme = "README.md"
requires-python = ">=3.10"
license = "Apache-2.0"
keywords = ["haystack", "oracle", "vector search", "document store", "RAG", "OCI"]
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
classifiers = [
"License :: OSI Approved :: Apache Software License",
"Development Status :: 3 - Alpha",
"Programming Language :: Python",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
]
# Runtime requirements; oracledb is capped below the 3.0.0 major release.
dependencies = [
"haystack-ai>=2.0.0",
"oracledb>=2.1.0,<3.0.0",
]

[project.optional-dependencies]
# Same pytest plugins as [tool.hatch.envs.test], plus the ruff and mypy tooling.
dev = [
"pytest>=8.0.0",
"pytest-asyncio>=0.23.0",
"pytest-mock>=3.12.0",
"ruff>=0.4.0",
"mypy>=1.9.0",
]

[project.urls]
"Source Code" = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/oracle"
"Bug Tracker" = "https://github.com/deepset-ai/haystack-core-integrations/issues"

# Version is taken from VCS tags shaped like "integrations/oracle-v<version>".
[tool.hatch.version]
source = "vcs"
tag-pattern = 'integrations\/oracle-v(?P<version>.*)'
# NOTE(review): presumably used when no matching tag can be resolved - confirm against hatch-vcs docs.
fallback-version = "0.1.0"

[tool.hatch.version.raw-options]
# Two directories up from this file - presumably the monorepo root holding the git metadata; confirm.
root = "../.."
git_describe_command = 'git describe --tags --match="integrations/oracle-v[0-9]*"'

[tool.hatch.build.targets.wheel]
packages = ["src/haystack_integrations"]

[tool.hatch.envs.default]
installer = "uv"
dependencies = ["haystack-pydoc-tools", "ruff"]

[tool.hatch.envs.default.scripts]
# docs: runs haystack-pydoc against pydoc/config_docusaurus.yml.
docs = ["haystack-pydoc pydoc/config_docusaurus.yml"]
# fmt: applies ruff lint auto-fixes, then ruff formatting, to the given args.
fmt = "ruff check --fix {args}; ruff format {args}"

[tool.hatch.envs.test]
dependencies = [
"pytest>=8.0.0",
"pytest-asyncio>=0.23.0",
"pytest-mock>=3.12.0",
]

[tool.hatch.envs.test.scripts]
# Unit and integration suites live in separate directories and are run separately.
unit = "pytest tests/unit/ -v"
integration = "pytest tests/integration/ -v"

[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "auto"
# Custom markers registered for this test suite.
markers = [
"unit: fast tests, no Oracle connection required",
"integration: require a live Oracle 23ai instance",
]

[tool.ruff]
line-length = 120

[tool.ruff.lint]
select = ["E", "F", "I", "B"]

[tool.mypy]
# Matches the minimum interpreter declared in requires-python.
python_version = "3.10"
disallow_untyped_defs = true
ignore_missing_imports = true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from haystack_integrations.components.retrievers.oracle.embedding_retriever import OracleEmbeddingRetriever

__all__ = ["OracleEmbeddingRetriever"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from __future__ import annotations

from typing import Any

from haystack import component, default_from_dict, default_to_dict
from haystack.dataclasses import Document

from haystack_integrations.document_stores.oracle import OracleDocumentStore


def _merge_filters(
base: dict[str, Any] | None,
override: dict[str, Any] | None,
) -> dict[str, Any] | None:
"""AND-merge two Haystack filter dicts. Returns None if both are empty."""
base = base or {}
override = override or {}
if not base and not override:
return None
if not base:
return override
if not override:
return base
return {"operator": "AND", "conditions": [base, override]}


@component
class OracleEmbeddingRetriever:
    """Retrieves documents from an OracleDocumentStore using vector similarity.

    Use inside a Haystack pipeline after a text embedder::

        pipeline.add_component("embedder", SentenceTransformersTextEmbedder())
        pipeline.add_component("retriever", OracleEmbeddingRetriever(
            document_store=store, top_k=5
        ))
        pipeline.connect("embedder.embedding", "retriever.query_embedding")
    """

    def __init__(
        self,
        *,
        document_store: OracleDocumentStore,
        filters: dict[str, Any] | None = None,
        top_k: int = 10,
    ) -> None:
        """Create the retriever.

        Args:
            document_store: Store that the retrieval calls are delegated to.
            filters: Filters applied to every call; ``None`` is stored as an empty dict.
            top_k: Default ``top_k`` passed to the store when a call does not override it.
        """
        self.document_store = document_store
        self.filters = filters or {}
        self.top_k = top_k

    @component.output_types(documents=list[Document])
    def run(
        self,
        query_embedding: list[float],
        filters: dict[str, Any] | None = None,
        top_k: int | None = None,
    ) -> dict[str, list[Document]]:
        """Retrieve documents by vector similarity.

        Args:
            query_embedding: Dense float vector from an embedder component.
            filters: Runtime filters, AND-merged with constructor filters.
            top_k: Override the constructor top_k for this call.

        Returns:
            ``{"documents": [Document, ...]}``
        """
        merged = _merge_filters(self.filters, filters)
        docs = self.document_store._embedding_retrieval(
            query_embedding,
            filters=merged,
            # Explicit None check so only an omitted top_k falls back to the constructor value.
            top_k=top_k if top_k is not None else self.top_k,
        )
        return {"documents": docs}

    @component.output_types(documents=list[Document])
    async def run_async(
        self,
        query_embedding: list[float],
        filters: dict[str, Any] | None = None,
        top_k: int | None = None,
    ) -> dict[str, list[Document]]:
        """Async variant of :meth:`run`.

        Takes the same arguments and returns the same ``{"documents": [...]}``
        mapping, but awaits the store's ``_async_embedding_retrieval``.
        """
        merged = _merge_filters(self.filters, filters)
        docs = await self.document_store._async_embedding_retrieval(
            query_embedding,
            filters=merged,
            top_k=top_k if top_k is not None else self.top_k,
        )
        return {"documents": docs}

    def to_dict(self) -> dict[str, Any]:
        """Serialize this retriever to a dictionary.

        Returns:
            The result of ``default_to_dict`` with the document store serialized
            through its own ``to_dict`` plus the ``filters`` and ``top_k`` values.
        """
        return default_to_dict(
            self,
            document_store=self.document_store.to_dict(),
            filters=self.filters,
            top_k=self.top_k,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "OracleEmbeddingRetriever":
        """Build a retriever from a dictionary produced by :meth:`to_dict`.

        The nested ``document_store`` entry, when present, is turned back into an
        ``OracleDocumentStore`` first. ``data`` itself is left unmodified, so the
        same serialized dictionary can be deserialized more than once.

        Args:
            data: Serialized retriever.

        Returns:
            The deserialized retriever.
        """
        init_params = data.get("init_parameters", {})
        if "document_store" in init_params:
            # Work on shallow copies: writing the live store object into the caller's
            # dict would corrupt it for any later from_dict call or re-serialization.
            init_params = {
                **init_params,
                "document_store": OracleDocumentStore.from_dict(init_params["document_store"]),
            }
            data = {**data, "init_parameters": init_params}
        return default_from_dict(cls, data)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.1.0"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from haystack_integrations.document_stores.oracle.document_store import (
OracleConnectionConfig,
OracleDocumentStore,
)

__all__ = ["OracleConnectionConfig", "OracleDocumentStore"]
Loading
Loading