Skip to content

Commit b43e664

Browse files
committed
code cleanup v2
1 parent d8b17e7 · commit b43e664

8 files changed

Lines changed: 54 additions & 36 deletions

File tree

examples/query/indexing_pipeline.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
import requests
1010
from couchbase.n1ql import QueryScanConsistency
11+
from couchbase.options import KnownConfigProfiles, QueryOptions
1112
from haystack import Pipeline
1213
from haystack.components.converters import TextFileToDocument
1314
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
@@ -16,13 +17,12 @@
1617
from haystack.utils import Secret
1718

1819
from couchbase_haystack import (
20+
CouchbaseClusterOptions,
1921
CouchbasePasswordAuthenticator,
2022
CouchbaseQueryDocumentStore,
2123
CouchbaseQueryOptions,
2224
QueryVectorSearchType,
23-
CouchbaseClusterOptions,
2425
)
25-
from couchbase.options import KnownConfigProfiles, QueryOptions
2626

2727
logger = logging.getLogger(__name__)
2828

@@ -59,7 +59,9 @@ def fetch_archive_from_http(url: str, output_dir: str):
5959

6060
document_store = CouchbaseQueryDocumentStore(
6161
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
62-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
62+
authenticator=CouchbasePasswordAuthenticator(
63+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
64+
),
6365
cluster_options=CouchbaseClusterOptions(profile=KnownConfigProfiles.WanDevelopment),
6466
bucket=bucket_name,
6567
scope=scope_name,
@@ -102,6 +104,9 @@ def fetch_archive_from_http(url: str, output_dir: str):
102104
"description": "IVF,PQ32x8",
103105
"similarity": "L2",
104106
}
105-
document_store.scope.query(f"Create Index {index_name} ON {collection_name} (embedding vector) USING GSI WITH {json.dumps(cfg)}", QueryOptions(timeout=timedelta(seconds=300))).execute()
107+
document_store.scope.query(
108+
f"Create Index {index_name} ON {collection_name} (embedding vector) USING GSI WITH {json.dumps(cfg)}",
109+
QueryOptions(timeout=timedelta(seconds=300)),
110+
).execute()
106111

107-
logger.info(f"Index created: {index_name}")
112+
logger.info(f"Index created: {index_name}")

examples/query/rag_pipeline.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
11
import os
2+
3+
from couchbase.options import KnownConfigProfiles
24
from haystack import GeneratedAnswer, Pipeline
35
from haystack.components.builders.answer_builder import AnswerBuilder
46
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
5-
from haystack.dataclasses import ChatMessage
67
from haystack.components.embedders import SentenceTransformersTextEmbedder
78
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
8-
from haystack.utils.hf import HFGenerationAPIType
9+
from haystack.dataclasses import ChatMessage
910
from haystack.utils import Secret
11+
from haystack.utils.hf import HFGenerationAPIType
12+
1013
from couchbase_haystack import (
1114
CouchbaseClusterOptions,
1215
CouchbasePasswordAuthenticator,
1316
CouchbaseQueryDocumentStore,
1417
CouchbaseQueryEmbeddingRetriever,
1518
QueryVectorSearchType,
1619
)
17-
from couchbase.options import KnownConfigProfiles
1820

1921
# Load HF Token from environment variables.
2022
HF_TOKEN = Secret.from_env_var("HF_API_TOKEN")
@@ -29,7 +31,9 @@
2931

3032
document_store = CouchbaseQueryDocumentStore(
3133
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
32-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
34+
authenticator=CouchbasePasswordAuthenticator(
35+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
36+
),
3337
cluster_options=CouchbaseClusterOptions(
3438
profile=KnownConfigProfiles.WanDevelopment,
3539
),
@@ -45,14 +49,16 @@
4549
# interacting with LLMs using a custom prompt.
4650
prompt_messages = [
4751
ChatMessage.from_system("You are a helpful assistant that answers questions based on the provided documents."),
48-
ChatMessage.from_user("""Given these documents, answer the question.
52+
ChatMessage.from_user(
53+
"""Given these documents, answer the question.
4954
Documents:
5055
{% for doc in documents %}
5156
{{ doc.content }}
5257
{% endfor %}
5358
5459
Question: {{question}}
55-
Answer:""")
60+
Answer:"""
61+
),
5662
]
5763
rag_pipeline = Pipeline()
5864
rag_pipeline.add_component(
@@ -61,10 +67,13 @@
6167
)
6268
rag_pipeline.add_component("retriever", CouchbaseQueryEmbeddingRetriever(document_store=document_store))
6369
rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_messages, required_variables=["question"]))
64-
rag_pipeline.add_component("llm", HuggingFaceAPIChatGenerator(
65-
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
66-
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
67-
))
70+
rag_pipeline.add_component(
71+
"llm",
72+
HuggingFaceAPIChatGenerator(
73+
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
74+
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
75+
),
76+
)
6877
rag_pipeline.add_component("answer_builder", AnswerBuilder())
6978

7079
rag_pipeline.connect("query_embedder", "retriever.query_embedding")

examples/search/rag_pipeline.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import os
2+
23
from haystack import GeneratedAnswer, Pipeline
34
from haystack.components.builders.answer_builder import AnswerBuilder
45
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
5-
from haystack.dataclasses import ChatMessage
66
from haystack.components.embedders import SentenceTransformersTextEmbedder
77
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
8-
from haystack.utils.hf import HFGenerationAPIType
8+
from haystack.dataclasses import ChatMessage
99
from haystack.utils import Secret
10+
from haystack.utils.hf import HFGenerationAPIType
1011

1112
from couchbase_haystack import CouchbasePasswordAuthenticator, CouchbaseSearchDocumentStore, CouchbaseSearchEmbeddingRetriever
1213

@@ -23,7 +24,9 @@
2324

2425
document_store = CouchbaseSearchDocumentStore(
2526
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
26-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
27+
authenticator=CouchbasePasswordAuthenticator(
28+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
29+
),
2730
bucket=os.getenv("BUCKET_NAME"),
2831
scope=os.getenv("SCOPE_NAME"),
2932
collection=os.getenv("COLLECTION_NAME"),
@@ -34,14 +37,16 @@
3437
# interacting with LLMs using a custom prompt.
3538
prompt_messages = [
3639
ChatMessage.from_system("You are a helpful assistant that answers questions based on the provided documents."),
37-
ChatMessage.from_user("""Given these documents, answer the question.
40+
ChatMessage.from_user(
41+
"""Given these documents, answer the question.
3842
Documents:
3943
{% for doc in documents %}
4044
{{ doc.content }}
4145
{% endfor %}
4246
4347
Question: {{question}}
44-
Answer:""")
48+
Answer:"""
49+
),
4550
]
4651
rag_pipeline = Pipeline()
4752
rag_pipeline.add_component(
@@ -50,10 +55,13 @@
5055
)
5156
rag_pipeline.add_component("retriever", CouchbaseSearchEmbeddingRetriever(document_store=document_store))
5257
rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_messages, required_variables=["question"]))
53-
rag_pipeline.add_component("llm", HuggingFaceAPIChatGenerator(
54-
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
55-
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
56-
))
58+
rag_pipeline.add_component(
59+
"llm",
60+
HuggingFaceAPIChatGenerator(
61+
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
62+
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
63+
),
64+
)
5765
rag_pipeline.add_component("answer_builder", AnswerBuilder())
5866

5967
rag_pipeline.connect("query_embedder", "retriever.query_embedding")

src/couchbase_haystack/components/retrievers/embedding_retriever.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
@component
1717
class CouchbaseSearchEmbeddingRetriever:
1818
"""Retrieves documents from the CouchbaseSearchDocumentStore by embedding similarity.
19-
19+
2020
Uses Search Vector Index (FTS-based) for hybrid searches combining vector, full-text, and geospatial queries.
2121
See CouchbaseSearchDocumentStore for more information.
2222
@@ -139,7 +139,7 @@ def run(
139139
@component
140140
class CouchbaseQueryEmbeddingRetriever:
141141
"""Retrieves documents from the CouchbaseQueryDocumentStore using vector similarity search.
142-
142+
143143
Works with both Hyperscale Vector Index and Composite Vector Index.
144144
Supports ANN (approximate) and KNN (exact) search with various similarity metrics.
145145
See CouchbaseQueryDocumentStore for more details.

src/couchbase_haystack/document_stores/document_store.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -580,18 +580,18 @@ def __get_doc_from_kv(self, response: SearchResult) -> List[Document]:
580580

581581
class CouchbaseQueryDocumentStore(CouchbaseDocumentStore):
582582
"""CouchbaseQueryDocumentStore uses Couchbase Global Secondary Index (GSI) for high-performance vector search.
583-
583+
584584
Supports two types of vector indexes:
585-
585+
586586
- **Hyperscale Vector Indexes**: Optimized for pure vector searches, scales to billions of documents.
587587
Best for chatbot context (RAG), reverse image search, and anomaly detection.
588-
588+
589589
- **Composite Vector Indexes**: Combines vector and scalar indexing. Applies scalar filters before vector search.
590590
Best for filtered recommendations, job searches, and supply chain management.
591-
591+
592592
Search types: ANN (fast, approximate) or KNN (exact).
593593
Similarity metrics: COSINE, DOT, L2/EUCLIDEAN, L2_SQUARED/EUCLIDEAN_SQUARED.
594-
594+
595595
See [Couchbase documentation](https://docs.couchbase.com/server/current/vector-index/use-vector-indexes.html).
596596
"""
597597

@@ -802,7 +802,6 @@ def _embedding_retrieval(
802802
""" # noqa: S608 # query_vector_str is a float array, where_clause is normalized by normalize_sql_filters
803803

804804
try:
805-
806805
query_options = self.query_options.cb_query_options()
807806
# Execute the query
808807
result: QueryResult = self.connection.query(

src/couchbase_haystack/telemetry.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,4 @@ def send_telemetry() -> None:
5757
"""
5858
with suppress(Exception):
5959
t = threading.Thread(target=_send_telemetry, daemon=True)
60-
t.start()
60+
t.start()

tests/search_document_store/test_document_store.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ def document_store(self):
149149
cluster.close()
150150

151151
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
152-
153152
for r in received:
154153
r.score = None
155154
r.embedding = None
@@ -283,7 +282,6 @@ class TestSearchDocumentStoreUnit:
283282
@pytest.fixture
284283
def document_store(self):
285284
with patch("couchbase_haystack.document_stores.document_store.Cluster") as mock_cb_cluster:
286-
287285
cluster = mock_cb_cluster.return_value
288286
bucket = cluster.bucket.return_value
289287
scope = bucket.scope.return_value

tests/search_document_store/test_search_filter.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ def test_filter_lt_condition_array_of_number(self):
216216

217217
@pytest.mark.unit
218218
class TestFilterLTE:
219-
220219
# def test_filter_gt_condition_str(self):
221220
# _filter = {"field": "meta.years", "operator": "==", "value": "2019"}
222221
# normalized_filter = _normalize_filters(_filter)

0 commit comments

Comments
 (0)