-
Notifications
You must be signed in to change notification settings - Fork 266
Expand file tree
/
Copy pathpipeline.py
More file actions
51 lines (41 loc) · 2.07 KB
/
pipeline.py
File metadata and controls
51 lines (41 loc) · 2.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# SPDX-FileCopyrightText: 2026-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from haystack import Document, Pipeline
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from pyversity import Strategy
from haystack_integrations.components.rankers.pyversity import PyversityReranker
# Index documents
# A small in-memory corpus mixing statements about France and Germany, embedded once
# up front and written to the store that the retriever reads from.
document_store = InMemoryDocumentStore()
raw_documents = [
    Document(content="Paris is the capital of France."),
    Document(content="The Eiffel Tower is located in Paris."),
    Document(content="Berlin is the capital of Germany."),
    Document(content="The Brandenburg Gate is in Berlin."),
    Document(content="France borders Spain to the south."),
    Document(content="The Louvre is the world's largest art museum and is in Paris."),
    Document(content="Munich is the capital of Bavaria."),
    Document(content="The Rhine river flows through Germany and France."),
]
doc_embedder = SentenceTransformersDocumentEmbedder()
# This embedder is used on its own rather than inside a Pipeline, so load its model
# explicitly. Haystack releases that do not load the model lazily in run() fail
# without this call; where run() would have loaded it anyway the call is harmless.
doc_embedder.warm_up()
# run() returns a dict; the embedded documents are stored under the "documents" key.
documents_with_embeddings = doc_embedder.run(documents=raw_documents)["documents"]
document_store.write_documents(documents_with_embeddings)
# Build pipeline
# Query flow: text_embedder -> retriever -> reranker.
pipeline = Pipeline()

query_embedder = SentenceTransformersTextEmbedder()
# The retriever fetches 6 candidates and keeps their embeddings on the returned
# documents (presumably needed by the reranker -- confirm against the PyversityReranker docs).
embedding_retriever = InMemoryEmbeddingRetriever(
    document_store=document_store,
    top_k=6,
    return_embedding=True,
)
mmr_reranker = PyversityReranker(k=3, strategy=Strategy.MMR, diversity=0.7)

# Register the components under the names used by the connections and by the run inputs.
for component_name, component in (
    ("text_embedder", query_embedder),
    ("retriever", embedding_retriever),
    ("reranker", mmr_reranker),
):
    pipeline.add_component(component_name, component)

# Wire each output socket to the input socket of the next stage.
for output_socket, input_socket in (
    ("text_embedder.embedding", "retriever.query_embedding"),
    ("retriever.documents", "reranker.documents"),
):
    pipeline.connect(output_socket, input_socket)
# Run
# Only the text embedder is given an explicit input; the retriever and reranker
# receive theirs through the connections declared on the pipeline.
result = pipeline.run({"text_embedder": {"text": "What are the famous landmarks in France?"}})
# Print every document emitted by the reranker as "<score> <content>".
for doc in result["reranker"]["documents"]:
    print(f"{doc.score:.4f} {doc.content}")
# Example output (scores elided):
# 0.xxxx Paris is the capital of France.
# 0.xxxx The Eiffel Tower is located in Paris.
# 0.xxxx France borders Spain to the south.