Skip to content

Commit 983559f

Browse files
jliounisPSI Bot
authored and committed
Add PerplexityChatGenerator and Perplexity{Text,Document}Embedder
1 parent 006021e commit 983559f

13 files changed

Lines changed: 1254 additions & 1 deletion

File tree

integrations/perplexity/pydoc/config_docusaurus.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
loaders:
22
- modules:
3+
- haystack_integrations.components.embedders.perplexity.document_embedder
4+
- haystack_integrations.components.embedders.perplexity.text_embedder
5+
- haystack_integrations.components.generators.perplexity.chat.chat_generator
36
- haystack_integrations.components.websearch.perplexity.perplexity_websearch
47
search_path: [../src]
58
processors:

integrations/perplexity/pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ integration = 'pytest -m "integration" {args:tests}'
6767
all = 'pytest {args:tests}'
6868
unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
6969
integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
70-
types = "mypy -p haystack_integrations.components.websearch.perplexity {args}"
70+
types = """mypy -p haystack_integrations.components.websearch.perplexity \
71+
-p haystack_integrations.components.generators.perplexity \
72+
-p haystack_integrations.components.embedders.perplexity {args}"""
7173

7274
[tool.mypy]
7375
install_types = true
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from haystack_integrations.components.embedders.perplexity.document_embedder import (
6+
PerplexityDocumentEmbedder,
7+
)
8+
from haystack_integrations.components.embedders.perplexity.text_embedder import (
9+
PerplexityTextEmbedder,
10+
)
11+
12+
__all__ = ["PerplexityDocumentEmbedder", "PerplexityTextEmbedder"]
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import importlib.metadata
6+
from typing import Any, ClassVar
7+
8+
from haystack import component, default_to_dict
9+
from haystack.components.embedders import OpenAIDocumentEmbedder
10+
from haystack.utils.auth import Secret
11+
12+
_INTEGRATION_SLUG = "haystack"
13+
_PACKAGE_NAME = "perplexity-haystack"
14+
15+
16+
def _attribution_header() -> str:
17+
try:
18+
version = importlib.metadata.version(_PACKAGE_NAME)
19+
except importlib.metadata.PackageNotFoundError:
20+
version = "unknown"
21+
return f"{_INTEGRATION_SLUG}/{version}"
22+
23+
24+
def _http_client_kwargs_with_attribution(
25+
http_client_kwargs: dict[str, Any] | None,
26+
) -> dict[str, Any]:
27+
kwargs = dict(http_client_kwargs or {})
28+
headers = dict(kwargs.get("headers", {}))
29+
headers["X-Pplx-Integration"] = _attribution_header()
30+
kwargs["headers"] = headers
31+
return kwargs
32+
33+
34+
@component
class PerplexityDocumentEmbedder(OpenAIDocumentEmbedder):
    """
    Computes embeddings for Documents with Perplexity embedding models.

    The embedding of each Document is stored in the `embedding` field of the Document.
    For supported models, see the
    [Perplexity Embeddings API reference](https://docs.perplexity.ai/api-reference/embeddings-post).

    Usage example:
    ```python
    from haystack import Document
    from haystack_integrations.components.embedders.perplexity import PerplexityDocumentEmbedder

    doc = Document(content="I love pizza!")

    document_embedder = PerplexityDocumentEmbedder()

    result = document_embedder.run([doc])
    print(result['documents'][0].embedding)
    ```
    """

    SUPPORTED_MODELS: ClassVar[list[str]] = [
        "pplx-embed-v1-0.6b",
        "pplx-embed-v1-4b",
    ]
    """A list of models supported by the Perplexity Embeddings API.
    See https://docs.perplexity.ai/api-reference/embeddings-post for the current list of model IDs."""

    def __init__(
        self,
        *,
        api_key: Secret = Secret.from_env_var("PERPLEXITY_API_KEY"),
        model: str = "pplx-embed-v1-0.6b",
        api_base_url: str | None = "https://api.perplexity.ai",
        prefix: str = "",
        suffix: str = "",
        batch_size: int = 32,
        progress_bar: bool = True,
        meta_fields_to_embed: list[str] | None = None,
        embedding_separator: str = "\n",
        timeout: float | None = None,
        max_retries: int | None = None,
        http_client_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """
        Creates a PerplexityDocumentEmbedder component.

        :param api_key:
            The Perplexity API key.
        :param model:
            The name of the model to use.
        :param api_base_url:
            The Perplexity API base URL.
        :param prefix:
            A string to add to the beginning of each text.
        :param suffix:
            A string to add to the end of each text.
        :param batch_size:
            Number of Documents to encode at once.
        :param progress_bar:
            Whether to show a progress bar or not. Can be helpful to disable in production deployments to keep
            the logs clean.
        :param meta_fields_to_embed:
            List of meta fields that should be embedded along with the Document text.
        :param embedding_separator:
            Separator used to concatenate the meta fields to the Document text.
        :param timeout:
            Timeout for Perplexity client calls. If not set, it defaults to either the `OPENAI_TIMEOUT` environment
            variable, or 30 seconds.
        :param max_retries:
            Maximum number of retries to contact Perplexity after an internal error.
            If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or set to 5.
        :param http_client_kwargs:
            A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`.
            For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
        """
        # Only the underlying client sees the attribution header; the caller's
        # dictionary is copied by the helper and left untouched.
        attributed_client_kwargs = _http_client_kwargs_with_attribution(http_client_kwargs)

        # NOTE(review): the explicit two-argument super() is intentional (see the noqa);
        # presumably zero-argument super() does not work under @component - confirm before changing.
        super(PerplexityDocumentEmbedder, self).__init__(  # noqa: UP008
            api_key=api_key,
            model=model,
            dimensions=None,
            api_base_url=api_base_url,
            organization=None,
            prefix=prefix,
            suffix=suffix,
            batch_size=batch_size,
            progress_bar=progress_bar,
            meta_fields_to_embed=meta_fields_to_embed,
            embedding_separator=embedding_separator,
            timeout=timeout,
            max_retries=max_retries,
            http_client_kwargs=attributed_client_kwargs,
        )

        # Keep the raw constructor arguments on the instance: `to_dict` serializes these,
        # so the injected attribution header does not end up in the serialized form.
        self.http_client_kwargs = http_client_kwargs
        self.timeout = timeout
        self.max_retries = max_retries

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        init_parameters: dict[str, Any] = {
            "api_key": self.api_key.to_dict(),
            "model": self.model,
            "api_base_url": self.api_base_url,
            "prefix": self.prefix,
            "suffix": self.suffix,
            "batch_size": self.batch_size,
            "progress_bar": self.progress_bar,
            "meta_fields_to_embed": self.meta_fields_to_embed,
            "embedding_separator": self.embedding_separator,
            "timeout": self.timeout,
            "max_retries": self.max_retries,
            "http_client_kwargs": self.http_client_kwargs,
        }
        return default_to_dict(self, **init_parameters)
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import importlib.metadata
6+
from typing import Any, ClassVar
7+
8+
from haystack import component, default_to_dict
9+
from haystack.components.embedders import OpenAITextEmbedder
10+
from haystack.utils.auth import Secret
11+
12+
_INTEGRATION_SLUG = "haystack"
13+
_PACKAGE_NAME = "perplexity-haystack"
14+
15+
16+
def _attribution_header() -> str:
17+
try:
18+
version = importlib.metadata.version(_PACKAGE_NAME)
19+
except importlib.metadata.PackageNotFoundError:
20+
version = "unknown"
21+
return f"{_INTEGRATION_SLUG}/{version}"
22+
23+
24+
def _http_client_kwargs_with_attribution(
25+
http_client_kwargs: dict[str, Any] | None,
26+
) -> dict[str, Any]:
27+
kwargs = dict(http_client_kwargs or {})
28+
headers = dict(kwargs.get("headers", {}))
29+
headers["X-Pplx-Integration"] = _attribution_header()
30+
kwargs["headers"] = headers
31+
return kwargs
32+
33+
34+
@component
class PerplexityTextEmbedder(OpenAITextEmbedder):
    """
    Embeds strings with Perplexity embedding models.

    For supported models, see the
    [Perplexity Embeddings API reference](https://docs.perplexity.ai/api-reference/embeddings-post).

    Usage example:
    ```python
    from haystack_integrations.components.embedders.perplexity import PerplexityTextEmbedder

    text_to_embed = "I love pizza!"
    text_embedder = PerplexityTextEmbedder()
    print(text_embedder.run(text_to_embed))
    ```
    """

    SUPPORTED_MODELS: ClassVar[list[str]] = [
        "pplx-embed-v1-0.6b",
        "pplx-embed-v1-4b",
    ]
    """A list of models supported by the Perplexity Embeddings API.
    See https://docs.perplexity.ai/api-reference/embeddings-post for the current list of model IDs."""

    def __init__(
        self,
        *,
        api_key: Secret = Secret.from_env_var("PERPLEXITY_API_KEY"),
        model: str = "pplx-embed-v1-0.6b",
        api_base_url: str | None = "https://api.perplexity.ai",
        prefix: str = "",
        suffix: str = "",
        timeout: float | None = None,
        max_retries: int | None = None,
        http_client_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """
        Creates a PerplexityTextEmbedder component.

        :param api_key:
            The Perplexity API key.
        :param model:
            The name of the Perplexity embedding model to be used.
        :param api_base_url:
            The Perplexity API base URL.
        :param prefix:
            A string to add to the beginning of each text.
        :param suffix:
            A string to add to the end of each text.
        :param timeout:
            Timeout for Perplexity client calls. If not set, it defaults to either the `OPENAI_TIMEOUT` environment
            variable, or 30 seconds.
        :param max_retries:
            Maximum number of retries to contact Perplexity after an internal error.
            If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or set to 5.
        :param http_client_kwargs:
            A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`.
            For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
        """
        # Only the underlying client sees the attribution header; the caller's
        # dictionary is copied by the helper and left untouched.
        attributed_client_kwargs = _http_client_kwargs_with_attribution(http_client_kwargs)

        # NOTE(review): the explicit two-argument super() is intentional (see the noqa);
        # presumably zero-argument super() does not work under @component - confirm before changing.
        super(PerplexityTextEmbedder, self).__init__(  # noqa: UP008
            api_key=api_key,
            model=model,
            dimensions=None,
            api_base_url=api_base_url,
            organization=None,
            prefix=prefix,
            suffix=suffix,
            timeout=timeout,
            max_retries=max_retries,
            http_client_kwargs=attributed_client_kwargs,
        )

        # Keep the raw constructor arguments on the instance: `to_dict` serializes these,
        # so the injected attribution header does not end up in the serialized form.
        self.http_client_kwargs = http_client_kwargs
        self.timeout = timeout
        self.max_retries = max_retries

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        init_parameters: dict[str, Any] = {
            "api_key": self.api_key.to_dict(),
            "model": self.model,
            "api_base_url": self.api_base_url,
            "prefix": self.prefix,
            "suffix": self.suffix,
            "timeout": self.timeout,
            "max_retries": self.max_retries,
            "http_client_kwargs": self.http_client_kwargs,
        }
        return default_to_dict(self, **init_parameters)

integrations/perplexity/src/haystack_integrations/components/embedders/py.typed

Whitespace-only changes.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from haystack_integrations.components.generators.perplexity.chat.chat_generator import (
6+
PerplexityChatGenerator,
7+
)
8+
9+
__all__ = ["PerplexityChatGenerator"]
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from haystack_integrations.components.generators.perplexity.chat.chat_generator import (
6+
PerplexityChatGenerator,
7+
)
8+
9+
__all__ = ["PerplexityChatGenerator"]

0 commit comments

Comments
 (0)