Skip to content

Commit fdbe883

Browse files
authored
fix!: Cohere Embedders - set embed-v4.0 as the default model (#3111)
* fix: Cohere Embedders - set embed-v4.0 as the default model
* fix emb size
1 parent f6f7d8e commit fdbe883

4 files changed

Lines changed: 20 additions & 30 deletions

File tree

integrations/cohere/src/haystack_integrations/components/embedders/cohere/document_embedder.py

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ class CohereDocumentEmbedder:
4949
def __init__(
5050
self,
5151
api_key: Secret = Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"]),
52-
model: str = "embed-english-v2.0",
52+
model: str = "embed-v4.0",
5353
input_type: str = "search_document",
5454
api_base_url: str = "https://api.cohere.com",
5555
truncate: str = "END",
@@ -64,15 +64,10 @@ def __init__(
6464
Initialize the CohereDocumentEmbedder.
6565
6666
:param api_key: the Cohere API key.
67-
:param model: the name of the model to use. Supported Models are:
68-
`"embed-english-v3.0"`, `"embed-english-light-v3.0"`, `"embed-multilingual-v3.0"`,
69-
`"embed-multilingual-light-v3.0"`, `"embed-english-v2.0"`, `"embed-english-light-v2.0"`,
70-
`"embed-multilingual-v2.0"`. This list of all supported models can be found in the
71-
[model documentation](https://docs.cohere.com/docs/models#representation).
67+
:param model: the name of the model to use.
68+
Read [Cohere documentation](https://docs.cohere.com/docs/models#embed) for a list of all supported models.
7269
:param input_type: specifies the type of input you're giving to the model. Supported values are
73-
"search_document", "search_query", "classification" and "clustering". Not
74-
required for older versions of the embedding models (meaning anything lower than v3), but is required for
75-
more recent versions (meaning anything bigger than v2).
70+
"search_document", "search_query", "classification" and "clustering".
7671
:param api_base_url: the Cohere API Base url.
7772
:param truncate: truncate embeddings that are too long from start or end, ("NONE"|"START"|"END").
7873
Passing "START" will discard the start of the input. "END" will discard the end of the input. In both

integrations/cohere/src/haystack_integrations/components/embedders/cohere/text_embedder.py

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ class CohereTextEmbedder:
4545
def __init__(
4646
self,
4747
api_key: Secret = Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"]),
48-
model: str = "embed-english-v2.0",
48+
model: str = "embed-v4.0",
4949
input_type: str = "search_query",
5050
api_base_url: str = "https://api.cohere.com",
5151
truncate: str = "END",
@@ -56,15 +56,10 @@ def __init__(
5656
Initialize the CohereTextEmbedder.
5757
5858
:param api_key: the Cohere API key.
59-
:param model: the name of the model to use. Supported Models are:
60-
`"embed-english-v3.0"`, `"embed-english-light-v3.0"`, `"embed-multilingual-v3.0"`,
61-
`"embed-multilingual-light-v3.0"`, `"embed-english-v2.0"`, `"embed-english-light-v2.0"`,
62-
`"embed-multilingual-v2.0"`. This list of all supported models can be found in the
63-
[model documentation](https://docs.cohere.com/docs/models#representation).
59+
:param model: the name of the model to use.
60+
Read [Cohere documentation](https://docs.cohere.com/docs/models#embed) for a list of all supported models.
6461
:param input_type: specifies the type of input you're giving to the model. Supported values are
65-
"search_document", "search_query", "classification" and "clustering". Not
66-
required for older versions of the embedding models (meaning anything lower than v3), but is required for
67-
more recent versions (meaning anything bigger than v2).
62+
"search_document", "search_query", "classification" and "clustering".
6863
:param api_base_url: the Cohere API Base url.
6964
:param truncate: truncate embeddings that are too long from start or end, ("NONE"|"START"|"END").
7065
Passing "START" will discard the start of the input. "END" will discard the end of the input. In both

integrations/cohere/tests/test_document_embedder.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def test_init_default(self, monkeypatch):
2626
monkeypatch.setenv("COHERE_API_KEY", "test-api-key")
2727
embedder = CohereDocumentEmbedder()
2828
assert embedder.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
29-
assert embedder.model == "embed-english-v2.0"
29+
assert embedder.model == "embed-v4.0"
3030
assert embedder.input_type == "search_document"
3131
assert embedder.api_base_url == COHERE_API_URL
3232
assert embedder.truncate == "END"
@@ -70,7 +70,7 @@ def test_to_dict(self, monkeypatch):
7070
"type": "haystack_integrations.components.embedders.cohere.document_embedder.CohereDocumentEmbedder",
7171
"init_parameters": {
7272
"api_key": {"env_vars": ["COHERE_API_KEY", "CO_API_KEY"], "strict": True, "type": "env_var"},
73-
"model": "embed-english-v2.0",
73+
"model": "embed-v4.0",
7474
"input_type": "search_document",
7575
"api_base_url": COHERE_API_URL,
7676
"truncate": "END",
@@ -122,7 +122,7 @@ def test_from_dict(self, monkeypatch):
122122
"type": "haystack_integrations.components.embedders.cohere.document_embedder.CohereDocumentEmbedder",
123123
"init_parameters": {
124124
"api_key": {"env_vars": ["COHERE_API_KEY", "CO_API_KEY"], "strict": True, "type": "env_var"},
125-
"model": "embed-english-v2.0",
125+
"model": "embed-v4.0",
126126
"input_type": "search_document",
127127
"api_base_url": COHERE_API_URL,
128128
"truncate": "END",
@@ -137,7 +137,7 @@ def test_from_dict(self, monkeypatch):
137137
}
138138
embedder = CohereDocumentEmbedder.from_dict(component_dict)
139139
assert embedder.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
140-
assert embedder.model == "embed-english-v2.0"
140+
assert embedder.model == "embed-v4.0"
141141
assert embedder.input_type == "search_document"
142142
assert embedder.api_base_url == COHERE_API_URL
143143
assert embedder.truncate == "END"
@@ -253,7 +253,7 @@ async def test_run_async_does_not_modify_original_documents(self, mock_get_respo
253253
)
254254
@pytest.mark.integration
255255
def test_live_run(self):
256-
embedder = CohereDocumentEmbedder(model="embed-english-v2.0", embedding_type=EmbeddingTypes.FLOAT)
256+
embedder = CohereDocumentEmbedder(model="embed-v4.0", embedding_type=EmbeddingTypes.FLOAT)
257257

258258
docs = [
259259
Document(content="I love cheese", meta={"topic": "Cuisine"}),
@@ -276,7 +276,7 @@ def test_live_run(self):
276276
)
277277
@pytest.mark.integration
278278
async def test_live_run_async(self):
279-
embedder = CohereDocumentEmbedder(model="embed-english-v2.0", embedding_type=EmbeddingTypes.FLOAT)
279+
embedder = CohereDocumentEmbedder(model="embed-v4.0", embedding_type=EmbeddingTypes.FLOAT)
280280

281281
docs = [
282282
Document(content="I love cheese", meta={"topic": "Cuisine"}),

integrations/cohere/tests/test_text_embedder.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def test_init_default(self, monkeypatch):
2828
embedder = CohereTextEmbedder()
2929

3030
assert embedder.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
31-
assert embedder.model == "embed-english-v2.0"
31+
assert embedder.model == "embed-v4.0"
3232
assert embedder.input_type == "search_query"
3333
assert embedder.api_base_url == COHERE_API_URL
3434
assert embedder.truncate == "END"
@@ -65,7 +65,7 @@ def test_to_dict(self, monkeypatch):
6565
"type": "haystack_integrations.components.embedders.cohere.text_embedder.CohereTextEmbedder",
6666
"init_parameters": {
6767
"api_key": {"env_vars": ["COHERE_API_KEY", "CO_API_KEY"], "strict": True, "type": "env_var"},
68-
"model": "embed-english-v2.0",
68+
"model": "embed-v4.0",
6969
"input_type": "search_query",
7070
"api_base_url": COHERE_API_URL,
7171
"truncate": "END",
@@ -108,7 +108,7 @@ def test_from_dict(self, monkeypatch):
108108
"type": "haystack_integrations.components.embedders.cohere.text_embedder.CohereTextEmbedder",
109109
"init_parameters": {
110110
"api_key": {"env_vars": ["COHERE_API_KEY", "CO_API_KEY"], "strict": True, "type": "env_var"},
111-
"model": "embed-english-v2.0",
111+
"model": "embed-v4.0",
112112
"input_type": "search_query",
113113
"api_base_url": COHERE_API_URL,
114114
"truncate": "END",
@@ -120,7 +120,7 @@ def test_from_dict(self, monkeypatch):
120120

121121
embedder = CohereTextEmbedder.from_dict(component_dict)
122122
assert embedder.api_key == Secret.from_env_var(["COHERE_API_KEY", "CO_API_KEY"])
123-
assert embedder.model == "embed-english-v2.0"
123+
assert embedder.model == "embed-v4.0"
124124
assert embedder.input_type == "search_query"
125125
assert embedder.api_base_url == COHERE_API_URL
126126
assert embedder.truncate == "END"
@@ -148,7 +148,7 @@ def test_run(self):
148148
text = "The food was delicious"
149149
result = embedder.run(text=text)
150150

151-
assert len(result["embedding"]) == 4096
151+
assert len(result["embedding"]) == 1536
152152
assert all(isinstance(x, float) for x in result["embedding"])
153153

154154
@pytest.mark.asyncio
@@ -162,5 +162,5 @@ async def test_run_async(self):
162162
text = "The food was delicious"
163163
result = await embedder.run_async(text=text)
164164

165-
assert len(result["embedding"]) == 4096
165+
assert len(result["embedding"]) == 1536
166166
assert all(isinstance(x, float) for x in result["embedding"])

0 commit comments

Comments
 (0)