Skip to content

Commit 9eaf424

Browse files
mpartipilo and claude committed
Add text2vec-digitalocean vectorizer module
Adds `text2vec-digitalocean` to the `Vectorizers` enum and exposes factory methods on `Configure.Vectorizer`, `Configure.NamedVectors`, and `Configure.Vectors`.

The module accepts an optional `base_url` (server default `https://inference.do-ai.run`) and a `model` (required by the server, e.g. `qwen3-embedding-0.6b`). The shape mirrors `text2vec-mistral` exactly (`model` + `baseURL` + `vectorizeClassName`), so serialization, URL normalization, and the existing `_to_dict` baseURL-stripping path are reused unchanged.

Closes #2038

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 996c8c3 commit 9eaf424

4 files changed

Lines changed: 151 additions & 0 deletions

File tree

test/collection/test_config.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,20 @@ def test_basic_config():
329329
}
330330
},
331331
),
332+
(
333+
Configure.Vectorizer.text2vec_digitalocean(
334+
vectorize_collection_name=False,
335+
model="qwen3-embedding-0.6b",
336+
base_url="https://inference.do-ai.run",
337+
),
338+
{
339+
"text2vec-digitalocean": {
340+
"vectorizeClassName": False,
341+
"model": "qwen3-embedding-0.6b",
342+
"baseURL": "https://inference.do-ai.run/",
343+
}
344+
},
345+
),
332346
(
333347
Configure.Vectorizer.text2vec_palm(
334348
project_id="project",
@@ -1771,6 +1785,20 @@ def test_vector_config_flat_pq() -> None:
17711785
}
17721786
},
17731787
),
1788+
(
1789+
[Configure.NamedVectors.text2vec_digitalocean(name="test", source_properties=["prop"])],
1790+
{
1791+
"test": {
1792+
"vectorizer": {
1793+
"text2vec-digitalocean": {
1794+
"vectorizeClassName": True,
1795+
"properties": ["prop"],
1796+
}
1797+
},
1798+
"vectorIndexType": "hnsw",
1799+
}
1800+
},
1801+
),
17741802
(
17751803
[
17761804
Configure.NamedVectors.text2vec_palm(
@@ -2373,6 +2401,20 @@ def test_config_with_named_vectors(
23732401
}
23742402
},
23752403
),
2404+
(
2405+
[Configure.Vectors.text2vec_digitalocean(name="test", source_properties=["prop"])],
2406+
{
2407+
"test": {
2408+
"vectorizer": {
2409+
"text2vec-digitalocean": {
2410+
"vectorizeClassName": True,
2411+
"properties": ["prop"],
2412+
}
2413+
},
2414+
"vectorIndexType": "hnsw",
2415+
}
2416+
},
2417+
),
23762418
(
23772419
[Configure.Vectors.text2vec_morph(name="test", source_properties=["prop"])],
23782420
{

weaviate/collections/classes/config_named_vectors.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
_Text2VecCohereConfig,
4949
_Text2VecContextionaryConfig,
5050
_Text2VecDatabricksConfig,
51+
_Text2VecDigitalOceanConfig,
5152
_Text2VecGoogleConfig,
5253
_Text2VecGPT4AllConfig,
5354
_Text2VecHuggingFaceConfig,
@@ -358,6 +359,40 @@ def text2vec_mistral(
358359
vector_index_config=vector_index_config,
359360
)
360361

362+
@staticmethod
def text2vec_digitalocean(
    name: str,
    *,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[str] = None,
    source_properties: Optional[List[str]] = None,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
    vectorize_collection_name: bool = True,
) -> _NamedVectorConfigCreate:
    """Build a named vector backed by the `text2vec-digitalocean` module.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings)
    for detailed usage.

    Args:
        name: The name of the named vector.
        base_url: Where API requests should be sent. `None` (the default) leaves the choice to the
            server, whose default is `https://inference.do-ai.run`.
        model: The embedding model, e.g. `qwen3-embedding-0.6b`. The server requires this field.
        source_properties: The properties to include when vectorizing. All text properties are
            included by default.
        vector_index_config: The configuration for Weaviate's vector index, created through
            `wvc.config.Configure.VectorIndex`. Defaults to `None`.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Returns:
        The named vector configuration wrapping the DigitalOcean vectorizer settings.
    """
    # Assemble the module-specific settings first, then wrap them in the named vector.
    digitalocean_vectorizer = _Text2VecDigitalOceanConfig(
        baseURL=base_url,
        model=model,
        vectorizeClassName=vectorize_collection_name,
    )
    return _NamedVectorConfigCreate(
        name=name,
        source_properties=source_properties,
        vectorizer=digitalocean_vectorizer,
        vector_index_config=vector_index_config,
    )
395+
361396
@staticmethod
362397
def text2vec_ollama(
363398
name: str,

weaviate/collections/classes/config_vectorizers.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ class Vectorizers(str, Enum):
119119
TEXT2VEC_COHERE = "text2vec-cohere"
120120
TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary"
121121
TEXT2VEC_DATABRICKS = "text2vec-databricks"
122+
TEXT2VEC_DIGITALOCEAN = "text2vec-digitalocean"
122123
TEXT2VEC_GPT4ALL = "text2vec-gpt4all"
123124
TEXT2VEC_HUGGINGFACE = "text2vec-huggingface"
124125
TEXT2VEC_MISTRAL = "text2vec-mistral"
@@ -286,6 +287,21 @@ def _to_dict(self) -> Dict[str, Any]:
286287
return ret_dict
287288

288289

290+
class _Text2VecDigitalOceanConfig(_VectorizerConfigCreate):
    # Creation-time settings for the `text2vec-digitalocean` vectorizer module.

    # Module identifier: frozen at its default and flagged `exclude=True` on the field.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_DIGITALOCEAN, frozen=True, exclude=True
    )
    # Embedding model name, e.g. `qwen3-embedding-0.6b`.
    model: Optional[str]
    # Whether the collection name is vectorized as well.
    vectorizeClassName: bool
    # Optional override for the endpoint that API requests are sent to.
    baseURL: Optional[AnyHttpUrl]

    def _to_dict(self) -> Dict[str, Any]:
        """Return the parent's dict form, with `baseURL` rendered as a plain string when set."""
        serialized = super()._to_dict()
        url = self.baseURL
        if url is None:
            return serialized
        # Replace whatever the parent produced for the URL with its string form.
        serialized["baseURL"] = url.unicode_string()
        return serialized
303+
304+
289305
class _Text2VecMorphConfig(_VectorizerConfigCreate):
290306
vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
291307
default=Vectorizers.TEXT2VEC_MORPH, frozen=True, exclude=True
@@ -1084,6 +1100,27 @@ def text2vec_mistral(
10841100
baseURL=base_url, model=model, vectorizeClassName=vectorize_collection_name
10851101
)
10861102

1103+
@staticmethod
def text2vec_digitalocean(
    *,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[str] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build a `_Text2VecDigitalOceanConfig` for vectorizing with the `text2vec-digitalocean` module.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings)
    for detailed usage.

    Args:
        base_url: Where API requests should be sent. `None` (the default) leaves the choice to the
            server, whose default is `https://inference.do-ai.run`.
        model: The embedding model, e.g. `qwen3-embedding-0.6b`. The server requires this field.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Returns:
        The vectorizer configuration for the DigitalOcean module.
    """
    digitalocean_config = _Text2VecDigitalOceanConfig(
        baseURL=base_url,
        model=model,
        vectorizeClassName=vectorize_collection_name,
    )
    return digitalocean_config
1123+
10871124
@staticmethod
10881125
def text2vec_ollama(
10891126
*,

weaviate/collections/classes/config_vectors.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
_Text2VecCohereConfig,
6060
_Text2VecContextionaryConfig,
6161
_Text2VecDatabricksConfig,
62+
_Text2VecDigitalOceanConfig,
6263
_Text2VecGoogleConfig,
6364
_Text2VecGPT4AllConfig,
6465
_Text2VecHuggingFaceConfig,
@@ -620,6 +621,42 @@ def text2vec_mistral(
620621
vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
621622
)
622623

624+
@staticmethod
def text2vec_digitalocean(
    *,
    name: Optional[str] = None,
    quantizer: Optional[_QuantizerConfigCreate] = None,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[str] = None,
    source_properties: Optional[List[str]] = None,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
    vectorize_collection_name: bool = True,
) -> _VectorConfigCreate:
    """Build a vector backed by the `text2vec-digitalocean` module.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings)
    for detailed usage.

    Args:
        name: The name of the vector.
        quantizer: The quantizer to use for the vector index. No quantization is applied when omitted.
        base_url: Where API requests should be sent. `None` (the default) leaves the choice to the
            server, whose default is `https://inference.do-ai.run`.
        model: The embedding model, e.g. `qwen3-embedding-0.6b`. The server requires this field.
        source_properties: The properties to include when vectorizing. All text properties are
            included by default.
        vector_index_config: The configuration for Weaviate's vector index, created through
            `wvc.config.Configure.VectorIndex`. Defaults to `None`.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.

    Returns:
        The vector configuration wrapping the DigitalOcean vectorizer settings.
    """
    # Build the vectorizer before the index wrapper, matching the keyword evaluation order
    # of a single inline call.
    digitalocean_vectorizer = _Text2VecDigitalOceanConfig(
        baseURL=base_url,
        model=model,
        vectorizeClassName=vectorize_collection_name,
    )
    index_config = _IndexWrappers.single(vector_index_config, quantizer)
    return _VectorConfigCreate(
        name=name,
        source_properties=source_properties,
        vectorizer=digitalocean_vectorizer,
        vector_index_config=index_config,
    )
659+
623660
@staticmethod
624661
def text2vec_morph(
625662
*,

0 commit comments

Comments
 (0)