Skip to content

Commit f26bee0

Browse files
authored
Merge pull request #2041 from weaviate/text2vec-digitalocean
Add text2vec-digitalocean vectorizer module
2 parents 996c8c3 + 22aa8fc commit f26bee0

3 files changed

Lines changed: 72 additions & 0 deletions

File tree

test/collection/test_config.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2373,6 +2373,25 @@ def test_config_with_named_vectors(
23732373
}
23742374
},
23752375
),
2376+
(
2377+
[
2378+
Configure.Vectors.text2vec_digitalocean(
2379+
name="test", source_properties=["prop"], model="qwen2"
2380+
)
2381+
],
2382+
{
2383+
"test": {
2384+
"vectorizer": {
2385+
"text2vec-digitalocean": {
2386+
"vectorizeClassName": True,
2387+
"properties": ["prop"],
2388+
"model": "qwen2",
2389+
}
2390+
},
2391+
"vectorIndexType": "hnsw",
2392+
}
2393+
},
2394+
),
23762395
(
23772396
[Configure.Vectors.text2vec_morph(name="test", source_properties=["prop"])],
23782397
{

weaviate/collections/classes/config_vectorizers.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,7 @@ class Vectorizers(str, Enum):
119119
TEXT2VEC_COHERE = "text2vec-cohere"
120120
TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary"
121121
TEXT2VEC_DATABRICKS = "text2vec-databricks"
122+
TEXT2VEC_DIGITALOCEAN = "text2vec-digitalocean"
122123
TEXT2VEC_GPT4ALL = "text2vec-gpt4all"
123124
TEXT2VEC_HUGGINGFACE = "text2vec-huggingface"
124125
TEXT2VEC_MISTRAL = "text2vec-mistral"
@@ -286,6 +287,21 @@ def _to_dict(self) -> Dict[str, Any]:
286287
return ret_dict
287288

288289

290+
class _Text2VecDigitalOceanConfig(_VectorizerConfigCreate):
    # Creation-time settings for the `text2vec-digitalocean` vectorizer module.

    # Module identifier: defaults to the DigitalOcean vectorizer, is frozen,
    # and is excluded from the model's own field serialization.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_DIGITALOCEAN,
        frozen=True,
        exclude=True,
    )
    # Name of the embedding model to use.
    model: str
    # Whether the collection (class) name is vectorized as well.
    vectorizeClassName: bool
    # Optional endpoint override; only written to the payload when it is set.
    baseURL: Optional[AnyHttpUrl]

    def _to_dict(self) -> Dict[str, Any]:
        """Serialize the config, rendering `baseURL` as a plain string when present."""
        payload = super()._to_dict()
        endpoint = self.baseURL
        if endpoint is None:
            return payload
        # Store the URL in its string form rather than as an `AnyHttpUrl` object.
        payload["baseURL"] = endpoint.unicode_string()
        return payload
303+
304+
289305
class _Text2VecMorphConfig(_VectorizerConfigCreate):
290306
vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
291307
default=Vectorizers.TEXT2VEC_MORPH, frozen=True, exclude=True

weaviate/collections/classes/config_vectors.py

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@
5959
_Text2VecCohereConfig,
6060
_Text2VecContextionaryConfig,
6161
_Text2VecDatabricksConfig,
62+
_Text2VecDigitalOceanConfig,
6263
_Text2VecGoogleConfig,
6364
_Text2VecGPT4AllConfig,
6465
_Text2VecHuggingFaceConfig,
@@ -620,6 +621,42 @@ def text2vec_mistral(
620621
vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
621622
)
622623

624+
@staticmethod
def text2vec_digitalocean(
    *,
    name: Optional[str] = None,
    quantizer: Optional[_QuantizerConfigCreate] = None,
    base_url: Optional[AnyHttpUrl] = None,
    model: str,
    source_properties: Optional[List[str]] = None,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
    vectorize_collection_name: bool = True,
) -> _VectorConfigCreate:
    """Create a vector using the `text2vec-digitalocean` module.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings)
    for detailed usage.

    Args:
        name: The name of the vector.
        quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied.
        base_url: The base URL to send API requests to. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`.
        model: The model to use, e.g. `qwen3-embedding-0.6b`. This is a required field on the server.
        source_properties: Which properties should be included when vectorizing. By default all text properties are included.
        vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. Defaults to `None`.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    # Module-specific settings are built first, then the index wrapper, and
    # finally both are combined into the vector definition.
    module_config = _Text2VecDigitalOceanConfig(
        baseURL=base_url,
        model=model,
        vectorizeClassName=vectorize_collection_name,
    )
    index_config = _IndexWrappers.single(vector_index_config, quantizer)
    return _VectorConfigCreate(
        name=name,
        source_properties=source_properties,
        vectorizer=module_config,
        vector_index_config=index_config,
    )
659+
623660
@staticmethod
624661
def text2vec_morph(
625662
*,

0 commit comments

Comments
 (0)