Add text2vec-digitalocean vectorizer module

mpartipilo · claude · mpartipilo · commit 9eaf424c6307 · 2026-05-18T12:40:54.000+02:00
Adds `text2vec-digitalocean` to the `Vectorizers` enum and exposes factory methods on `Configure.Vectorizer`, `Configure.NamedVectors`, and `Configure.Vectors`. The module accepts an optional `base_url` (server default `https://inference.do-ai.run`) and a `model` (required by the server, e.g. `qwen3-embedding-0.6b`).

The shape mirrors `text2vec-mistral` exactly (model + baseURL + vectorizeClassName), so serialization, URL normalization, and the existing `_to_dict` baseURL-stripping path are reused unchanged.

Closes #2038

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/test/collection/test_config.py b/test/collection/test_config.py
@@ -329,6 +329,20 @@ def test_basic_config():
             }
         },
     ),
+    (
+        Configure.Vectorizer.text2vec_digitalocean(
+            vectorize_collection_name=False,
+            model="qwen3-embedding-0.6b",
+            base_url="https://inference.do-ai.run",
+        ),
+        {
+            "text2vec-digitalocean": {
+                "vectorizeClassName": False,
+                "model": "qwen3-embedding-0.6b",
+                "baseURL": "https://inference.do-ai.run/",
+            }
+        },
+    ),
     (
         Configure.Vectorizer.text2vec_palm(
             project_id="project",
@@ -1771,6 +1785,20 @@ def test_vector_config_flat_pq() -> None:
             }
         },
     ),
+    (
+        [Configure.NamedVectors.text2vec_digitalocean(name="test", source_properties=["prop"])],
+        {
+            "test": {
+                "vectorizer": {
+                    "text2vec-digitalocean": {
+                        "vectorizeClassName": True,
+                        "properties": ["prop"],
+                    }
+                },
+                "vectorIndexType": "hnsw",
+            }
+        },
+    ),
     (
         [
             Configure.NamedVectors.text2vec_palm(
@@ -2373,6 +2401,20 @@ def test_config_with_named_vectors(
             }
         },
     ),
+    (
+        [Configure.Vectors.text2vec_digitalocean(name="test", source_properties=["prop"])],
+        {
+            "test": {
+                "vectorizer": {
+                    "text2vec-digitalocean": {
+                        "vectorizeClassName": True,
+                        "properties": ["prop"],
+                    }
+                },
+                "vectorIndexType": "hnsw",
+            }
+        },
+    ),
     (
         [Configure.Vectors.text2vec_morph(name="test", source_properties=["prop"])],
         {
diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py
@@ -48,6 +48,7 @@
     _Text2VecCohereConfig,
     _Text2VecContextionaryConfig,
     _Text2VecDatabricksConfig,
+    _Text2VecDigitalOceanConfig,
     _Text2VecGoogleConfig,
     _Text2VecGPT4AllConfig,
     _Text2VecHuggingFaceConfig,
@@ -358,6 +359,40 @@ def text2vec_mistral(
             vector_index_config=vector_index_config,
         )
 
+    @staticmethod
+    def text2vec_digitalocean(
+        name: str,
+        *,
+        base_url: Optional[AnyHttpUrl] = None,
+        model: Optional[str] = None,
+        source_properties: Optional[List[str]] = None,
+        vector_index_config: Optional[_VectorIndexConfigCreate] = None,
+        vectorize_collection_name: bool = True,
+    ) -> _NamedVectorConfigCreate:
+        """Create a named vector using the `text2vec-digitalocean` model.
+
+        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings)
+        for detailed usage.
+
+        Args:
+            name: The name of the named vector.
+            base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`.
+            model: The model to use, e.g. `qwen3-embedding-0.6b`. Defaults to `None` in this client, but this is a required field on the server.
+            source_properties: Which properties should be included when vectorizing. By default all text properties are included.
+            vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. Defaults to `None`.
+            vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
+        """
+        return _NamedVectorConfigCreate(
+            name=name,
+            source_properties=source_properties,
+            vectorizer=_Text2VecDigitalOceanConfig(
+                baseURL=base_url,
+                model=model,
+                vectorizeClassName=vectorize_collection_name,
+            ),
+            vector_index_config=vector_index_config,
+        )
+
     @staticmethod
     def text2vec_ollama(
         name: str,
diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py
@@ -119,6 +119,7 @@ class Vectorizers(str, Enum):
     TEXT2VEC_COHERE = "text2vec-cohere"
     TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary"
     TEXT2VEC_DATABRICKS = "text2vec-databricks"
+    TEXT2VEC_DIGITALOCEAN = "text2vec-digitalocean"
     TEXT2VEC_GPT4ALL = "text2vec-gpt4all"
     TEXT2VEC_HUGGINGFACE = "text2vec-huggingface"
     TEXT2VEC_MISTRAL = "text2vec-mistral"
@@ -286,6 +287,21 @@ def _to_dict(self) -> Dict[str, Any]:
         return ret_dict
 
 
+class _Text2VecDigitalOceanConfig(_VectorizerConfigCreate):
+    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
+        default=Vectorizers.TEXT2VEC_DIGITALOCEAN, frozen=True, exclude=True
+    )
+    model: Optional[str]  # e.g. "qwen3-embedding-0.6b"
+    vectorizeClassName: bool
+    baseURL: Optional[AnyHttpUrl]  # None leaves the endpoint choice to the server
+
+    def _to_dict(self) -> Dict[str, Any]:
+        ret_dict = super()._to_dict()
+        if self.baseURL is not None:
+            ret_dict["baseURL"] = self.baseURL.unicode_string()  # store the URL as a plain str
+        return ret_dict
+
+
 class _Text2VecMorphConfig(_VectorizerConfigCreate):
     vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
         default=Vectorizers.TEXT2VEC_MORPH, frozen=True, exclude=True
@@ -1084,6 +1100,27 @@ def text2vec_mistral(
             baseURL=base_url, model=model, vectorizeClassName=vectorize_collection_name
         )
 
+    @staticmethod
+    def text2vec_digitalocean(
+        *,
+        base_url: Optional[AnyHttpUrl] = None,
+        model: Optional[str] = None,
+        vectorize_collection_name: bool = True,
+    ) -> _VectorizerConfigCreate:
+        """Create a `_Text2VecDigitalOceanConfig` object for use when vectorizing using the `text2vec-digitalocean` model.
+
+        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings)
+        for detailed usage.
+
+        Args:
+            base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`.
+            model: The model to use, e.g. `qwen3-embedding-0.6b`. Defaults to `None` in this client, but this is a required field on the server.
+            vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
+        """
+        return _Text2VecDigitalOceanConfig(
+            baseURL=base_url, model=model, vectorizeClassName=vectorize_collection_name
+        )
+
     @staticmethod
     def text2vec_ollama(
         *,
diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py
@@ -59,6 +59,7 @@
     _Text2VecCohereConfig,
     _Text2VecContextionaryConfig,
     _Text2VecDatabricksConfig,
+    _Text2VecDigitalOceanConfig,
     _Text2VecGoogleConfig,
     _Text2VecGPT4AllConfig,
     _Text2VecHuggingFaceConfig,
@@ -620,6 +621,42 @@ def text2vec_mistral(
             vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
         )
 
+    @staticmethod
+    def text2vec_digitalocean(
+        *,
+        name: Optional[str] = None,
+        quantizer: Optional[_QuantizerConfigCreate] = None,
+        base_url: Optional[AnyHttpUrl] = None,
+        model: Optional[str] = None,
+        source_properties: Optional[List[str]] = None,
+        vector_index_config: Optional[_VectorIndexConfigCreate] = None,
+        vectorize_collection_name: bool = True,
+    ) -> _VectorConfigCreate:
+        """Create a vector using the `text2vec-digitalocean` module.
+
+        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings)
+        for detailed usage.
+
+        Args:
+            name: The name of the vector.
+            quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied.
+            base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`.
+            model: The model to use, e.g. `qwen3-embedding-0.6b`. Defaults to `None` in this client, but this is a required field on the server.
+            source_properties: Which properties should be included when vectorizing. By default all text properties are included.
+            vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. Defaults to `None`.
+            vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
+        """
+        return _VectorConfigCreate(
+            name=name,
+            source_properties=source_properties,
+            vectorizer=_Text2VecDigitalOceanConfig(
+                baseURL=base_url,
+                model=model,
+                vectorizeClassName=vectorize_collection_name,
+            ),
+            vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
+        )
+
     @staticmethod
     def text2vec_morph(
         *,