From a24934a5806265c8cc6fa5667123088b3a785717 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 11 Apr 2025 14:45:42 +0200 Subject: [PATCH 01/27] add muvera config --- weaviate/collections/classes/config.py | 11 +++++++ .../classes/config_vector_index.py | 29 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index e67612dc7..e77f3cb8d 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -31,6 +31,7 @@ ) from weaviate.collections.classes.config_vector_index import ( _MultiVectorConfigCreate, + _MuveraConfigCreate, VectorIndexType as VectorIndexTypeAlias, VectorFilterStrategy, ) @@ -2150,9 +2151,19 @@ def __add_props( class _VectorIndexMultiVector: @staticmethod def multi_vector( + muvera_enabled: Optional[bool] = None, + muvera_ksim: Optional[int] = None, + muvera_dprojections: Optional[int] = None, + muvera_repetitions: Optional[int] = None, aggregation: Optional[MultiVectorAggregation] = None, ) -> _MultiVectorConfigCreate: return _MultiVectorConfigCreate( + muveraConfig=_MuveraConfigCreate( + enabled=muvera_enabled, + ksim=muvera_ksim, + dprojections=muvera_dprojections, + repetitions=muvera_repetitions, + ), aggregation=aggregation.value if aggregation is not None else None, ) diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index 3c36bbf32..dc70ee947 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -37,12 +37,41 @@ class VectorIndexType(str, Enum): FLAT = "flat" DYNAMIC = "dynamic" +class _MuveraConfigCreate(_ConfigCreateModel): + enabled: Optional[bool] + ksim: Optional[int] + dprojections: Optional[int] + repetitions: Optional[int] + + +class _MuveraConfigCreate(_ConfigUpdateModel): + enabled: Optional[bool] + ksim: Optional[int] + dprojections: Optional[int] + repetitions: Optional[int] + + def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]: + """Must be done manually since Pydantic does not work well with type and type_. + + Errors shadowing type occur if we want to use type as a field name. + """ + if self.enabled is not None: + schema["enabled"] = str(self.enabled.value) + if self.ksim is not None: + schema["ksim"] = str(self.ksim.value) + if self.dprojections is not None: + schema["dprojections"] = str(self.dprojections.value) + if self.repetitions is not None: + schema["repetitions"] = str(self.repetitions.value) + return schema + class _MultiVectorConfigCreateBase(_ConfigCreateModel): enabled: bool = Field(default=True) class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): + muveraConfig: _MuveraConfigCreate aggregation: Optional[str] From 91d43195500192b20c5a954a8c25c9dfa7fbe65e Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 11 Apr 2025 21:26:11 +0200 Subject: [PATCH 02/27] add muvera config in a separate method --- weaviate/collections/classes/config.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index e77f3cb8d..bb941e008 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2151,22 +2151,28 @@ def __add_props( class _VectorIndexMultiVector: @staticmethod def multi_vector( - muvera_enabled: Optional[bool] = None, - muvera_ksim: Optional[int] = None, - muvera_dprojections: Optional[int] = None, - muvera_repetitions: Optional[int] = None, + muvera_config: Optional[_MuveraConfigCreate] = None, aggregation: Optional[MultiVectorAggregation] = None, ) -> _MultiVectorConfigCreate: return _MultiVectorConfigCreate( - muveraConfig=_MuveraConfigCreate( - enabled=muvera_enabled, - ksim=muvera_ksim, - dprojections=muvera_dprojections, - repetitions=muvera_repetitions, - ), + muveraConfig=muvera_config if muvera_config is not None else None, aggregation=aggregation.value if aggregation is not None else None, ) + @staticmethod + def muvera( + enabled: Optional[bool] = None, + ksim: Optional[int] = None, + dprojections: Optional[int] = None, + repetitions: Optional[int] = None, + ) -> _MuveraConfigCreate: + return _MuveraConfigCreate( + enabled=enabled, + ksim=ksim, + dprojections=dprojections, + repetitions=repetitions, + ) + class _VectorIndexQuantizer: @staticmethod From b43b2466c6e24dfb63f4bb2fed365752fcf90fd6 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 11 Apr 2025 21:42:15 +0200 Subject: [PATCH 03/27] minor change --- weaviate/collections/classes/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index bb941e008..a0e34f951 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1664,6 +1664,7 @@ class _SQConfig(_ConfigBase): @dataclass class _MultiVectorConfig(_ConfigBase): + muvera_config: Optional[_MuveraConfigCreate] aggregation: str From 0b51b5cb87c313ba7f5764829c6fcc7411d136ef Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 11 Apr 2025 22:38:14 +0200 Subject: [PATCH 04/27] update config_methods --- weaviate/collections/classes/config_methods.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index a09fea60c..3aaa950ad 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -155,6 +155,7 @@ def __get_multivector(config: Dict[str, Any]) -> Optional[_MultiVectorConfig]: if config.get("multivector") is None or not config.get("multivector", {"enabled": False}).get("enabled") else _MultiVectorConfig( + muvera_config=config["multivector"]["muvera"], aggregation=config["multivector"]["aggregation"], ) ) @@ -244,6 +245,7 @@ def __get_vector_config( ), vector_index_config=vector_index_config, ) + print(named_vectors[name]) return named_vectors else: return None From 6dcc812ebc06917e9d11c6b0c02a33897d8d4b4a Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Tue, 15 Apr 2025 12:42:49 +0200 Subject: [PATCH 05/27] changes to the config methods --- weaviate/collections/classes/config.py | 2 +- .../collections/classes/config_methods.py | 2 +- .../classes/config_vector_index.py | 32 +++---------------- 3 files changed, 6 insertions(+), 30 deletions(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index a0e34f951..96f591196 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2161,7 +2161,7 @@ def multi_vector( ) @staticmethod - def muvera( + def muvera_config( enabled: Optional[bool] = None, ksim: Optional[int] = None, dprojections: Optional[int] = None, diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 3aaa950ad..cd4544e0c 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -245,7 +245,7 @@ def __get_vector_config( ), vector_index_config=vector_index_config, ) - print(named_vectors[name]) + return named_vectors else: return None diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index dc70ee947..e25ffcb25 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -37,44 +37,20 @@ class VectorIndexType(str, Enum): FLAT = "flat" DYNAMIC = "dynamic" -class _MuveraConfigCreate(_ConfigCreateModel): - enabled: Optional[bool] - ksim: Optional[int] - dprojections: Optional[int] - repetitions: Optional[int] - +class _MultiVectorConfigCreateBase(_ConfigCreateModel): + enabled: bool = Field(default=True) -class _MuveraConfigCreate(_ConfigUpdateModel): +class _MuveraConfigCreate(_ConfigCreateModel): enabled: Optional[bool] ksim: Optional[int] dprojections: Optional[int] repetitions: Optional[int] - def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]: - """Must be done manually since Pydantic does not work well with type and type_. - - Errors shadowing type occur if we want to use type as a field name. - """ - if self.enabled is not None: - schema["enabled"] = str(self.enabled.value) - if self.ksim is not None: - schema["ksim"] = str(self.ksim.value) - if self.dprojections is not None: - schema["dprojections"] = str(self.dprojections.value) - if self.repetitions is not None: - schema["repetitions"] = str(self.repetitions.value) - return schema - - -class _MultiVectorConfigCreateBase(_ConfigCreateModel): - enabled: bool = Field(default=True) - class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): - muveraConfig: _MuveraConfigCreate + muveraConfig: Optional[_MuveraConfigCreate] aggregation: Optional[str] - class _VectorIndexConfigCreate(_ConfigCreateModel): distance: Optional[VectorDistances] multivector: Optional[_MultiVectorConfigCreate] From 4f87f38f37e1750a5a6caaf09600617f06f09840 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Tue, 15 Apr 2025 18:18:35 +0200 Subject: [PATCH 06/27] minor changes --- weaviate/collections/classes/config_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index cd4544e0c..530990769 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -155,7 +155,7 @@ def __get_multivector(config: Dict[str, Any]) -> Optional[_MultiVectorConfig]: if config.get("multivector") is None or not config.get("multivector", {"enabled": False}).get("enabled") else _MultiVectorConfig( - muvera_config=config["multivector"]["muvera"], + muvera_config=config["multivector"]["muveraConfig"], aggregation=config["multivector"]["aggregation"], ) ) From e0b3ea523a135b7fe799403e552459156725b78c Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 30 Apr 2025 12:07:53 +0200 Subject: [PATCH 07/27] reformat files for linting and formatting --- weaviate/collections/classes/config_methods.py | 2 +- weaviate/collections/classes/config_vector_index.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 530990769..e0d76b670 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -245,7 +245,7 @@ def __get_vector_config( ), vector_index_config=vector_index_config, ) - + return named_vectors else: return None diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index e25ffcb25..aea3ce231 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -37,9 +37,11 @@ class VectorIndexType(str, Enum): FLAT = "flat" DYNAMIC = "dynamic" + class _MultiVectorConfigCreateBase(_ConfigCreateModel): enabled: bool = Field(default=True) + class _MuveraConfigCreate(_ConfigCreateModel): enabled: Optional[bool] ksim: Optional[int] @@ -51,6 +53,7 @@ class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): muveraConfig: Optional[_MuveraConfigCreate] aggregation: Optional[str] + class _VectorIndexConfigCreate(_ConfigCreateModel): distance: Optional[VectorDistances] multivector: Optional[_MultiVectorConfigCreate] From a0ba9c265792b73e67dbe93e617efc00d9f398f9 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 30 Apr 2025 12:15:40 +0200 Subject: [PATCH 08/27] run ruff check --- weaviate/collections/classes/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 581c74112..e7ed0d53b 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -32,7 +32,6 @@ _NamedVectorsUpdate, ) from weaviate.collections.classes.config_vector_index import ( - VectorIndexType as VectorIndexTypeAlias, VectorFilterStrategy, _MuveraConfigCreate, _MultiVectorConfigCreate, From ddcc388c57e26a7b44efa275cc199dc0ea31554a Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 30 Apr 2025 19:04:17 +0200 Subject: [PATCH 09/27] change function calling for muvera creation --- weaviate/collections/classes/config.py | 47 ++++++++++++++----- .../collections/classes/config_methods.py | 3 +- .../classes/config_vector_index.py | 11 ++++- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index e7ed0d53b..447c4871f 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -34,6 +34,7 @@ from weaviate.collections.classes.config_vector_index import ( VectorFilterStrategy, _MuveraConfigCreate, + _EncodingConfigCreate, _MultiVectorConfigCreate, _QuantizerConfigCreate, _VectorIndexConfigCreate, @@ -1556,13 +1557,20 @@ class _SQConfig(_ConfigBase): BQConfig = _BQConfig SQConfig = _SQConfig +@dataclass +class _MuveraConfig(_ConfigBase): + enabled: Optional[bool] + ksim: Optional[int] + dprojections: Optional[int] + repetitions: Optional[int] + +MuveraConfig = _MuveraConfig @dataclass class _MultiVectorConfig(_ConfigBase): - muvera_config: Optional[_MuveraConfigCreate] + encoding: Optional[_EncodingConfigCreate] aggregation: str - MultiVector = _MultiVectorConfig @@ -2032,25 +2040,25 @@ def __add_props( ) ret_dict["properties"] = existing_props +class _MuveraConfigCreate(_EncodingConfigCreate): + enabled: Optional[bool] + ksim: Optional[int] + dprojections: Optional[int] + repetitions: Optional[int] -class _VectorIndexMultiVector: @staticmethod - def multi_vector( - muvera_config: Optional[_MuveraConfigCreate] = None, - aggregation: Optional[MultiVectorAggregation] = None, - ) -> _MultiVectorConfigCreate: - return _MultiVectorConfigCreate( - muveraConfig=muvera_config if muvera_config is not None else None, - aggregation=aggregation.value if aggregation is not None else None, - ) + def encoding_name() -> str: + return "muvera" + +class _VectorIndexMultivectorEncoding: @staticmethod - def muvera_config( + def muvera( enabled: Optional[bool] = None, ksim: Optional[int] = None, dprojections: Optional[int] = None, repetitions: Optional[int] = None, - ) -> _MuveraConfigCreate: + ) -> _EncodingConfigCreate: return _MuveraConfigCreate( enabled=enabled, ksim=ksim, @@ -2058,6 +2066,19 @@ def muvera_config( repetitions=repetitions, ) +class _VectorIndexMultiVector: + Encoding = _VectorIndexMultivectorEncoding + + @staticmethod + def multi_vector( + encoding: Optional[_EncodingConfigCreate] = None, + aggregation: Optional[MultiVectorAggregation] = None, + ) -> _MultiVectorConfigCreate: + return _MultiVectorConfigCreate( + encoding=encoding if encoding is not None else None, + aggregation=aggregation.value if aggregation is not None else None, + ) + class _VectorIndexQuantizer: @staticmethod diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 63c9e7621..3e039fe45 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -21,6 +21,7 @@ _InvertedIndexConfig, _MultiTenancyConfig, _MultiVectorConfig, + _MuveraConfig, _NamedVectorConfig, _NamedVectorizerConfig, _NestedProperty, @@ -155,7 +156,7 @@ def __get_multivector(config: Dict[str, Any]) -> Optional[_MultiVectorConfig]: if config.get("multivector") is None or not config.get("multivector", {"enabled": False}).get("enabled") else _MultiVectorConfig( - muvera_config=config["multivector"]["muveraConfig"], + encoding=config["multivector"]["muvera"], aggregation=config["multivector"]["aggregation"], ) ) diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index aea3ce231..fe43c6c13 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -48,9 +48,16 @@ class _MuveraConfigCreate(_ConfigCreateModel): dprojections: Optional[int] repetitions: Optional[int] + @staticmethod + def encoding_name() -> str: + return "muvera" + + +class _EncodingConfigCreate(_MultiVectorConfigCreateBase): + enabled: bool = Field(default=True) class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): - muveraConfig: Optional[_MuveraConfigCreate] + encoding: Optional[_EncodingConfigCreate] = Field(exclude=True) aggregation: Optional[str] @@ -69,6 +76,8 @@ def _to_dict(self) -> Dict[str, Any]: ret_dict[self.quantizer.quantizer_name()] = self.quantizer._to_dict() if self.distance is not None: ret_dict["distance"] = str(self.distance.value) + if self.multivector is not None and self.multivector.encoding is not None: + ret_dict["multivector"][self.multivector.encoding.encoding_name()] = self.multivector.encoding._to_dict() return ret_dict From 9be797a36fa1ccdd38d81cf02913daadd5e8532f Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 30 Apr 2025 19:13:45 +0200 Subject: [PATCH 10/27] minor changes --- weaviate/collections/classes/config_methods.py | 1 - weaviate/collections/classes/config_vector_index.py | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 3e039fe45..6f9d46321 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -21,7 +21,6 @@ _InvertedIndexConfig, _MultiTenancyConfig, _MultiVectorConfig, - _MuveraConfig, _NamedVectorConfig, _NamedVectorizerConfig, _NestedProperty, diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index fe43c6c13..a339fcc7d 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -42,7 +42,10 @@ class _MultiVectorConfigCreateBase(_ConfigCreateModel): enabled: bool = Field(default=True) -class _MuveraConfigCreate(_ConfigCreateModel): +class _EncodingConfigCreate(_MultiVectorConfigCreateBase): + enabled: bool = Field(default=True) + +class _MuveraConfigCreate(_EncodingConfigCreate): enabled: Optional[bool] ksim: Optional[int] dprojections: Optional[int] @@ -52,10 +55,6 @@ class _MuveraConfigCreate(_ConfigCreateModel): def encoding_name() -> str: return "muvera" - -class _EncodingConfigCreate(_MultiVectorConfigCreateBase): - enabled: bool = Field(default=True) - class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): encoding: Optional[_EncodingConfigCreate] = Field(exclude=True) aggregation: Optional[str] From b032751dc09ef2dfba5ee4b52a6139283e28b008 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 30 Apr 2025 19:15:39 +0200 Subject: [PATCH 11/27] cleaning old code --- weaviate/collections/classes/config.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 447c4871f..289bda6f9 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2040,17 +2040,6 @@ def __add_props( ) ret_dict["properties"] = existing_props -class _MuveraConfigCreate(_EncodingConfigCreate): - enabled: Optional[bool] - ksim: Optional[int] - dprojections: Optional[int] - repetitions: Optional[int] - - @staticmethod - def encoding_name() -> str: - return "muvera" - - class _VectorIndexMultivectorEncoding: @staticmethod def muvera( From 16db8c4aee9c5cc31664a6c481f30d5c06e7efd4 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 30 Apr 2025 19:20:04 +0200 Subject: [PATCH 12/27] run ruff format --- weaviate/collections/classes/config.py | 6 ++++++ weaviate/collections/classes/config_vector_index.py | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 289bda6f9..7138d1a90 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1557,6 +1557,7 @@ class _SQConfig(_ConfigBase): BQConfig = _BQConfig SQConfig = _SQConfig + @dataclass class _MuveraConfig(_ConfigBase): enabled: Optional[bool] @@ -1564,13 +1565,16 @@ class _MuveraConfig(_ConfigBase): dprojections: Optional[int] repetitions: Optional[int] + MuveraConfig = _MuveraConfig + @dataclass class _MultiVectorConfig(_ConfigBase): encoding: Optional[_EncodingConfigCreate] aggregation: str + MultiVector = _MultiVectorConfig @@ -2040,6 +2044,7 @@ def __add_props( ) ret_dict["properties"] = existing_props + class _VectorIndexMultivectorEncoding: @staticmethod def muvera( @@ -2055,6 +2060,7 @@ def muvera( repetitions=repetitions, ) + class _VectorIndexMultiVector: Encoding = _VectorIndexMultivectorEncoding diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index a339fcc7d..9774cb38c 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -45,6 +45,7 @@ class _MultiVectorConfigCreateBase(_ConfigCreateModel): class _EncodingConfigCreate(_MultiVectorConfigCreateBase): enabled: bool = Field(default=True) + class _MuveraConfigCreate(_EncodingConfigCreate): enabled: Optional[bool] ksim: Optional[int] @@ -55,6 +56,7 @@ class _MuveraConfigCreate(_EncodingConfigCreate): def encoding_name() -> str: return "muvera" + class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): encoding: Optional[_EncodingConfigCreate] = Field(exclude=True) aggregation: Optional[str] @@ -76,7 +78,9 @@ def _to_dict(self) -> Dict[str, Any]: if self.distance is not None: ret_dict["distance"] = str(self.distance.value) if self.multivector is not None and self.multivector.encoding is not None: - ret_dict["multivector"][self.multivector.encoding.encoding_name()] = self.multivector.encoding._to_dict() + ret_dict["multivector"][self.multivector.encoding.encoding_name()] = ( + self.multivector.encoding._to_dict() + ) return ret_dict From a9b024d0d5ea900298dbcd97a37e74c3e88bd829 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 10:45:26 +0200 Subject: [PATCH 13/27] add bc fix --- weaviate/collections/classes/config_methods.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 6f9d46321..1cf5b285b 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -155,7 +155,9 @@ def __get_multivector(config: Dict[str, Any]) -> Optional[_MultiVectorConfig]: if config.get("multivector") is None or not config.get("multivector", {"enabled": False}).get("enabled") else _MultiVectorConfig( - encoding=config["multivector"]["muvera"], + encoding= None + if config["multivector"].get("encoding") is None + else config["multivector"]["encoding"], aggregation=config["multivector"]["aggregation"], ) ) From 0f73881a77431e7dec461b24231911194c2ebfea Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 11:41:54 +0200 Subject: [PATCH 14/27] add get multivector encoding --- weaviate/collections/classes/config_methods.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 1cf5b285b..249a5a16d 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -21,6 +21,7 @@ _InvertedIndexConfig, _MultiTenancyConfig, _MultiVectorConfig, + _MuveraConfigCreate, _NamedVectorConfig, _NamedVectorizerConfig, _NestedProperty, @@ -149,6 +150,20 @@ def __get_quantizer_config( return quantizer +def __get_multivector_encoding(config: Dict[str, Any]) -> Optional[_MuveraConfigCreate]: + return ( + None + if config.get("encoding") is None + or not config.get("encoding", {"enabled": False}).get("enabled") + else _MuveraConfigCreate( + enabled=config["encoding"]["enabled"], + ksim=config["encoding"]["ksim"], + dprojections=config["encoding"]["dprojections"], + repetitions=config["encoding"]["repetitions"], + ) + ) + + def __get_multivector(config: Dict[str, Any]) -> Optional[_MultiVectorConfig]: return ( None @@ -157,7 +172,7 @@ def __get_multivector(config: Dict[str, Any]) -> Optional[_MultiVectorConfig]: else _MultiVectorConfig( encoding= None if config["multivector"].get("encoding") is None - else config["multivector"]["encoding"], + else __get_multivector_encoding(config["multivector"]["encoding"]), aggregation=config["multivector"]["aggregation"], ) ) From 50b47a5d140c8e7609fa5f8cb85bfd2d4e0f7efd Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 12:08:29 +0200 Subject: [PATCH 15/27] update muvera config --- weaviate/collections/classes/config_methods.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 249a5a16d..5607f2a7a 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -21,7 +21,7 @@ _InvertedIndexConfig, _MultiTenancyConfig, _MultiVectorConfig, - _MuveraConfigCreate, + _MuveraConfig, _NamedVectorConfig, _NamedVectorizerConfig, _NestedProperty, @@ -150,12 +150,12 @@ def __get_quantizer_config( return quantizer -def __get_multivector_encoding(config: Dict[str, Any]) -> Optional[_MuveraConfigCreate]: +def __get_multivector_encoding(config: Dict[str, Any]) -> Optional[_MuveraConfig]: return ( None if config.get("encoding") is None or not config.get("encoding", {"enabled": False}).get("enabled") - else _MuveraConfigCreate( + else _MuveraConfig( enabled=config["encoding"]["enabled"], ksim=config["encoding"]["ksim"], dprojections=config["encoding"]["dprojections"], From eb16969e660d5b33d6551d2ce517fc845259c52c Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 12:30:03 +0200 Subject: [PATCH 16/27] remove redundant flag options --- weaviate/collections/classes/config_vector_index.py | 1 - 1 file changed, 1 deletion(-) diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index 9774cb38c..91332d281 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -47,7 +47,6 @@ class _EncodingConfigCreate(_MultiVectorConfigCreateBase): class _MuveraConfigCreate(_EncodingConfigCreate): - enabled: Optional[bool] ksim: Optional[int] dprojections: Optional[int] repetitions: Optional[int] From e699498344df622d15146ca68536267b8542cd20 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 12:50:46 +0200 Subject: [PATCH 17/27] minor change --- weaviate/collections/classes/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 7138d1a90..4346bfdbf 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1571,7 +1571,7 @@ class _MuveraConfig(_ConfigBase): @dataclass class _MultiVectorConfig(_ConfigBase): - encoding: Optional[_EncodingConfigCreate] + encoding: Optional[_MuveraConfigCreate] aggregation: str From e4202f31c12976d9e04f3bdfc44568a10bd23604 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 13:14:23 +0200 Subject: [PATCH 18/27] minor change --- weaviate/collections/classes/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 4346bfdbf..694a40958 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1571,7 +1571,7 @@ class _MuveraConfig(_ConfigBase): @dataclass class _MultiVectorConfig(_ConfigBase): - encoding: Optional[_MuveraConfigCreate] + encoding: Optional[_MuveraConfig] aggregation: str From e2756dba5f828987de69a9815ea8050b2024d52f Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 13:48:21 +0200 Subject: [PATCH 19/27] add abstract method --- weaviate/collections/classes/config.py | 2 +- weaviate/collections/classes/config_vector_index.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 694a40958..f5725d3e5 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2048,7 +2048,7 @@ def __add_props( class _VectorIndexMultivectorEncoding: @staticmethod def muvera( - enabled: Optional[bool] = None, + enabled: bool = True, ksim: Optional[int] = None, dprojections: Optional[int] = None, repetitions: Optional[int] = None, diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index 91332d281..89ac4cd7e 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -45,6 +45,10 @@ class _MultiVectorConfigCreateBase(_ConfigCreateModel): class _EncodingConfigCreate(_MultiVectorConfigCreateBase): enabled: bool = Field(default=True) + @staticmethod + @abstractmethod + def encoding_name() -> str: ... + class _MuveraConfigCreate(_EncodingConfigCreate): ksim: Optional[int] From 372e106b2e5734226512919eb5ee4c9f7f714c75 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 13:55:35 +0200 Subject: [PATCH 20/27] run ruff --- .../collections/classes/config_methods.py | 101 +++++++++++++----- 1 file changed, 74 insertions(+), 27 deletions(-) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 5607f2a7a..fa5efda98 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -49,7 +49,11 @@ def _is_primitive(d_type: str) -> bool: def __get_rerank_config(schema: Dict[str, Any]) -> Optional[_RerankerConfig]: if ( len( - rerankers := [key for key in schema.get("moduleConfig", {}).keys() if "reranker" in key] + rerankers := [ + key + for key in schema.get("moduleConfig", {}).keys() + if "reranker" in key + ] ) == 1 ): @@ -69,7 +73,9 @@ def __get_generative_config(schema: Dict[str, Any]) -> Optional[_GenerativeConfi if ( len( generators := [ - key for key in schema.get("moduleConfig", {}).keys() if "generative" in key + key + for key in schema.get("moduleConfig", {}).keys() + if "generative" in key ] ) == 1 @@ -88,7 +94,10 @@ def __get_generative_config(schema: Dict[str, Any]) -> Optional[_GenerativeConfi def __get_vectorizer_config(schema: Dict[str, Any]) -> Optional[_VectorizerConfig]: - if __is_vectorizer_present(schema) is not None and schema.get("vectorizer", "none") != "none": + if ( + __is_vectorizer_present(schema) is not None + and schema.get("vectorizer", "none") != "none" + ): vec_config: Dict[str, Any] = schema["moduleConfig"].pop(schema["vectorizer"]) try: vectorizer = Vectorizers(schema["vectorizer"]) @@ -170,9 +179,11 @@ def __get_multivector(config: Dict[str, Any]) -> Optional[_MultiVectorConfig]: if config.get("multivector") is None or not config.get("multivector", {"enabled": False}).get("enabled") else _MultiVectorConfig( - encoding= None - if config["multivector"].get("encoding") is None - else __get_multivector_encoding(config["multivector"]["encoding"]), + encoding=( + None + if config["multivector"].get("encoding") is None + else __get_multivector_encoding(config["multivector"]["encoding"]) + ), aggregation=config["multivector"]["aggregation"], ) ) @@ -214,7 +225,9 @@ def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: def __get_vector_index_config( schema: Dict[str, Any], -) -> Union[_VectorIndexConfigHNSW, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, None]: +) -> Union[ + _VectorIndexConfigHNSW, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, None +]: if "vectorIndexConfig" not in schema: return None if schema["vectorIndexType"] == "hnsw": @@ -279,16 +292,22 @@ def __get_vectorizer(schema: Dict[str, Any]) -> Optional[Union[str, Vectorizers] return vectorizer -def _collection_config_simple_from_json(schema: Dict[str, Any]) -> _CollectionConfigSimple: +def _collection_config_simple_from_json( + schema: Dict[str, Any] +) -> _CollectionConfigSimple: return _CollectionConfigSimple( name=schema["class"], description=schema.get("description"), generative_config=__get_generative_config(schema), properties=( - _properties_from_config(schema) if schema.get("properties") is not None else [] + _properties_from_config(schema) + if schema.get("properties") is not None + else [] ), references=( - _references_from_config(schema) if schema.get("properties") is not None else [] + _references_from_config(schema) + if schema.get("properties") is not None + else [] ), reranker_config=__get_rerank_config(schema), vectorizer_config=__get_vectorizer_config(schema), @@ -307,17 +326,25 @@ def _collection_config_from_json(schema: Dict[str, Any]) -> _CollectionConfig: b=schema["invertedIndexConfig"]["bm25"]["b"], k1=schema["invertedIndexConfig"]["bm25"]["k1"], ), - cleanup_interval_seconds=schema["invertedIndexConfig"]["cleanupIntervalSeconds"], - index_null_state=cast(dict, schema["invertedIndexConfig"]).get("indexNullState") + cleanup_interval_seconds=schema["invertedIndexConfig"][ + "cleanupIntervalSeconds" + ], + index_null_state=cast(dict, schema["invertedIndexConfig"]).get( + "indexNullState" + ) is True, index_property_length=cast(dict, schema["invertedIndexConfig"]).get( "indexPropertyLength" ) is True, - index_timestamps=cast(dict, schema["invertedIndexConfig"]).get("indexTimestamps") + index_timestamps=cast(dict, schema["invertedIndexConfig"]).get( + "indexTimestamps" + ) is True, stopwords=_StopwordsConfig( - preset=StopwordsPreset(schema["invertedIndexConfig"]["stopwords"]["preset"]), + preset=StopwordsPreset( + schema["invertedIndexConfig"]["stopwords"]["preset"] + ), additions=schema["invertedIndexConfig"]["stopwords"]["additions"], removals=schema["invertedIndexConfig"]["stopwords"]["removals"], ), @@ -332,16 +359,22 @@ def _collection_config_from_json(schema: Dict[str, Any]) -> _CollectionConfig: ), ), properties=( - _properties_from_config(schema) if schema.get("properties") is not None else [] + _properties_from_config(schema) + if schema.get("properties") is not None + else [] ), references=( - _references_from_config(schema) if schema.get("properties") is not None else [] + _references_from_config(schema) + if schema.get("properties") is not None + else [] ), replication_config=_ReplicationConfig( factor=schema["replicationConfig"]["factor"], async_enabled=schema["replicationConfig"].get("asyncEnabled", False), deletion_strategy=( - ReplicationDeletionStrategy(schema["replicationConfig"]["deletionStrategy"]) + ReplicationDeletionStrategy( + schema["replicationConfig"]["deletionStrategy"] + ) if "deletionStrategy" in schema["replicationConfig"] else ReplicationDeletionStrategy.NO_AUTOMATED_RESOLUTION ), @@ -369,9 +402,12 @@ def _collection_config_from_json(schema: Dict[str, Any]) -> _CollectionConfig: ) -def _collection_configs_from_json(schema: Dict[str, Any]) -> Dict[str, _CollectionConfig]: +def _collection_configs_from_json( + schema: Dict[str, Any] +) -> Dict[str, _CollectionConfig]: configs = { - schema["class"]: _collection_config_from_json(schema) for schema in schema["classes"] + schema["class"]: _collection_config_from_json(schema) + for schema in schema["classes"] } return dict(sorted(configs.items())) @@ -380,12 +416,15 @@ def _collection_configs_simple_from_json( schema: Dict[str, Any], ) -> Dict[str, _CollectionConfigSimple]: configs = { - schema["class"]: _collection_config_simple_from_json(schema) for schema in schema["classes"] + schema["class"]: _collection_config_simple_from_json(schema) + for schema in schema["classes"] } return dict(sorted(configs.items())) -def _nested_properties_from_config(props: List[Dict[str, Any]]) -> List[_NestedProperty]: +def _nested_properties_from_config( + props: List[Dict[str, Any]] +) -> List[_NestedProperty]: return [ _NestedProperty( data_type=DataType(prop["dataType"][0]), @@ -399,7 +438,9 @@ def _nested_properties_from_config(props: List[Dict[str, Any]]) -> List[_NestedP else None ), tokenization=( - Tokenization(prop["tokenization"]) if prop.get("tokenization") is not None else None + Tokenization(prop["tokenization"]) + if prop.get("tokenization") is not None + else None ), ) for prop in props @@ -421,14 +462,16 @@ def _properties_from_config(schema: Dict[str, Any]) -> List[_Property]: else None ), tokenization=( - Tokenization(prop["tokenization"]) if prop.get("tokenization") is not None else None + Tokenization(prop["tokenization"]) + if prop.get("tokenization") is not None + else None ), vectorizer_config=( _PropertyVectorizerConfig( skip=prop["moduleConfig"][schema["vectorizer"]].get("skip", False), - vectorize_property_name=prop["moduleConfig"][schema["vectorizer"]].get( - "vectorizePropertyName", False - ), + vectorize_property_name=prop["moduleConfig"][ + schema["vectorizer"] + ].get("vectorizePropertyName", False), ) if schema.get("vectorizer", "none") != "none" and prop.get("moduleConfig", None) is not None @@ -445,7 +488,11 @@ def _properties_from_config(schema: Dict[str, Any]) -> List[_Property]: if "vectorConfig" in schema else None ), - vectorizer=(schema.get("vectorizer", "none") if "vectorConfig" not in schema else None), + vectorizer=( + schema.get("vectorizer", "none") + if "vectorConfig" not in schema + else None + ), ) for prop in schema["properties"] if _is_primitive(prop["dataType"]) From 1366dd48644c49f1fa2e34f036aeb772ab5e2beb Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Fri, 2 May 2025 14:05:15 +0200 Subject: [PATCH 21/27] format config methods --- .../collections/classes/config_methods.py | 93 +++++-------------- 1 file changed, 24 insertions(+), 69 deletions(-) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index fa5efda98..a56a37854 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -49,11 +49,7 @@ def _is_primitive(d_type: str) -> bool: def __get_rerank_config(schema: Dict[str, Any]) -> Optional[_RerankerConfig]: if ( len( - rerankers := [ - key - for key in schema.get("moduleConfig", {}).keys() - if "reranker" in key - ] + rerankers := [key for key in schema.get("moduleConfig", {}).keys() if "reranker" in key] ) == 1 ): @@ -73,9 +69,7 @@ def __get_generative_config(schema: Dict[str, Any]) -> Optional[_GenerativeConfi if ( len( generators := [ - key - for key in schema.get("moduleConfig", {}).keys() - if "generative" in key + key for key in schema.get("moduleConfig", {}).keys() if "generative" in key ] ) == 1 @@ -94,10 +88,7 @@ def __get_generative_config(schema: Dict[str, Any]) -> Optional[_GenerativeConfi def __get_vectorizer_config(schema: Dict[str, Any]) -> Optional[_VectorizerConfig]: - if ( - __is_vectorizer_present(schema) is not None - and schema.get("vectorizer", "none") != "none" - ): + if __is_vectorizer_present(schema) is not None and schema.get("vectorizer", "none") != "none": vec_config: Dict[str, Any] = schema["moduleConfig"].pop(schema["vectorizer"]) try: vectorizer = Vectorizers(schema["vectorizer"]) @@ -225,9 +216,7 @@ def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: def __get_vector_index_config( schema: Dict[str, Any], -) -> Union[ - _VectorIndexConfigHNSW, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, None -]: +) -> Union[_VectorIndexConfigHNSW, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, None]: if "vectorIndexConfig" not in schema: return None if schema["vectorIndexType"] == "hnsw": @@ -292,22 +281,16 @@ def __get_vectorizer(schema: Dict[str, Any]) -> Optional[Union[str, Vectorizers] return vectorizer -def _collection_config_simple_from_json( - schema: Dict[str, Any] -) -> _CollectionConfigSimple: +def _collection_config_simple_from_json(schema: Dict[str, Any]) -> _CollectionConfigSimple: return _CollectionConfigSimple( name=schema["class"], description=schema.get("description"), generative_config=__get_generative_config(schema), properties=( - _properties_from_config(schema) - if schema.get("properties") is not None - else [] + _properties_from_config(schema) if schema.get("properties") is not None else [] ), references=( - _references_from_config(schema) - if schema.get("properties") is not None - else [] + _references_from_config(schema) if schema.get("properties") is not None else [] ), reranker_config=__get_rerank_config(schema), vectorizer_config=__get_vectorizer_config(schema), @@ -326,25 +309,17 @@ def _collection_config_from_json(schema: Dict[str, Any]) -> _CollectionConfig: b=schema["invertedIndexConfig"]["bm25"]["b"], k1=schema["invertedIndexConfig"]["bm25"]["k1"], ), - cleanup_interval_seconds=schema["invertedIndexConfig"][ - "cleanupIntervalSeconds" - ], - index_null_state=cast(dict, schema["invertedIndexConfig"]).get( - "indexNullState" - ) + cleanup_interval_seconds=schema["invertedIndexConfig"]["cleanupIntervalSeconds"], + index_null_state=cast(dict, schema["invertedIndexConfig"]).get("indexNullState") is True, index_property_length=cast(dict, schema["invertedIndexConfig"]).get( "indexPropertyLength" ) is True, - index_timestamps=cast(dict, schema["invertedIndexConfig"]).get( - "indexTimestamps" - ) + index_timestamps=cast(dict, schema["invertedIndexConfig"]).get("indexTimestamps") is True, stopwords=_StopwordsConfig( - preset=StopwordsPreset( - schema["invertedIndexConfig"]["stopwords"]["preset"] - ), + preset=StopwordsPreset(schema["invertedIndexConfig"]["stopwords"]["preset"]), additions=schema["invertedIndexConfig"]["stopwords"]["additions"], removals=schema["invertedIndexConfig"]["stopwords"]["removals"], ), @@ -359,22 +334,16 @@ def _collection_config_from_json(schema: Dict[str, Any]) -> _CollectionConfig: ), ), properties=( - _properties_from_config(schema) - if schema.get("properties") is not None - else [] + _properties_from_config(schema) if schema.get("properties") is not None else [] ), references=( - _references_from_config(schema) - if schema.get("properties") is not None - else [] + _references_from_config(schema) if schema.get("properties") is not None else [] ), replication_config=_ReplicationConfig( factor=schema["replicationConfig"]["factor"], async_enabled=schema["replicationConfig"].get("asyncEnabled", False), deletion_strategy=( - ReplicationDeletionStrategy( - schema["replicationConfig"]["deletionStrategy"] - ) + ReplicationDeletionStrategy(schema["replicationConfig"]["deletionStrategy"]) if "deletionStrategy" in schema["replicationConfig"] else ReplicationDeletionStrategy.NO_AUTOMATED_RESOLUTION ), @@ -402,12 +371,9 @@ def _collection_config_from_json(schema: Dict[str, Any]) -> _CollectionConfig: ) -def _collection_configs_from_json( - schema: Dict[str, Any] -) -> Dict[str, _CollectionConfig]: +def _collection_configs_from_json(schema: Dict[str, Any]) -> Dict[str, _CollectionConfig]: configs = { - schema["class"]: _collection_config_from_json(schema) - for schema in schema["classes"] + schema["class"]: _collection_config_from_json(schema) for schema in schema["classes"] } return dict(sorted(configs.items())) @@ -416,15 +382,12 @@ def _collection_configs_simple_from_json( schema: Dict[str, Any], ) -> Dict[str, _CollectionConfigSimple]: configs = { - schema["class"]: _collection_config_simple_from_json(schema) - for schema in schema["classes"] + schema["class"]: _collection_config_simple_from_json(schema) for schema in schema["classes"] } return dict(sorted(configs.items())) -def _nested_properties_from_config( - props: List[Dict[str, Any]] -) -> List[_NestedProperty]: +def _nested_properties_from_config(props: List[Dict[str, Any]]) -> List[_NestedProperty]: return [ _NestedProperty( data_type=DataType(prop["dataType"][0]), @@ -438,9 +401,7 @@ def _nested_properties_from_config( else None ), tokenization=( - Tokenization(prop["tokenization"]) - if prop.get("tokenization") is not None - else None + Tokenization(prop["tokenization"]) if prop.get("tokenization") is not None else None ), ) for prop in props @@ -462,16 +423,14 @@ def _properties_from_config(schema: Dict[str, Any]) -> List[_Property]: else None ), tokenization=( - Tokenization(prop["tokenization"]) - if prop.get("tokenization") is not None - else None + Tokenization(prop["tokenization"]) if prop.get("tokenization") is not None else None ), vectorizer_config=( _PropertyVectorizerConfig( skip=prop["moduleConfig"][schema["vectorizer"]].get("skip", False), - vectorize_property_name=prop["moduleConfig"][ - schema["vectorizer"] - ].get("vectorizePropertyName", False), + vectorize_property_name=prop["moduleConfig"][schema["vectorizer"]].get( + "vectorizePropertyName", False + ), ) if schema.get("vectorizer", "none") != "none" and prop.get("moduleConfig", None) is not None @@ -488,11 +447,7 @@ def _properties_from_config(schema: Dict[str, Any]) -> List[_Property]: if "vectorConfig" in schema else None ), - vectorizer=( - schema.get("vectorizer", "none") - if "vectorConfig" not in schema - else None - ), + vectorizer=(schema.get("vectorizer", "none") if "vectorConfig" not in schema else None), ) for prop in schema["properties"] if _is_primitive(prop["dataType"]) From f96a02fa087cd7226e606cc3cc60e97c7e3d3a6d Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 7 May 2025 15:51:13 +0200 Subject: [PATCH 22/27] add test on muvera creation --- integration/test_collection_config.py | 55 +++++++++++++++++++ .../collections/classes/config_methods.py | 16 +++--- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 630783baf..4c99095b1 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -1404,6 +1404,60 @@ def test_config_multi_vector_disabled( assert isinstance(conf, _VectorIndexConfigHNSW) assert conf.multi_vector is None +def test_config_muvera_enabled( + collection_factory: CollectionFactory, +) -> None: + dummy = collection_factory("dummy", ports=(8086, 50057)) + if dummy._connection._weaviate_version.is_lower_than(1, 31, 0): + pytest.skip("Muvera is not supported in Weaviate versions lower than 1.31.0") + + collection = collection_factory( + ports=(8086, 50057), + properties=[Property(name="name", data_type=DataType.TEXT)], + vectorizer_config=[ + Configure.NamedVectors.text2colbert_jinaai( + name="vec", + vectorize_collection_name=False, + vector_index_config=Configure.VectorIndex.hnsw( + multi_vector=Configure.VectorIndex.MultiVector.multi_vector( + encoding=Configure.VectorIndex.MultiVector.Encoding.muvera() + ) + ), + ) + ], + ) + config = collection.config.get() + assert config.vector_config is not None + conf = config.vector_config["vec"].vector_index_config + assert isinstance(conf, _VectorIndexConfigHNSW) + if collection._connection._weaviate_version.is_lower_than(1, 31, 0): + assert conf.multi_vector is None + else: + assert conf.multi_vector is not None + assert conf.multi_vector.encoding is not None + +def test_config_muvera_disabled( + collection_factory: CollectionFactory, +) -> None: + collection = collection_factory( + ports=(8086, 50057), + properties=[Property(name="name", data_type=DataType.TEXT)], + vectorizer_config=[ + Configure.NamedVectors.text2colbert_jinaai( + name="vec", + vectorize_collection_name=False, + vector_index_config=Configure.VectorIndex.hnsw( + multi_vector=Configure.VectorIndex.MultiVector.multi_vector() + ), + ) + ], + ) + config = collection.config.get() + assert config.vector_config is not None + conf = config.vector_config["vec"].vector_index_config + assert isinstance(conf, _VectorIndexConfigHNSW) + assert conf.multi_vector is not None + assert conf.multi_vector.encoding is None @pytest.mark.parametrize( "generative_config", @@ -1426,6 +1480,7 @@ def test_config_multi_vector_disabled( ], ], ) + def test_config_add_property( collection_factory: CollectionFactory, generative_config, vectorizer_config ) -> None: diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index a56a37854..a6f1eee32 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -153,13 +153,13 @@ def __get_quantizer_config( def __get_multivector_encoding(config: Dict[str, Any]) -> Optional[_MuveraConfig]: return ( None - if config.get("encoding") is None - or not config.get("encoding", {"enabled": False}).get("enabled") + if config.get("muvera") is None + or not config.get("muvera", {"enabled": False}).get("enabled") else _MuveraConfig( - enabled=config["encoding"]["enabled"], - ksim=config["encoding"]["ksim"], - dprojections=config["encoding"]["dprojections"], - repetitions=config["encoding"]["repetitions"], + enabled=config["muvera"]["enabled"], + ksim=config["muvera"]["ksim"], + dprojections=config["muvera"]["dprojections"], + repetitions=config["muvera"]["repetitions"], ) ) @@ -172,8 +172,8 @@ def __get_multivector(config: Dict[str, Any]) -> Optional[_MultiVectorConfig]: else _MultiVectorConfig( encoding=( None - if config["multivector"].get("encoding") is None - else __get_multivector_encoding(config["multivector"]["encoding"]) + if config["multivector"].get("muvera") is None + else __get_multivector_encoding(config["multivector"]) ), aggregation=config["multivector"]["aggregation"], ) From 6c560386c0593aa96154e456bdf30ddbda064203 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 7 May 2025 16:03:28 +0200 Subject: [PATCH 23/27] run ruff formatter --- integration/test_collection_config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 4c99095b1..dd61225a9 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -1404,6 +1404,7 @@ def test_config_multi_vector_disabled( assert isinstance(conf, _VectorIndexConfigHNSW) assert conf.multi_vector is None + def test_config_muvera_enabled( collection_factory: CollectionFactory, ) -> None: @@ -1436,6 +1437,7 @@ def test_config_muvera_enabled( assert conf.multi_vector is not None assert conf.multi_vector.encoding is not None + def test_config_muvera_disabled( collection_factory: CollectionFactory, ) -> None: @@ -1459,6 +1461,7 @@ def test_config_muvera_disabled( assert conf.multi_vector is not None assert conf.multi_vector.encoding is None + @pytest.mark.parametrize( "generative_config", [ @@ -1480,7 +1483,6 @@ def test_config_muvera_disabled( ], ], ) - def test_config_add_property( collection_factory: CollectionFactory, generative_config, vectorizer_config ) -> None: From 96990f44d5429d753f6a6827ae97427f51a65e9d Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 7 May 2025 17:40:30 +0200 Subject: [PATCH 24/27] add skip muvera test for older version --- integration/test_collection_config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index dd61225a9..66a310e10 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -1388,6 +1388,10 @@ def test_config_multi_vector_enabled( def test_config_multi_vector_disabled( collection_factory: CollectionFactory, ) -> None: + dummy = collection_factory("dummy", ports=(8086, 50057)) + if dummy._connection._weaviate_version.is_lower_than(1, 31, 0): + pytest.skip("Muvera is not supported in Weaviate versions lower than 1.31.0") + collection = collection_factory( ports=(8086, 50057), properties=[Property(name="name", data_type=DataType.TEXT)], From 542fd71be47d29752f9d71db4bc0a75f0d7a5b53 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Wed, 7 May 2025 17:57:52 +0200 Subject: [PATCH 25/27] minor fix on muvera test --- integration/test_collection_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 66a310e10..19717ba3f 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -1388,10 +1388,6 @@ def test_config_multi_vector_enabled( def test_config_multi_vector_disabled( collection_factory: CollectionFactory, ) -> None: - dummy = collection_factory("dummy", ports=(8086, 50057)) - if dummy._connection._weaviate_version.is_lower_than(1, 31, 0): - pytest.skip("Muvera is not supported in Weaviate versions lower than 1.31.0") - collection = collection_factory( ports=(8086, 50057), properties=[Property(name="name", data_type=DataType.TEXT)], @@ -1445,6 +1441,10 @@ def test_config_muvera_enabled( def test_config_muvera_disabled( collection_factory: CollectionFactory, ) -> None: + dummy = collection_factory("dummy", ports=(8086, 50057)) + if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): + pytest.skip("Muvera is not supported in Weaviate versions lower than 1.31.0") + collection = collection_factory( ports=(8086, 50057), properties=[Property(name="name", data_type=DataType.TEXT)], From 9d710df3585cdaaade3bd58cc9547892bafa5e42 Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Thu, 8 May 2025 10:21:09 +0200 Subject: [PATCH 26/27] change minor fix on test --- integration/test_collection_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 19717ba3f..3693b9c34 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -1443,7 +1443,7 @@ def test_config_muvera_disabled( ) -> None: dummy = collection_factory("dummy", ports=(8086, 50057)) if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): - pytest.skip("Muvera is not supported in Weaviate versions lower than 1.31.0") + pytest.skip("Multivector is not supported in Weaviate versions lower than 1.29.0") collection = collection_factory( ports=(8086, 50057), From 176def632ea7ee8c7fc8d3423aadb8ce0b88129c Mon Sep 17 00:00:00 2001 From: Roberto Esposito Date: Thu, 8 May 2025 10:31:04 +0200 Subject: [PATCH 27/27] remove explicit enabled flag for muvera --- weaviate/collections/classes/config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index f5725d3e5..f78074815 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2048,13 +2048,12 @@ def __add_props( class _VectorIndexMultivectorEncoding: @staticmethod def muvera( - enabled: bool = True, ksim: Optional[int] = None, dprojections: Optional[int] = None, repetitions: Optional[int] = None, ) -> _EncodingConfigCreate: return _MuveraConfigCreate( - enabled=enabled, + enabled=True, ksim=ksim, dprojections=dprojections, repetitions=repetitions,