-
Notifications
You must be signed in to change notification settings - Fork 119
Expand file tree
/
Copy pathtest_config_methods.py
More file actions
148 lines (134 loc) · 5.19 KB
/
test_config_methods.py
File metadata and controls
148 lines (134 loc) · 5.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from weaviate.collections.classes.config_methods import (
_collection_configs_simple_from_json,
_nested_properties_from_config,
_properties_from_config,
)
def test_collection_config_simple_from_json_with_none_vectorizer_config() -> None:
    """Check parsing of a named vector whose vectorizer settings are ``None``.

    The schema holds a single collection, ``TestCollection``, whose ``default``
    named vector maps ``text2vec-transformers`` to ``None``. The parsed result
    is expected to expose an empty ``model`` mapping and no
    ``source_properties`` for that vectorizer.
    """
    # The same HNSW settings appear on the named vector and at collection
    # level; each place gets its own copy so the two dicts stay independent.
    hnsw_settings = {
        "skip": False,
        "cleanupIntervalSeconds": 300,
        "maxConnections": 64,
        "efConstruction": 128,
        "ef": -1,
        "dynamicEfMin": 100,
        "dynamicEfMax": 500,
        "dynamicEfFactor": 8,
        "vectorCacheMaxObjects": 1000000000000,
        "flatSearchCutoff": 40000,
        "distance": "cosine",
    }
    default_vector = {
        "vectorizer": {"text2vec-transformers": None},
        "vectorIndexType": "hnsw",
        "vectorIndexConfig": dict(hnsw_settings),
    }
    collection = {
        "class": "TestCollection",
        "vectorConfig": {"default": default_vector},
        "properties": [],
        "invertedIndexConfig": {
            "bm25": {"b": 0.75, "k1": 1.2},
            "cleanupIntervalSeconds": 60,
            "stopwords": {"preset": "en", "additions": None, "removals": None},
        },
        "replicationConfig": {"factor": 1, "deletionStrategy": "NoAutomatedResolution"},
        "shardingConfig": {
            "virtualPerPhysical": 128,
            "desiredCount": 1,
            "actualCount": 1,
            "desiredVirtualCount": 128,
            "actualVirtualCount": 128,
            "key": "_id",
            "strategy": "hash",
            "function": "murmur3",
        },
        "vectorIndexType": "hnsw",
        "vectorIndexConfig": dict(hnsw_settings),
    }

    parsed = _collection_configs_simple_from_json({"classes": [collection]})

    assert "TestCollection" in parsed
    named_vectors = parsed["TestCollection"].vector_config
    assert named_vectors is not None
    assert "default" in named_vectors

    default_vectorizer = named_vectors["default"].vectorizer
    assert default_vectorizer.model == {}
    assert default_vectorizer.source_properties is None
def _make_text_prop(name: str, **extra) -> dict:
base = {
"name": name,
"dataType": ["text"],
"indexFilterable": True,
"indexSearchable": True,
"indexRangeFilters": False,
"tokenization": "word",
}
base.update(extra)
return base
def test_properties_from_config_parses_text_analyzer() -> None:
    """Check ``textAnalyzer`` parsing and serialization on top-level properties.

    ``title`` carries a ``textAnalyzer`` entry and ``body`` does not; the
    parsed properties must reflect that, both as attributes and in the dict
    produced by ``to_dict()``.
    """
    property_defs = [
        _make_text_prop(
            "title",
            textAnalyzer={"asciiFold": True, "asciiFoldIgnore": ["é"]},
        ),
        _make_text_prop("body"),
    ]
    parsed = _properties_from_config({"vectorizer": "none", "properties": property_defs})

    def _by_name(wanted: str):
        # First parsed property whose name matches.
        return next(prop for prop in parsed if prop.name == wanted)

    title_prop = _by_name("title")
    body_prop = _by_name("body")

    assert title_prop.text_analyzer is not None
    assert title_prop.text_analyzer.ascii_fold is True
    assert title_prop.text_analyzer.ascii_fold_ignore == ["é"]
    assert body_prop.text_analyzer is None

    # Serializing again must reproduce the wire format that was parsed.
    expected_wire = {
        "asciiFold": True,
        "asciiFoldIgnore": ["é"],
    }
    assert title_prop.to_dict()["textAnalyzer"] == expected_wire
    assert "textAnalyzer" not in body_prop.to_dict()
def test_properties_from_config_text_analyzer_defaults_when_partial() -> None:
    """Check that a ``textAnalyzer`` without ``asciiFold`` still parses.

    Only ``asciiFoldIgnore`` is supplied; the parsed analyzer is expected to
    report ``ascii_fold`` as ``False`` and keep the ignore list unchanged.
    """
    partial_prop = _make_text_prop("title", textAnalyzer={"asciiFoldIgnore": ["é"]})
    parsed = _properties_from_config(
        {
            "vectorizer": "none",
            "properties": [partial_prop],
        }
    )

    analyzer = parsed[0].text_analyzer
    assert analyzer is not None
    # An omitted asciiFold in the server response is read as False.
    assert analyzer.ascii_fold is False
    assert analyzer.ascii_fold_ignore == ["é"]
def test_nested_properties_from_config_parses_text_analyzer() -> None:
    """Check ``textAnalyzer`` parsing and serialization on nested properties.

    A single nested ``title`` property with a full ``textAnalyzer`` entry is
    parsed; its analyzer attributes and its ``to_dict()`` output must match
    the input.
    """
    nested_defs = [
        _make_text_prop(
            "title",
            textAnalyzer={"asciiFold": True, "asciiFoldIgnore": ["ñ"]},
        ),
    ]
    first_nested = _nested_properties_from_config(nested_defs)[0]

    assert first_nested.text_analyzer is not None
    assert first_nested.text_analyzer.ascii_fold is True
    assert first_nested.text_analyzer.ascii_fold_ignore == ["ñ"]

    expected_wire = {
        "asciiFold": True,
        "asciiFoldIgnore": ["ñ"],
    }
    assert first_nested.to_dict()["textAnalyzer"] == expected_wire