|
| 1 | +import re |
| 2 | +from typing import Any |
| 3 | + |
| 4 | +import pytest |
| 5 | + |
| 6 | +from weaviate.collections import Collection |
| 7 | +from weaviate.collections.classes.config import DataType, Property |
| 8 | +from weaviate.collections.classes.data import DataObject |
| 9 | +from weaviate.collections.classes.grpc import GroupBy, MetadataQuery |
| 10 | +from weaviate.collections.classes.internal import SearchProfileReturn |
| 11 | +from integration.conftest import CollectionFactory |
| 12 | + |
# Matches the text produced by Go's ``time.Duration.String()``: one or more
# ``<number><unit>`` components written back to back, e.g. "1.234ms", "0s",
# "1m30.5s" or "72h3m0.5s". Each number must contain at least one digit and at
# most one decimal point. Both code points used for the micro sign (U+00B5 and
# U+03BC) and the ASCII spelling "us" are accepted, mirroring the units that
# Go's ``time.ParseDuration`` understands.
GO_DURATION_RE = re.compile(
    r"(?:(?:\d+(?:\.\d*)?|\.\d+)(?:ns|us|\u00b5s|\u03bcs|ms|s|m|h))+"
)


def assert_go_duration(value: str, label: str = "") -> None:
    """Assert that a string looks like a Go duration (e.g. '1.234ms', '1m30.5s').

    Args:
        value: The candidate duration string. The whole string must match;
            leading/trailing text or whitespace is rejected.
        label: Name of the field being checked; only used in the failure message.

    Raises:
        AssertionError: If ``value`` is not a string or is not a sequence of
            ``<number><unit>`` components with units ns, us/µs, ms, s, m or h.
    """
    # Fail with a readable message instead of the TypeError that re would raise
    # when handed a non-string value.
    assert isinstance(value, str), (
        f"Expected Go duration string for {label!r}, got {value!r}"
    )
    assert GO_DURATION_RE.fullmatch(value), (
        f"Expected Go duration format for {label!r}, got {value!r}"
    )
| 21 | + |
| 22 | + |
def assert_common_profile(profile: SearchProfileReturn) -> None:
    """Check the invariants every search profile must satisfy, whatever its search type.

    The profile's ``details`` mapping must be non-empty, must contain a
    ``total_took`` entry formatted as a Go duration, and may only hold
    non-empty strings as keys and values.
    """
    details = profile.details
    assert len(details) > 0, "Profile details should not be empty"
    assert "total_took" in details
    assert_go_duration(details["total_took"], "total_took")
    for entry in details.items():
        # Each entry is a (key, value) pair; both halves obey the same rule.
        for part in entry:
            assert isinstance(part, str) and part != ""
| 31 | + |
| 32 | + |
def _create_and_populate(collection_factory: CollectionFactory) -> Collection[Any, Any]:
    """Create a collection with one TEXT property and insert three objects with fixed vectors.

    The calling test is skipped via ``pytest.skip`` when the connected Weaviate
    server reports a version lower than 1.36.9.
    """
    text_property = Property(name="text", data_type=DataType.TEXT)
    collection = collection_factory(properties=[text_property])

    server_version = collection._connection._weaviate_version
    if server_version.is_lower_than(1, 36, 9):
        pytest.skip("Query profiling requires Weaviate >= 1.36.9")

    # One object per axis-aligned unit vector.
    rows = (
        ("hello world", [1.0, 0.0, 0.0]),
        ("goodbye world", [0.0, 1.0, 0.0]),
        ("foo bar baz", [0.0, 0.0, 1.0]),
    )
    collection.data.insert_many(
        [DataObject(properties={"text": text}, vector=vector) for text, vector in rows]
    )
    return collection
| 47 | + |
| 48 | + |
def test_fetch_objects_with_query_profile(collection_factory: CollectionFactory) -> None:
    """Profiling a plain fetch_objects call yields an 'object' profile and no others."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.fetch_objects(
        return_metadata=MetadataQuery(query_profile=True),
    )
    assert len(response.objects) == 3

    query_profile = response.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    first_shard = query_profile.shards[0]
    assert first_shard.name != ""
    assert first_shard.node != ""

    searches = first_shard.searches
    assert "object" in searches
    for unexpected in ("vector", "keyword"):
        assert unexpected not in searches
    assert_common_profile(searches["object"])
| 67 | + |
| 68 | + |
def test_near_vector_with_query_profile(collection_factory: CollectionFactory) -> None:
    """Profiling a near_vector search yields a 'vector' profile with the expected timing keys."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True, distance=True),
        limit=2,
    )
    assert len(response.objects) == 2

    query_profile = response.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "vector" in searches
    for unexpected in ("keyword", "object"):
        assert unexpected not in searches

    vector_profile = searches["vector"]
    assert_common_profile(vector_profile)
    details = vector_profile.details

    assert "vector_search_took" in details
    assert_go_duration(details["vector_search_took"], "vector_search_took")

    assert "hnsw_flat_search" in details
    assert details["hnsw_flat_search"] in ("true", "false")

    # Every per-layer entry is expected to hold a duration.
    layer_timings = {
        name: took for name, took in details.items() if name.startswith("knn_search_layer_")
    }
    assert len(layer_timings) > 0, "Expected at least one knn_search_layer_*_took key"
    for name, took in layer_timings.items():
        assert_go_duration(took, name)

    assert "objects_took" in details
    assert_go_duration(details["objects_took"], "objects_took")
| 101 | + |
| 102 | + |
def test_bm25_with_query_profile(collection_factory: CollectionFactory) -> None:
    """Profiling a BM25 search yields a 'keyword' profile with method, timing and count keys."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.bm25(
        query="hello",
        return_metadata=MetadataQuery(query_profile=True, score=True),
    )

    query_profile = response.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "keyword" in searches
    for unexpected in ("vector", "object"):
        assert unexpected not in searches

    keyword_profile = searches["keyword"]
    assert_common_profile(keyword_profile)
    details = keyword_profile.details

    assert "kwd_method" in details
    assert details["kwd_method"] != ""

    for timing_key in ("kwd_time", "kwd_1_tok_time"):
        assert timing_key in details
        assert_go_duration(details[timing_key], timing_key)

    assert "kwd_6_res_count" in details
    result_count = details["kwd_6_res_count"]
    assert result_count.isdigit()
    assert int(result_count) >= 0
| 132 | + |
| 133 | + |
def test_hybrid_with_query_profile(collection_factory: CollectionFactory) -> None:
    """Profiling a hybrid search yields both a 'vector' and a 'keyword' profile."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.hybrid(
        query="hello",
        vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True),
        limit=2,
    )

    query_profile = response.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "vector" in searches, "Hybrid should produce a 'vector' profile"
    assert "keyword" in searches, "Hybrid should produce a 'keyword' profile"
    assert "object" not in searches

    vector_profile = searches["vector"]
    assert_common_profile(vector_profile)
    assert "vector_search_took" in vector_profile.details

    keyword_profile = searches["keyword"]
    assert_common_profile(keyword_profile)
    assert "kwd_method" in keyword_profile.details
| 156 | + |
| 157 | + |
def test_near_vector_group_by_with_query_profile(
    collection_factory: CollectionFactory,
) -> None:
    """A grouped near_vector search still returns a 'vector' profile when profiling is on."""
    populated = _create_and_populate(collection_factory)
    grouping = GroupBy(prop="text", objects_per_group=1, number_of_groups=3)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True),
        group_by=grouping,
    )

    query_profile = response.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "vector" in searches
    assert_common_profile(searches["vector"])
| 174 | + |
| 175 | + |
def test_full_with_profile(collection_factory: CollectionFactory) -> None:
    """MetadataQuery.full_with_profile() populates object metadata and the query profile."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery.full_with_profile(),
        limit=1,
    )
    assert len(response.objects) == 1

    metadata = response.objects[0].metadata
    assert metadata.distance is not None
    assert metadata.creation_time is not None
    assert metadata.last_update_time is not None
    assert metadata.score is not None
    assert metadata.explain_score is not None

    query_profile = response.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0
    first_shard = query_profile.shards[0]
    assert_common_profile(first_shard.searches["vector"])
| 195 | + |
| 196 | + |
def test_full_excludes_query_profile(collection_factory: CollectionFactory) -> None:
    """Requesting MetadataQuery.full() leaves query_profile unset on the result."""
    populated = _create_and_populate(collection_factory)
    full_metadata = MetadataQuery.full()
    response = populated.query.fetch_objects(return_metadata=full_metadata)
    assert response.query_profile is None
| 204 | + |
| 205 | + |
def test_no_query_profile_when_not_requested(
    collection_factory: CollectionFactory,
) -> None:
    """query_profile stays None when the metadata request does not ask for it."""
    populated = _create_and_populate(collection_factory)
    distance_only = MetadataQuery(distance=True)
    response = populated.query.fetch_objects(return_metadata=distance_only)
    assert response.query_profile is None
| 215 | + |
| 216 | + |
def test_query_profile_with_metadata_list(
    collection_factory: CollectionFactory,
) -> None:
    """Profiling can be requested through the list-of-strings form of return_metadata."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=["query_profile", "distance"],
        limit=2,
    )

    query_profile = response.query_profile
    assert query_profile is not None
    assert len(query_profile.shards) > 0

    searches = query_profile.shards[0].searches
    assert "vector" in searches
    assert_common_profile(searches["vector"])
| 233 | + |
| 234 | + |
def test_query_profile_details_are_strings(
    collection_factory: CollectionFactory,
) -> None:
    """Every search type, detail key and detail value in the profile is a non-empty string."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True),
        limit=1,
    )

    query_profile = response.query_profile
    assert query_profile is not None
    for shard in query_profile.shards:
        searches = shard.searches
        assert len(searches) > 0, "Shard should have at least one search profile"
        for search_type, profile in searches.items():
            assert isinstance(search_type, str) and search_type != ""
            details = profile.details
            assert len(details) > 0
            for entry in details.items():
                # Each entry is a (key, value) pair; both halves obey the same rule.
                for part in entry:
                    assert isinstance(part, str) and part != ""
0 commit comments