Skip to content

Commit 7e5b1be

Browse files
authored
Merge pull request #2011 from weaviate/query-profiling
Query profiling
2 parents 1b4eea1 + 906b35b commit 7e5b1be

12 files changed

Lines changed: 595 additions & 88 deletions

File tree

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
import re
2+
from typing import Any
3+
4+
import pytest
5+
6+
from weaviate.collections import Collection
7+
from weaviate.collections.classes.config import DataType, Property
8+
from weaviate.collections.classes.data import DataObject
9+
from weaviate.collections.classes.grpc import GroupBy, MetadataQuery
10+
from weaviate.collections.classes.internal import SearchProfileReturn
11+
from integration.conftest import CollectionFactory
12+
13+
# Matches the text form of a Go duration: one or more "<number><unit>" segments,
# e.g. "1.234ms", "5.458µs", "0s" or the compound "1m30.5s". Each number must be
# digits with an optional fractional part, so malformed values such as "..ms" or
# "1.2.3s" are rejected. Group 1 captures the unit of the last segment.
GO_DURATION_RE = re.compile(r"(?:\d+(?:\.\d+)?(ns|µs|ms|s|m|h))+")
14+
15+
16+
def assert_go_duration(value: str, label: str = "") -> None:
    """Fail unless ``value`` fully matches ``GO_DURATION_RE`` (e.g. '1.234ms', '5.458µs').

    ``label`` identifies the checked entry in the failure message.
    """
    matched = GO_DURATION_RE.fullmatch(value)
    assert matched, f"Expected Go duration format for {label!r}, got {value!r}"
21+
22+
23+
def assert_common_profile(profile: SearchProfileReturn) -> None:
    """Run the checks that apply to every search profile, whatever its search type.

    The details mapping must be non-empty, must carry a ``total_took`` entry in Go
    duration format, and every key and value must be a non-empty string.
    """
    details = profile.details
    assert len(details) > 0, "Profile details should not be empty"
    assert "total_took" in details
    assert_go_duration(details["total_took"], "total_took")
    for detail_name, detail_value in details.items():
        assert isinstance(detail_name, str) and detail_name != ""
        assert isinstance(detail_value, str) and detail_value != ""
31+
32+
33+
def _create_and_populate(collection_factory: CollectionFactory) -> Collection[Any, Any]:
    """Create a collection with a single TEXT property and insert three vectorised objects.

    The calling test is skipped when the connected server version is lower than 1.36.9.
    """
    text_property = Property(name="text", data_type=DataType.TEXT)
    collection = collection_factory(
        properties=[text_property],
    )
    server_version = collection._connection._weaviate_version
    if server_version.is_lower_than(1, 36, 9):
        pytest.skip("Query profiling requires Weaviate >= 1.36.9")

    # One object per axis so each test vector has an unambiguous nearest neighbour.
    seed_rows = [
        ("hello world", [1.0, 0.0, 0.0]),
        ("goodbye world", [0.0, 1.0, 0.0]),
        ("foo bar baz", [0.0, 0.0, 1.0]),
    ]
    collection.data.insert_many(
        [DataObject(properties={"text": text}, vector=vector) for text, vector in seed_rows]
    )
    return collection
47+
48+
49+
def test_fetch_objects_with_query_profile(collection_factory: CollectionFactory) -> None:
    """Check that fetch_objects with profiling enabled reports only an 'object' search."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.fetch_objects(
        return_metadata=MetadataQuery(query_profile=True),
    )
    assert len(response.objects) == 3
    assert response.query_profile is not None
    assert len(response.query_profile.shards) > 0

    first_shard = response.query_profile.shards[0]
    assert first_shard.name != ""
    assert first_shard.node != ""

    # A plain fetch must be profiled as an object lookup, not a vector/keyword search.
    searches = first_shard.searches
    assert "object" in searches
    assert "vector" not in searches
    assert "keyword" not in searches
    assert_common_profile(searches["object"])
67+
68+
69+
def test_near_vector_with_query_profile(collection_factory: CollectionFactory) -> None:
    """Check the 'vector' search profile produced by a near_vector query."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True, distance=True),
        limit=2,
    )
    assert len(response.objects) == 2
    assert response.query_profile is not None
    assert len(response.query_profile.shards) > 0

    searches = response.query_profile.shards[0].searches
    assert "vector" in searches
    assert "keyword" not in searches
    assert "object" not in searches
    vector_profile = searches["vector"]
    assert_common_profile(vector_profile)
    details = vector_profile.details

    assert "vector_search_took" in details
    assert_go_duration(details["vector_search_took"], "vector_search_took")

    assert "hnsw_flat_search" in details
    assert details["hnsw_flat_search"] in ("true", "false")

    # Every per-layer entry must hold a duration.
    layer_keys = [name for name in details if name.startswith("knn_search_layer_")]
    assert len(layer_keys) > 0, "Expected at least one knn_search_layer_*_took key"
    for layer_key in layer_keys:
        assert_go_duration(details[layer_key], layer_key)

    assert "objects_took" in details
    assert_go_duration(details["objects_took"], "objects_took")
101+
102+
103+
def test_bm25_with_query_profile(collection_factory: CollectionFactory) -> None:
    """Check the 'keyword' search profile produced by a BM25 query."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.bm25(
        query="hello",
        return_metadata=MetadataQuery(query_profile=True, score=True),
    )
    assert response.query_profile is not None
    assert len(response.query_profile.shards) > 0

    searches = response.query_profile.shards[0].searches
    assert "keyword" in searches
    assert "vector" not in searches
    assert "object" not in searches
    keyword_profile = searches["keyword"]
    assert_common_profile(keyword_profile)
    details = keyword_profile.details

    assert "kwd_method" in details
    assert details["kwd_method"] != ""

    # Both timing entries must be present and formatted as Go durations.
    for timing_key in ("kwd_time", "kwd_1_tok_time"):
        assert timing_key in details
        assert_go_duration(details[timing_key], timing_key)

    assert "kwd_6_res_count" in details
    result_count = details["kwd_6_res_count"]
    assert result_count.isdigit()
    assert int(result_count) >= 0
132+
133+
134+
def test_hybrid_with_query_profile(collection_factory: CollectionFactory) -> None:
    """Check that a hybrid query reports both a 'vector' and a 'keyword' profile."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.hybrid(
        query="hello",
        vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True),
        limit=2,
    )
    assert response.query_profile is not None
    assert len(response.query_profile.shards) > 0

    searches = response.query_profile.shards[0].searches
    assert "vector" in searches, "Hybrid should produce a 'vector' profile"
    assert "keyword" in searches, "Hybrid should produce a 'keyword' profile"
    assert "object" not in searches

    vector_profile = searches["vector"]
    assert_common_profile(vector_profile)
    assert "vector_search_took" in vector_profile.details

    keyword_profile = searches["keyword"]
    assert_common_profile(keyword_profile)
    assert "kwd_method" in keyword_profile.details
156+
157+
158+
def test_near_vector_group_by_with_query_profile(
    collection_factory: CollectionFactory,
) -> None:
    """Check that a grouped near_vector query still returns a 'vector' profile."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True),
        group_by=GroupBy(prop="text", objects_per_group=1, number_of_groups=3),
    )
    assert response.query_profile is not None
    assert len(response.query_profile.shards) > 0

    searches = response.query_profile.shards[0].searches
    assert "vector" in searches
    assert_common_profile(searches["vector"])
174+
175+
176+
def test_full_with_profile(collection_factory: CollectionFactory) -> None:
    """Check that MetadataQuery.full_with_profile() yields the profile plus the other metadata."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery.full_with_profile(),
        limit=1,
    )
    assert len(response.objects) == 1
    metadata = response.objects[0].metadata
    # Checked in this order so the first missing field is the one reported.
    for field_name in (
        "distance",
        "creation_time",
        "last_update_time",
        "score",
        "explain_score",
    ):
        assert getattr(metadata, field_name) is not None

    assert response.query_profile is not None
    assert len(response.query_profile.shards) > 0
    first_shard = response.query_profile.shards[0]
    assert_common_profile(first_shard.searches["vector"])
195+
196+
197+
def test_full_excludes_query_profile(collection_factory: CollectionFactory) -> None:
    """Check that requesting MetadataQuery.full() leaves query_profile unset."""
    populated = _create_and_populate(collection_factory)
    full_metadata = MetadataQuery.full()
    response = populated.query.fetch_objects(return_metadata=full_metadata)
    assert response.query_profile is None
204+
205+
206+
def test_no_query_profile_when_not_requested(
    collection_factory: CollectionFactory,
) -> None:
    """Check that query_profile stays None when the metadata request omits it."""
    populated = _create_and_populate(collection_factory)
    distance_only = MetadataQuery(distance=True)
    response = populated.query.fetch_objects(return_metadata=distance_only)
    assert response.query_profile is None
215+
216+
217+
def test_query_profile_with_metadata_list(
    collection_factory: CollectionFactory,
) -> None:
    """Check that profiling can be requested through the list form of return_metadata."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=["query_profile", "distance"],
        limit=2,
    )
    assert response.query_profile is not None
    assert len(response.query_profile.shards) > 0

    searches = response.query_profile.shards[0].searches
    assert "vector" in searches
    assert_common_profile(searches["vector"])
233+
234+
235+
def test_query_profile_details_are_strings(
    collection_factory: CollectionFactory,
) -> None:
    """Check that, on every shard, search names and detail keys/values are non-empty strings."""
    populated = _create_and_populate(collection_factory)
    response = populated.query.near_vector(
        near_vector=[1.0, 0.0, 0.0],
        return_metadata=MetadataQuery(query_profile=True),
        limit=1,
    )
    query_profile = response.query_profile
    assert query_profile is not None
    for shard_profile in query_profile.shards:
        searches = shard_profile.searches
        assert len(searches) > 0, "Shard should have at least one search profile"
        for search_name, search_profile in searches.items():
            assert isinstance(search_name, str) and search_name != ""
            details = search_profile.details
            assert len(details) > 0
            for detail_name, detail_value in details.items():
                assert isinstance(detail_name, str) and detail_name != ""
                assert isinstance(detail_value, str) and detail_value != ""

weaviate/collections/classes/grpc.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,32 @@ class MetadataQuery(_WeaviateInput):
9090
score: bool = Field(default=False)
9191
explain_score: bool = Field(default=False)
9292
is_consistent: bool = Field(default=False)
93+
query_profile: bool = Field(default=False)
9394

9495
@classmethod
9596
def full(cls) -> "MetadataQuery":
96-
"""Return a MetadataQuery with all fields set to True."""
97+
"""Return a MetadataQuery with all fields set to True.
98+
99+
NOTE: `query_profile` is excluded because it adds performance overhead.
100+
Use `full_with_profile()` to include it.
101+
"""
102+
return cls(
103+
creation_time=True,
104+
last_update_time=True,
105+
distance=True,
106+
certainty=True,
107+
score=True,
108+
explain_score=True,
109+
is_consistent=True,
110+
)
111+
112+
@classmethod
113+
def full_with_profile(cls) -> "MetadataQuery":
114+
"""Return a MetadataQuery with all fields set to True, including query profiling.
115+
116+
Query profiling adds per-shard execution timing breakdowns to the response
117+
but has performance overhead. Requires Weaviate >= 1.36.9.
118+
"""
97119
return cls(
98120
creation_time=True,
99121
last_update_time=True,
@@ -102,6 +124,7 @@ def full(cls) -> "MetadataQuery":
102124
score=True,
103125
explain_score=True,
104126
is_consistent=True,
127+
query_profile=True,
105128
)
106129

107130

@@ -117,6 +140,7 @@ class _MetadataQuery:
117140
explain_score: bool = False
118141
is_consistent: bool = False
119142
vectors: Optional[List[str]] = None
143+
query_profile: bool = False
120144

121145
@classmethod
122146
def from_public(
@@ -138,6 +162,7 @@ def from_public(
138162
score=public.score,
139163
explain_score=public.explain_score,
140164
is_consistent=public.is_consistent,
165+
query_profile=public.query_profile,
141166
)
142167
)
143168

@@ -152,6 +177,7 @@ def from_public(
152177
"score",
153178
"explain_score",
154179
"is_consistent",
180+
"query_profile",
155181
]
156182
],
157183
MetadataQuery,

weaviate/collections/classes/internal.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,29 @@ def _is_empty(self) -> bool:
9090
)
9191

9292

93+
@dataclass
class SearchProfileReturn:
    """Profiling details of one search type executed on a shard."""

    # Profiling entries as a string-to-string mapping.
    details: Dict[str, str]
98+
99+
100+
@dataclass
class ShardProfileReturn:
    """Profiling data collected for one shard."""

    # Shard name.
    name: str
    # Node associated with the shard.
    node: str
    # Search profiles keyed by a string search-type name.
    searches: Dict[str, SearchProfileReturn]
107+
108+
109+
@dataclass
class QueryProfileReturn:
    """Query profiling data, one entry per shard, returned when `query_profile=True` is set in metadata."""

    # One profile per shard.
    shards: List[ShardProfileReturn]
114+
115+
93116
@dataclass
94117
class GroupByMetadataReturn:
95118
"""Metadata of an object returned by a group by query."""
@@ -210,17 +233,20 @@ class GenerativeReturn(Generic[P, R]):
210233
__generated: Optional[str]
211234
objects: List[GenerativeObject[P, R]]
212235
generative: Optional[GenerativeGrouped]
236+
query_profile: Optional[QueryProfileReturn]
213237

214238
# init required because of nuances of dataclass when defining @property generated and private var __generated
215239
def __init__(
216240
self,
217241
generated: Optional[str],
218242
objects: List[GenerativeObject[P, R]],
219243
generative: Optional[GenerativeGrouped],
244+
query_profile: Optional[QueryProfileReturn] = None,
220245
) -> None:
221246
self.__generated = generated
222247
self.objects = objects
223248
self.generative = generative
249+
self.query_profile = query_profile
224250

225251
@property
226252
@deprecated(
@@ -257,6 +283,7 @@ class GenerativeGroupByReturn(Generic[P, R]):
257283
objects: List[GroupByObject[P, R]]
258284
groups: Dict[str, GenerativeGroup[P, R]]
259285
generated: Optional[str]
286+
query_profile: Optional[QueryProfileReturn] = None
260287

261288

262289
@dataclass
@@ -265,13 +292,15 @@ class GroupByReturn(Generic[P, R]):
265292

266293
objects: List[GroupByObject[P, R]]
267294
groups: Dict[str, Group[P, R]]
295+
query_profile: Optional[QueryProfileReturn] = None
268296

269297

270298
@dataclass
271299
class QueryReturn(Generic[P, R]):
272300
"""The return type of a query within the `.query` namespace of a collection."""
273301

274302
objects: List[Object[P, R]]
303+
query_profile: Optional[QueryProfileReturn] = None
275304

276305

277306
_GQLEntryReturnType: TypeAlias = Dict[str, List[Dict[str, Any]]]

0 commit comments

Comments
 (0)