Skip to content

Commit b573021

Browse files
authored
Merge pull request #2037 from dudanogueira/fix/aggregate-empty-result-none-values
Fix aggregate metrics returning 0/0.0 instead of None for empty result sets
2 parents c307d97 + 397cf5b commit b573021

2 files changed

Lines changed: 95 additions & 20 deletions

File tree

integration/test_collection_aggregate.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,3 +884,66 @@ def test_hybrid_bm25_operators(collection_factory: CollectionFactory) -> None:
884884
object_limit=10,
885885
)
886886
assert res.total_count == 4
887+
888+
889+
def test_aggregate_empty_result_none_values(collection_factory: CollectionFactory) -> None:
    """Check that aggregating over an empty result set yields None metrics (issue #11219).

    One object is stored, then an aggregation is run with a filter that matches
    nothing. ``total_count`` and each property's ``count`` must be 0, while every
    other requested metric must be ``None`` rather than ``0`` / ``0.0``.
    """
    schema = [
        Property(
            name="bucket",
            data_type=DataType.INT,
            index_filterable=True,
            index_range_filters=True,
        ),
        Property(name="intVal", data_type=DataType.INT),
        Property(name="numberVal", data_type=DataType.NUMBER),
    ]
    collection = collection_factory(
        properties=schema,
        vectorizer_config=Configure.Vectorizer.none(),
        inverted_index_config=Configure.inverted_index(index_null_state=True),
    )

    # The version is read from the collection's connection, so the skip can
    # only happen once the collection exists.
    if collection._connection._weaviate_version.is_lower_than(1, 29, 0):
        pytest.skip("gRPC aggregates are only supported in versions 1.29.0 and higher")

    # The only stored object lives in bucket 0.
    collection.data.insert({"bucket": 0, "intVal": 123, "numberVal": 456.78})

    # Request every available metric for both numeric properties.
    every_metric = {
        "count": True,
        "maximum": True,
        "mean": True,
        "median": True,
        "minimum": True,
        "mode": True,
        "sum_": True,
    }

    # bucket=99 does not exist, so the filter selects no objects.
    res: AggregateReturn = collection.aggregate.over_all(
        filters=Filter.by_property("bucket").equal(99),
        total_count=True,
        return_metrics=[
            Metrics("intVal").integer(**every_metric),
            Metrics("numberVal").number(**every_metric),
        ],
    )

    assert res.total_count == 0

    # Metrics that must come back as None (not 0 / 0.0) when nothing matched.
    nullable_metrics = ("maximum", "mean", "median", "minimum", "mode", "sum_")
    expected_types = (
        ("intVal", AggregateInteger),
        ("numberVal", AggregateNumber),
    )
    for prop_name, expected_type in expected_types:
        metrics = res.properties[prop_name]
        assert isinstance(metrics, expected_type)
        # count is the one metric that is a real 0 for an empty result set.
        assert metrics.count == 0
        for metric in nullable_metrics:
            assert (
                getattr(metrics, metric) is None
            ), f"{metric} should be None for empty result set"

weaviate/collections/aggregations/base_executor.py

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -269,38 +269,50 @@ def __parse_property_grpc(
269269
elif aggregation.HasField("int"):
270270
return AggregateInteger(
271271
count=aggregation.int.count,
272-
maximum=aggregation.int.maximum,
273-
mean=aggregation.int.mean,
274-
median=aggregation.int.median,
275-
minimum=aggregation.int.minimum,
276-
mode=aggregation.int.mode,
277-
sum_=aggregation.int.sum,
272+
maximum=aggregation.int.maximum if aggregation.int.HasField("maximum") else None,
273+
mean=aggregation.int.mean if aggregation.int.HasField("mean") else None,
274+
median=aggregation.int.median if aggregation.int.HasField("median") else None,
275+
minimum=aggregation.int.minimum if aggregation.int.HasField("minimum") else None,
276+
mode=aggregation.int.mode if aggregation.int.HasField("mode") else None,
277+
sum_=aggregation.int.sum if aggregation.int.HasField("sum") else None,
278278
)
279279
elif aggregation.HasField("number"):
280280
return AggregateNumber(
281281
count=aggregation.number.count,
282-
maximum=aggregation.number.maximum,
283-
mean=aggregation.number.mean,
284-
median=aggregation.number.median,
285-
minimum=aggregation.number.minimum,
286-
mode=aggregation.number.mode,
287-
sum_=aggregation.number.sum,
282+
maximum=aggregation.number.maximum
283+
if aggregation.number.HasField("maximum")
284+
else None,
285+
mean=aggregation.number.mean if aggregation.number.HasField("mean") else None,
286+
median=aggregation.number.median if aggregation.number.HasField("median") else None,
287+
minimum=aggregation.number.minimum
288+
if aggregation.number.HasField("minimum")
289+
else None,
290+
mode=aggregation.number.mode if aggregation.number.HasField("mode") else None,
291+
sum_=aggregation.number.sum if aggregation.number.HasField("sum") else None,
288292
)
289293
elif aggregation.HasField("boolean"):
290294
return AggregateBoolean(
291295
count=aggregation.boolean.count,
292-
percentage_false=aggregation.boolean.percentage_false,
293-
percentage_true=aggregation.boolean.percentage_true,
294-
total_false=aggregation.boolean.total_false,
295-
total_true=aggregation.boolean.total_true,
296+
percentage_false=aggregation.boolean.percentage_false
297+
if aggregation.boolean.HasField("percentage_false")
298+
else None,
299+
percentage_true=aggregation.boolean.percentage_true
300+
if aggregation.boolean.HasField("percentage_true")
301+
else None,
302+
total_false=aggregation.boolean.total_false
303+
if aggregation.boolean.HasField("total_false")
304+
else None,
305+
total_true=aggregation.boolean.total_true
306+
if aggregation.boolean.HasField("total_true")
307+
else None,
296308
)
297309
elif aggregation.HasField("date"):
298310
return AggregateDate(
299311
count=aggregation.date.count,
300-
maximum=aggregation.date.maximum,
301-
median=aggregation.date.median,
302-
minimum=aggregation.date.minimum,
303-
mode=aggregation.date.mode,
312+
maximum=aggregation.date.maximum if aggregation.date.HasField("maximum") else None,
313+
median=aggregation.date.median if aggregation.date.HasField("median") else None,
314+
minimum=aggregation.date.minimum if aggregation.date.HasField("minimum") else None,
315+
mode=aggregation.date.mode if aggregation.date.HasField("mode") else None,
304316
)
305317
elif aggregation.HasField("reference"):
306318
return AggregateReference(pointing_to=list(aggregation.reference.pointing_to))

0 commit comments

Comments
 (0)