Skip to content

Commit 704fb8f

Browse files
committed
Fix aggregate metrics returning 0/0.0 instead of None for empty result sets
- Fixed __parse_property_grpc() to check HasField() before accessing optional protobuf scalars
- Integer/Number aggregates now correctly return None for unset metrics (maximum, mean, median, minimum, mode, sum_)
- Date aggregates now correctly return None for unset metrics (maximum, median, minimum, mode)
- Boolean aggregates now correctly return None for unset metrics (percentage_false, percentage_true, total_false, total_true)
- Added integration test to verify None values for empty aggregate results
- Aligns gRPC behavior with GraphQL API behavior

Fixes #2036
1 parent c307d97 commit 704fb8f

2 files changed

Lines changed: 75 additions & 20 deletions

File tree

integration/test_collection_aggregate.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -884,3 +884,58 @@ def test_hybrid_bm25_operators(collection_factory: CollectionFactory) -> None:
884884
object_limit=10,
885885
)
886886
assert res.total_count == 4
887+
888+
889+
def test_aggregate_empty_result_none_values(collection_factory: CollectionFactory) -> None:
890+
"""Test for issue #11219: aggregate metrics should be None for empty result sets, not 0/0.0"""
891+
collection = collection_factory(
892+
properties=[
893+
Property(name="bucket", data_type=DataType.INT, index_filterable=True, index_range_filters=True),
894+
Property(name="intVal", data_type=DataType.INT),
895+
Property(name="numberVal", data_type=DataType.NUMBER),
896+
],
897+
vectorizer_config=Configure.Vectorizer.none(),
898+
inverted_index_config=Configure.inverted_index(index_null_state=True),
899+
)
900+
901+
if collection._connection._weaviate_version.is_lower_than(1, 29, 0):
902+
pytest.skip("gRPC aggregates are only supported in versions 1.29.0 and higher")
903+
904+
# Insert one object with bucket=0
905+
collection.data.insert({"bucket": 0, "intVal": 123, "numberVal": 456.78})
906+
907+
# Query with a filter that returns no results (bucket=99 doesn't exist)
908+
res: AggregateReturn = collection.aggregate.over_all(
909+
filters=Filter.by_property("bucket").equal(99),
910+
total_count=True,
911+
return_metrics=[
912+
Metrics("intVal").integer(count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True),
913+
Metrics("numberVal").number(count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True),
914+
],
915+
)
916+
917+
# Verify total_count is 0
918+
assert res.total_count == 0
919+
920+
# Verify integer metrics: count should be 0, all other metrics should be None (not 0)
921+
int_metrics = res.properties["intVal"]
922+
assert isinstance(int_metrics, AggregateInteger)
923+
assert int_metrics.count == 0
924+
assert int_metrics.maximum is None, "maximum should be None for empty result set"
925+
assert int_metrics.mean is None, "mean should be None for empty result set"
926+
assert int_metrics.median is None, "median should be None for empty result set"
927+
assert int_metrics.minimum is None, "minimum should be None for empty result set"
928+
assert int_metrics.mode is None, "mode should be None for empty result set"
929+
assert int_metrics.sum_ is None, "sum_ should be None for empty result set"
930+
931+
# Verify number metrics: count should be 0, all other metrics should be None (not 0.0)
932+
number_metrics = res.properties["numberVal"]
933+
assert isinstance(number_metrics, AggregateNumber)
934+
assert number_metrics.count == 0
935+
assert number_metrics.maximum is None, "maximum should be None for empty result set"
936+
assert number_metrics.mean is None, "mean should be None for empty result set"
937+
assert number_metrics.median is None, "median should be None for empty result set"
938+
assert number_metrics.minimum is None, "minimum should be None for empty result set"
939+
assert number_metrics.mode is None, "mode should be None for empty result set"
940+
assert number_metrics.sum_ is None, "sum_ should be None for empty result set"
941+

weaviate/collections/aggregations/base_executor.py

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -269,38 +269,38 @@ def __parse_property_grpc(
269269
elif aggregation.HasField("int"):
270270
return AggregateInteger(
271271
count=aggregation.int.count,
272-
maximum=aggregation.int.maximum,
273-
mean=aggregation.int.mean,
274-
median=aggregation.int.median,
275-
minimum=aggregation.int.minimum,
276-
mode=aggregation.int.mode,
277-
sum_=aggregation.int.sum,
272+
maximum=aggregation.int.maximum if aggregation.int.HasField("maximum") else None,
273+
mean=aggregation.int.mean if aggregation.int.HasField("mean") else None,
274+
median=aggregation.int.median if aggregation.int.HasField("median") else None,
275+
minimum=aggregation.int.minimum if aggregation.int.HasField("minimum") else None,
276+
mode=aggregation.int.mode if aggregation.int.HasField("mode") else None,
277+
sum_=aggregation.int.sum if aggregation.int.HasField("sum") else None,
278278
)
279279
elif aggregation.HasField("number"):
280280
return AggregateNumber(
281281
count=aggregation.number.count,
282-
maximum=aggregation.number.maximum,
283-
mean=aggregation.number.mean,
284-
median=aggregation.number.median,
285-
minimum=aggregation.number.minimum,
286-
mode=aggregation.number.mode,
287-
sum_=aggregation.number.sum,
282+
maximum=aggregation.number.maximum if aggregation.number.HasField("maximum") else None,
283+
mean=aggregation.number.mean if aggregation.number.HasField("mean") else None,
284+
median=aggregation.number.median if aggregation.number.HasField("median") else None,
285+
minimum=aggregation.number.minimum if aggregation.number.HasField("minimum") else None,
286+
mode=aggregation.number.mode if aggregation.number.HasField("mode") else None,
287+
sum_=aggregation.number.sum if aggregation.number.HasField("sum") else None,
288288
)
289289
elif aggregation.HasField("boolean"):
290290
return AggregateBoolean(
291291
count=aggregation.boolean.count,
292-
percentage_false=aggregation.boolean.percentage_false,
293-
percentage_true=aggregation.boolean.percentage_true,
294-
total_false=aggregation.boolean.total_false,
295-
total_true=aggregation.boolean.total_true,
292+
percentage_false=aggregation.boolean.percentage_false if aggregation.boolean.HasField("percentage_false") else None,
293+
percentage_true=aggregation.boolean.percentage_true if aggregation.boolean.HasField("percentage_true") else None,
294+
total_false=aggregation.boolean.total_false if aggregation.boolean.HasField("total_false") else None,
295+
total_true=aggregation.boolean.total_true if aggregation.boolean.HasField("total_true") else None,
296296
)
297297
elif aggregation.HasField("date"):
298298
return AggregateDate(
299299
count=aggregation.date.count,
300-
maximum=aggregation.date.maximum,
301-
median=aggregation.date.median,
302-
minimum=aggregation.date.minimum,
303-
mode=aggregation.date.mode,
300+
maximum=aggregation.date.maximum if aggregation.date.HasField("maximum") else None,
301+
median=aggregation.date.median if aggregation.date.HasField("median") else None,
302+
minimum=aggregation.date.minimum if aggregation.date.HasField("minimum") else None,
303+
mode=aggregation.date.mode if aggregation.date.HasField("mode") else None,
304304
)
305305
elif aggregation.HasField("reference"):
306306
return AggregateReference(pointing_to=list(aggregation.reference.pointing_to))

0 commit comments

Comments
 (0)