From b9a1682e25b64fb40f892268071936e69b65c2a8 Mon Sep 17 00:00:00 2001 From: reilly Date: Sun, 26 Apr 2026 00:27:52 +0800 Subject: [PATCH] fix(tdigest): fix incorrect min/max default initialization CentroidsWithDelta struct was default-initializing min and max to 0, which caused incorrect behavior when merging empty tdigests. Also delete Reset({}) overload to prevent accidental misuse. Assisted-by: Claude Code:glm-5 --- src/types/tdigest.cc | 3 + src/types/tdigest.h | 9 +-- .../gocase/unit/type/tdigest/tdigest_test.go | 56 +++++++++++++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/types/tdigest.cc b/src/types/tdigest.cc index 2f5ef689adc..445d83fd211 100644 --- a/src/types/tdigest.cc +++ b/src/types/tdigest.cc @@ -28,6 +28,7 @@ refer to https://github.com/apache/arrow/blob/27bbd593625122a4a25d9471c8aaf5df54 #include #include +#include #include #include @@ -355,6 +356,8 @@ class TDigest { void Merge(const std::vector& others); void Add(const std::vector& items); void Reset(const CentroidsWithDelta& centroid_list); + // Callers must explicitly specify the initial state to reset to; implicit initialization like Reset({}) is forbidden. + void Reset(std::initializer_list) = delete; void Reset(); CentroidsWithDelta DumpCentroids() const; diff --git a/src/types/tdigest.h b/src/types/tdigest.h index 1f152c1e00d..531f693ed20 100644 --- a/src/types/tdigest.h +++ b/src/types/tdigest.h @@ -22,6 +22,7 @@ #include +#include #include #include #include @@ -47,10 +48,10 @@ struct Centroid { struct CentroidsWithDelta { std::vector centroids; - uint64_t delta; - double min; - double max; - double total_weight; + uint64_t delta = 0; + double min = std::numeric_limits::max(); + double max = std::numeric_limits::lowest(); + double total_weight = 0; }; StatusOr TDigestMerge(const std::vector& centroids_list, uint64_t delta); diff --git a/tests/gocase/unit/type/tdigest/tdigest_test.go b/tests/gocase/unit/type/tdigest/tdigest_test.go index b7b4893a75d..e4daa3e188a 100644 --- a/tests/gocase/unit/type/tdigest/tdigest_test.go +++ b/tests/gocase/unit/type/tdigest/tdigest_test.go @@ -689,6 +689,62 @@ func tdigestTests(t *testing.T, configs util.KvrocksServerConfigs) { require.EqualValues(t, 5, info.Observations) // src data replaced dest data due to OVERRIDE }) + // Regression test for empty merge: min/max should be correctly initialized + // Before fix: merging empty sources initialized min/max to 0, corrupting subsequent add + t.Run("tdigest.merge empty sources then add samples", func(t *testing.T) { + keyPrefix := "tdigest_merge_empty_" + + srcKey1 := keyPrefix + "src1" + srcKey2 := keyPrefix + "src2" + destKey := keyPrefix + "dest" + + // Create empty sources (no observations added) + require.NoError(t, rdb.Do(ctx, "TDIGEST.CREATE", srcKey1, "compression", "100").Err()) + require.NoError(t, rdb.Do(ctx, "TDIGEST.CREATE", srcKey2, "compression", "100").Err()) + + // Merge empty sources into new destination + require.NoError(t, rdb.Do(ctx, "TDIGEST.MERGE", destKey, 2, srcKey1, srcKey2).Err()) + + // Verify dest is empty + rsp := rdb.Do(ctx, "TDIGEST.INFO", destKey) + require.NoError(t, rsp.Err()) + info := toTdigestInfo(t, rsp.Val()) + require.EqualValues(t, 0, info.Observations) + + // Add positive samples and verify min/max + require.NoError(t, rdb.Do(ctx, "TDIGEST.ADD", destKey, "10", "20", "30").Err()) + + rsp = rdb.Do(ctx, "TDIGEST.MIN", destKey) + require.NoError(t, rsp.Err()) + minVal, err := rsp.Float64() + require.NoError(t, err) + require.InEpsilon(t, 10.0, minVal, 0.1, "min should be 10, not 0") + + rsp = rdb.Do(ctx, "TDIGEST.MAX", destKey) + require.NoError(t, rsp.Err()) + maxVal, err := rsp.Float64() + require.NoError(t, err) + require.InEpsilon(t, 30.0, maxVal, 0.1, "max should be 30") + + // Test with negative samples + destKey2 := keyPrefix + "dest2" + require.NoError(t, rdb.Do(ctx, "TDIGEST.MERGE", destKey2, 2, srcKey1, srcKey2).Err()) + + require.NoError(t, rdb.Do(ctx, "TDIGEST.ADD", destKey2, "-10", "-20", "-30").Err()) + + rsp = rdb.Do(ctx, "TDIGEST.MIN", destKey2) + require.NoError(t, rsp.Err()) + minVal, err = rsp.Float64() + require.NoError(t, err) + require.InEpsilon(t, -30.0, minVal, 0.1, "min should be -30") + + rsp = rdb.Do(ctx, "TDIGEST.MAX", destKey2) + require.NoError(t, rsp.Err()) + maxVal, err = rsp.Float64() + require.NoError(t, err) + require.InEpsilon(t, -10.0, maxVal, 0.1, "max should be -10, not 0") + }) + t.Run("tdigest.revrank with different arguments", func(t *testing.T) { keyPrefix := "tdigest_revrank_"