Skip to content

Commit 777f0f8

Browse files
fix(querier): force layered aggregation for boolean COUNTER metrics
For COUNTER metrics with boolean DBField (containing if(..., 1, 0)), aggregation functions other than SUM/AVG produce meaningless row-level results in UNLAY mode (e.g. MAX is always 1, AVG is 0/1 ratio). Force them to LAYERED mode: inner SUM aggregates by datasource_interval, outer applies the actual aggregation function on per-window counts. This gives meaningful semantics such as MAX(SUM(...)) for peak window rate. Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 87ef104 commit 777f0f8

2 files changed

Lines changed: 31 additions & 1 deletion

File tree

server/querier/engine/clickhouse/clickhouse_test.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,28 @@ var (
608608
db: "event",
609609
input: "SELECT Count(row), alert_policy, alert_policy_id, event_level, auto_service_0, auto_service_type_0, auto_service_type, auto_service FROM alert_event where auto_service='abc' AND auto_service_type_1=1 GROUP BY alert_policy, alert_policy_id, event_level, auto_service_0, auto_service_type_0, auto_service_type, auto_service LIMIT 1",
610610
output: []string{"SELECT dictGet('flow_tag.alarm_policy_map', 'name', (toUInt64(policy_id))) AS `alert_policy`, policy_id AS `alert_policy_id`, event_level, tag_string_values[indexOf(tag_string_names,'auto_service_0')] AS `auto_service_0`, tag_int_values[indexOf(tag_int_names,'auto_service_type_0')] AS `auto_service_type_0`, tag_int_values[indexOf(tag_int_names,'auto_service_type')] AS `auto_service_type`, tag_string_values[indexOf(tag_string_names,'auto_service')] AS `auto_service`, COUNT(1) AS `Count(row)` FROM event.`alert_event` FINAL WHERE if(indexOf(tag_string_names,'auto_service')=0 AND indexOf(tag_string_names,'auto_service_0')=0 AND indexOf(tag_string_names,'auto_service_1')=0,1!=1,(tag_string_values[indexOf(tag_string_names,'auto_service')] = 'abc' OR tag_string_values[indexOf(tag_string_names,'auto_service_0')] = 'abc' OR tag_string_values[indexOf(tag_string_names,'auto_service_1')] = 'abc')) AND if(indexOf(tag_int_names,'auto_service_type_1')=0,NULL,tag_int_values[indexOf(tag_int_names,'auto_service_type_1')]) = 1 GROUP BY dictGet('flow_tag.alarm_policy_map', 'name', (toUInt64(policy_id))) AS `alert_policy`, policy_id AS `alert_policy_id`, `event_level`, tag_string_values[indexOf(tag_string_names,'auto_service_0')] AS `auto_service_0`, tag_int_values[indexOf(tag_int_names,'auto_service_type_0')] AS `auto_service_type_0`, tag_int_values[indexOf(tag_int_names,'auto_service_type')] AS `auto_service_type`, tag_string_values[indexOf(tag_string_names,'auto_service')] AS `auto_service` LIMIT 1"},
611-
}}
611+
}, {
612+
// boolean COUNTER metric: Max on new_flow → LAYERED (MAX(SUM(...)))
613+
input: "select Max(new_flow) as max_new_flow from l4_flow_log limit 1",
614+
output: []string{"SELECT MAX(`_sum_if(is_new_flow=1,1,0)`) AS `max_new_flow` FROM (SELECT SUM(if(is_new_flow=1,1,0)) AS `_sum_if(is_new_flow=1,1,0)` FROM flow_log.`l4_flow_log`) LIMIT 1"},
615+
}, {
616+
// boolean COUNTER metric: Min on new_flow → LAYERED (MIN(SUM(...)))
617+
input: "select Min(new_flow) as min_new_flow from l4_flow_log limit 1",
618+
output: []string{"WITH if(count(`_sum_if(is_new_flow=1,1,0)`)=1, min(`_sum_if(is_new_flow=1,1,0)`), 0) AS `min_fillnullaszero__sum_if(is_new_flow=1,1,0)` SELECT `min_fillnullaszero__sum_if(is_new_flow=1,1,0)` AS `min_new_flow` FROM (SELECT SUM(if(is_new_flow=1,1,0)) AS `_sum_if(is_new_flow=1,1,0)` FROM flow_log.`l4_flow_log`) LIMIT 1"},
619+
}, {
620+
// boolean COUNTER metric: AAvg on new_flow → LAYERED (AVG(SUM(...)))
621+
input: "select AAvg(new_flow) as aavg_new_flow from l4_flow_log limit 1",
622+
output: []string{"SELECT AVG(`_sum_if(is_new_flow=1,1,0)`) AS `aavg_new_flow` FROM (SELECT SUM(if(is_new_flow=1,1,0)) AS `_sum_if(is_new_flow=1,1,0)` FROM flow_log.`l4_flow_log`) LIMIT 1"},
623+
}, {
624+
// control: Sum on new_flow → UNLAY (no change)
625+
input: "select Sum(new_flow) as sum_new_flow from l4_flow_log limit 1",
626+
output: []string{"SELECT SUM(if(is_new_flow=1,1,0)) AS `sum_new_flow` FROM flow_log.`l4_flow_log` LIMIT 1"},
627+
}, {
628+
// boolean COUNTER metric: Max with time grouping → 1s SUM then 120s MAX
629+
input: "select Max(new_flow) as max_new_flow, time(time,120) as time_120 from l4_flow_log group by time_120 limit 1",
630+
output: []string{"WITH toStartOfInterval(_time, toIntervalSecond(120)) + toIntervalSecond(arrayJoin([0]) * 120) AS `_time_120` SELECT toUnixTimestamp(`_time_120`) AS `time_120`, MAX(`_sum_if(is_new_flow=1,1,0)`) AS `max_new_flow` FROM (WITH toStartOfInterval(time, toIntervalSecond(1)) AS `_time` SELECT _time, SUM(if(is_new_flow=1,1,0)) AS `_sum_if(is_new_flow=1,1,0)` FROM flow_log.`l4_flow_log` GROUP BY `_time`) GROUP BY `time_120` LIMIT 1"},
631+
632+
}}
612633
)
613634

614635
func TestGetSql(t *testing.T) {

server/querier/engine/clickhouse/function.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,15 @@ func GetAggFunc(name string, args []string, alias string, derivativeArgs []strin
130130
if isDerivative {
131131
levelFlag = view.MODEL_METRICS_LEVEL_FLAG_LAYERED
132132
}
133+
// Aggregation functions on boolean COUNTER metrics (DBField ends with 1,0)
134+
// Only SUM and AVG keep UNLAY; other aggregators go LAYERED because UNLAY
135+
// produces meaningless row-level results on 0/1 values. Inner SUM groups by
136+
// datasource_interval, outer applies the function on per-window counts.
137+
if metricStruct.Type == metrics.METRICS_TYPE_COUNTER &&
138+
strings.Contains(metricStruct.DBField, ",1,0)") &&
139+
name != view.FUNCTION_SUM && name != view.FUNCTION_AVG {
140+
levelFlag = view.MODEL_METRICS_LEVEL_FLAG_LAYERED
141+
}
133142
return &AggFunction{
134143
Metrics: metricStruct,
135144
Name: name,

0 commit comments

Comments
 (0)