Skip to content

Commit 00dff79

Browse files
feat: support normalized metric names in APM dashboard (#1043)
Support normalized Prometheus metric names in the APM dashboard, for example traces_span_metrics_calls_total for the traces.span.metrics.calls OTEL metric.

Ref: https://redhat.atlassian.net/browse/COO-1614

Signed-off-by: Andreas Gerstmayr <agerstmayr@redhat.com>
1 parent 76cd142 commit 00dff79

1 file changed

Lines changed: 13 additions & 13 deletions

File tree

  • pkg/controllers/uiplugin

pkg/controllers/uiplugin/apm.go

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ func withServiceMetrics(variableMatchers string) dashboard.Option {
2626
timeseries.Chart(),
2727
panel.AddQuery(
2828
query.PromQL(
29-
fmt.Sprintf("sum(rate(traces_span_metrics_calls{%s}[$__rate_interval]))", variableMatchers),
29+
fmt.Sprintf(`sum(rate({__name__=~"traces_span_metrics_calls(_total)?", %s}[$__rate_interval]))`, variableMatchers),
3030
query.SeriesNameFormat("req/s"),
3131
),
3232
),
@@ -35,7 +35,7 @@ func withServiceMetrics(variableMatchers string) dashboard.Option {
3535
timeseries.Chart(),
3636
panel.AddQuery(
3737
query.PromQL(
38-
fmt.Sprintf("sum(rate(traces_span_metrics_calls{%s, status_code=\"STATUS_CODE_ERROR\"}[$__rate_interval])) or vector(0)", variableMatchers),
38+
fmt.Sprintf(`sum(rate({__name__=~"traces_span_metrics_calls(_total)?", %s, status_code="STATUS_CODE_ERROR"}[$__rate_interval])) or vector(0)`, variableMatchers),
3939
query.SeriesNameFormat("error/s"),
4040
),
4141
),
@@ -53,19 +53,19 @@ func withServiceMetrics(variableMatchers string) dashboard.Option {
5353
),
5454
panel.AddQuery(
5555
query.PromQL(
56-
fmt.Sprintf("histogram_quantile(.95, sum(rate(traces_span_metrics_duration_bucket{%s}[$__rate_interval])) by (le))", variableMatchers),
56+
fmt.Sprintf(`histogram_quantile(.95, sum(rate({__name__=~"traces_span_metrics_duration(_milliseconds)?_bucket", %s}[$__rate_interval])) by (le))`, variableMatchers),
5757
query.SeriesNameFormat("95th"),
5858
),
5959
),
6060
panel.AddQuery(
6161
query.PromQL(
62-
fmt.Sprintf("histogram_quantile(.75, sum(rate(traces_span_metrics_duration_bucket{%s}[$__rate_interval])) by (le))", variableMatchers),
62+
fmt.Sprintf(`histogram_quantile(.75, sum(rate({__name__=~"traces_span_metrics_duration(_milliseconds)?_bucket", %s}[$__rate_interval])) by (le))`, variableMatchers),
6363
query.SeriesNameFormat("75th"),
6464
),
6565
),
6666
panel.AddQuery(
6767
query.PromQL(
68-
fmt.Sprintf("histogram_quantile(.50, sum(rate(traces_span_metrics_duration_bucket{%s}[$__rate_interval])) by (le))", variableMatchers),
68+
fmt.Sprintf(`histogram_quantile(.50, sum(rate({__name__=~"traces_span_metrics_duration(_milliseconds)?_bucket", %s}[$__rate_interval])) by (le))`, variableMatchers),
6969
query.SeriesNameFormat("50th"),
7070
),
7171
),
@@ -122,52 +122,52 @@ func withOperationMetrics(variableMatchers string) dashboard.Option {
122122
),
123123
panel.AddQuery(
124124
query.PromQL(
125-
fmt.Sprintf("sum(rate(traces_span_metrics_calls{%s}[$__rate_interval])) by (span_name) > 0", variableMatchers),
125+
fmt.Sprintf(`sum(rate({__name__=~"traces_span_metrics_calls(_total)?", %s}[$__rate_interval])) by (span_name) > 0`, variableMatchers),
126126
query.SeriesNameFormat("req/s"),
127127
),
128128
),
129129
panel.AddQuery(
130130
query.PromQL(
131-
fmt.Sprintf("sum(rate(traces_span_metrics_calls{%s, status_code=\"STATUS_CODE_ERROR\"}[$__rate_interval])) by (span_name) > 0", variableMatchers),
131+
fmt.Sprintf(`sum(rate({__name__=~"traces_span_metrics_calls(_total)?", %s, status_code="STATUS_CODE_ERROR"}[$__rate_interval])) by (span_name) > 0`, variableMatchers),
132132
query.SeriesNameFormat("Error rate"),
133133
),
134134
),
135135
panel.AddQuery(
136136
query.PromQL(
137-
fmt.Sprintf("sum(rate(traces_span_metrics_duration_sum{%s}[5m]) / rate(traces_span_metrics_duration_count{%s}[5m])) by (span_name) > 0", variableMatchers, variableMatchers),
138-
query.SeriesNameFormat("95th"),
137+
fmt.Sprintf(`sum(rate({__name__=~"traces_span_metrics_duration(_milliseconds)?_sum", %s}[$__rate_interval])) by (span_name) / sum(rate({__name__=~"traces_span_metrics_duration(_milliseconds)?_count", %s}[$__rate_interval])) by (span_name) > 0`, variableMatchers, variableMatchers),
138+
query.SeriesNameFormat("Duration"),
139139
),
140140
),
141141
),
142142
)
143143
}
144144

145145
func buildAPMDashboard() (dashboard.Builder, error) {
146-
variableMatchers := "namespace=\"$namespace\", service=\"$collector\", service_name=\"$service\""
146+
variableMatchers := `namespace="$namespace", service="$collector", service_name="$service"`
147147

148148
return dashboard.New("apm",
149149
dashboard.Name("Application Performance Monitoring (APM)"),
150150
dashboard.AddVariable("namespace",
151151
listvariable.List(
152152
listvariable.DisplayName("OTEL Collector Namespace"),
153153
labelvalues.PrometheusLabelValues("namespace",
154-
labelvalues.Matchers("traces_span_metrics_calls{}"),
154+
labelvalues.Matchers(`{__name__=~"traces_span_metrics_calls(_total)?"}`),
155155
),
156156
),
157157
),
158158
dashboard.AddVariable("collector",
159159
listvariable.List(
160160
listvariable.DisplayName("OTEL Collector"),
161161
labelvalues.PrometheusLabelValues("service",
162-
labelvalues.Matchers("traces_span_metrics_calls{namespace=\"$namespace\"}"),
162+
labelvalues.Matchers(`{__name__=~"traces_span_metrics_calls(_total)?", namespace="$namespace"}`),
163163
),
164164
),
165165
),
166166
dashboard.AddVariable("service",
167167
listvariable.List(
168168
listvariable.DisplayName("Service"),
169169
labelvalues.PrometheusLabelValues("service_name",
170-
labelvalues.Matchers("traces_span_metrics_calls{namespace=\"$namespace\", service=\"$collector\"}"),
170+
labelvalues.Matchers(`{__name__=~"traces_span_metrics_calls(_total)?", namespace="$namespace", service="$collector"}`),
171171
),
172172
),
173173
),

0 commit comments

Comments
 (0)