Skip to content

Commit 0c2524f

Browse files
committed
receiver/prometheus: add "up" metric for instances
Make a receiver-specific view that'll be registered and used to record the "up" status as either "0.0" or "1.0" when an instance can't be scraped or can be, respectively. This ensures that the collector can act as a passthrough for statuses and it currently outputs: # HELP up Whether the endpoint is alive or not # TYPE up gauge up{instance="0.0.0.0:8888"} 1 up{instance="localhost:9999"} 0 I did not take the approach of plainly sending "_up"-suffixed metric names and then recommending relabelling inside the exporter itself, like: - source_labels: [__name__] regex: "(.+)_up" target_label: "__name__" replacement: "up" because: * it'd apply ConstLabels on every *_up metric, whereas we only want "instance=$INSTANCE" * other exporters wouldn't be able to use the "up" metric as is if we inject rewrites Regardless of whether we used a label rewrite, the end result would be the following: up{instance="localhost:8888",job="otlc"} up{exported_instance="0.0.0.0:9999",instance="localhost:8888",job="otlc"} up{exported_instance="0.0.0.0:1234",instance="localhost:8888",job="otlc"} which this change accomplishes without having to inject any label rewrites, but just by the new imports and upgrade of the prometheus exporter. Fixes open-telemetry/prometheus-interoperability-spec#8 Requires census-ecosystem/opencensus-go-exporter-prometheus#24
1 parent 9c31e26 commit 0c2524f

4 files changed

Lines changed: 68 additions & 3 deletions

File tree

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// Copyright The OpenTelemetry Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package internal
16+
17+
import (
18+
"context"
19+
20+
"go.opencensus.io/stats"
21+
"go.opencensus.io/stats/view"
22+
"go.opencensus.io/tag"
23+
)
24+
25+
var tagInstance, _ = tag.NewKey("instance")
26+
27+
var statUpStatus = stats.Int64("up", "Whether the endpoint is alive or not", stats.UnitDimensionless)
28+
29+
func MetricViews() []*view.View {
30+
return []*view.View{
31+
{
32+
Name: statUpStatus.Name(),
33+
Measure: statUpStatus,
34+
Description: statUpStatus.Description(),
35+
TagKeys: []tag.Key{tagInstance},
36+
Aggregation: view.LastValue(),
37+
},
38+
}
39+
}
40+
41+
func recordInstanceAsUp(ctx context.Context, instanceValue string) context.Context {
42+
ctx, _ = tag.New(ctx, tag.Upsert(tagInstance, instanceValue))
43+
stats.Record(ctx, statUpStatus.M(1))
44+
return ctx
45+
}
46+
47+
func recordInstanceAsDown(ctx context.Context, instanceValue string) context.Context {
48+
ctx, _ = tag.New(ctx, tag.Upsert(tagInstance, instanceValue))
49+
stats.Record(ctx, statUpStatus.M(0))
50+
return ctx
51+
}

receiver/prometheusreceiver/internal/metricsbuilder.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package internal
1616

1717
import (
18+
"context"
1819
"errors"
1920
"fmt"
2021
"regexp"
@@ -99,18 +100,27 @@ func (b *metricBuilder) AddDataPoint(ls labels.Labels, t int64, v float64) error
99100
delete(lm, model.MetricNameLabel)
100101
// See https://www.prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
101102
// up: 1 if the instance is healthy, i.e. reachable, or 0 if the scrape failed.
102-
if metricName == scrapeUpMetricName && v != 1.0 {
103-
if v == 0.0 {
103+
instanceValue := lm["instance"]
104+
if metricName == scrapeUpMetricName {
105+
switch v {
106+
case 1.0: // The instance is up!
107+
recordInstanceAsUp(context.Background(), instanceValue)
108+
109+
case 0.0: // The instance is definitely down.
110+
recordInstanceAsDown(context.Background(), instanceValue)
104111
b.logger.Warn("Failed to scrape Prometheus endpoint",
105112
zap.Int64("scrape_timestamp", t),
106113
zap.String("target_labels", fmt.Sprintf("%v", lm)))
107-
} else {
114+
115+
default: // We got an invalid value for "up"
116+
recordInstanceAsDown(context.Background(), instanceValue)
108117
b.logger.Warn("The 'up' metric contains invalid value",
109118
zap.Float64("value", v),
110119
zap.Int64("scrape_timestamp", t),
111120
zap.String("target_labels", fmt.Sprintf("%v", lm)))
112121
}
113122
}
123+
114124
return nil
115125
case b.useStartTimeMetric && b.matchStartTimeMetric(metricName):
116126
b.startTime = v

receiver/prometheusreceiver/metrics_receiver.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ func newPrometheusReceiver(logger *zap.Logger, cfg *Config, next consumer.Metric
4949
return pr
5050
}
5151

52+
// MetricViews exposes internal.MetricViews from the receiver package so that
// callers outside it can obtain the receiver's views (e.g. to register them).
var MetricViews = internal.MetricViews
53+
5254
// Start is the method that starts Prometheus scraping and it
5355
// is controlled by having previously defined a Configuration using perhaps New.
5456
func (r *pReceiver) Start(_ context.Context, host component.Host) error {

service/telemetry.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"go.opentelemetry.io/collector/obsreport"
3131
"go.opentelemetry.io/collector/processor/batchprocessor"
3232
"go.opentelemetry.io/collector/receiver/kafkareceiver"
33+
prometheusreceiver "go.opentelemetry.io/collector/receiver/prometheusreceiver"
3334
telemetry2 "go.opentelemetry.io/collector/service/internal/telemetry"
3435
"go.opentelemetry.io/collector/translator/conventions"
3536
)
@@ -66,6 +67,7 @@ func (tel *appTelemetry) init(asyncErrorChannel chan<- error, ballastSizeBytes u
6667
views = append(views, kafkareceiver.MetricViews()...)
6768
views = append(views, obsreport.Configure(level)...)
6869
views = append(views, processMetricsViews.Views()...)
70+
views = append(views, prometheusreceiver.MetricViews()...)
6971

7072
tel.views = views
7173
if err = view.Register(views...); err != nil {

0 commit comments

Comments
 (0)