Skip to content

Commit d94ab5f

Browse files
committed
LOG-9356: Implement Collector ServiceMonitor Changes
1 parent 30295d6 commit d94ab5f

16 files changed

Lines changed: 627 additions & 118 deletions

File tree

internal/constants/constants.go

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,20 @@ const (
3939
InjectTrustedCABundleLabel = "config.openshift.io/inject-trusted-cabundle"
4040

4141
//ServiceAccountSecretPath is the path to find the projected serviceAccount token and other SA secrets
42-
ServiceAccountSecretPath = "/var/run/ocp-collector/serviceaccount"
43-
TrustedCABundleMountFile = "tls-ca-bundle.pem"
44-
TrustedCABundleMountDir = "/etc/pki/ca-trust/extracted/pem/"
45-
ElasticsearchName = "elasticsearch"
46-
VectorName = "vector"
47-
KibanaName = "kibana"
48-
LogfilesmetricexporterName = "logfilesmetricexporter"
49-
LogfilesmetricexporterPort = int32(2112)
50-
MetricsPortName = "metrics"
51-
MetricsPort = int32(24231)
52-
PodSecurityLabelEnforce = "pod-security.kubernetes.io/enforce"
53-
PodSecurityLabelValue = "privileged"
42+
ServiceAccountSecretPath = "/var/run/ocp-collector/serviceaccount"
43+
TrustedCABundleMountFile = "tls-ca-bundle.pem"
44+
TrustedCABundleMountDir = "/etc/pki/ca-trust/extracted/pem/"
45+
ElasticsearchName = "elasticsearch"
46+
VectorName = "vector"
47+
KibanaName = "kibana"
48+
LogfilesmetricexporterName = "logfilesmetricexporter"
49+
LogfilesmetricexporterPort = int32(2112)
50+
MetricsPortName = "metrics"
51+
MetricsPort = int32(24231)
52+
MetricsCollectionProfileFull = "full"
53+
MetricsCollectionProfileMinimal = "minimal"
54+
PodSecurityLabelEnforce = "pod-security.kubernetes.io/enforce"
55+
PodSecurityLabelValue = "privileged"
5456
// Disable gosec linter, complains "possible hard-coded secret"
5557
CollectorSecretsDir = "/var/run/ocp-collector/secrets" //nolint:gosec
5658
ConfigMapBaseDir = "/var/run/ocp-collector/config"

internal/constants/labels.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ const (
1414
LabelLoggingServiceType = "logging.observability.openshift.io/service-type"
1515
LabelLoggingInputServiceType = "logging.observability.openshift.io/input-service-type"
1616

17+
LabelMetricsCollectionProfile = "monitoring.openshift.io/collection-profile"
18+
1719
ServiceTypeMetrics = "metrics"
1820
ServiceTypeInput = "input"
1921
)

internal/controller/observability/collector.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,12 @@ func ReconcileCollector(context internalcontext.ForwarderContext, pollInterval,
153153
return err
154154
}
155155
metricsSelector := metrics.BuildSelector(constants.CollectorName, resourceNames.CommonName)
156-
if err := metrics.ReconcileServiceMonitor(context.Client, context.Forwarder.Namespace, resourceNames.CommonName, ownerRef, metricsSelector, constants.MetricsPortName); err != nil {
157-
log.Error(err, "collector.ReconcileServiceMonitor")
156+
if err := metrics.ReconcileServiceMonitor(context.Client, context.Forwarder.Namespace, resourceNames.CommonName, resourceNames.CommonName, ownerRef, metricsSelector, constants.MetricsPortName, metrics.FullRelabelConfigs, constants.MetricsCollectionProfileFull); err != nil {
157+
log.Error(err, "collector.ReconcileServiceMonitor full")
158+
return err
159+
}
160+
if err := metrics.ReconcileServiceMonitor(context.Client, context.Forwarder.Namespace, constants.MetricsCollectionProfileMinimal+"-"+resourceNames.CommonName, resourceNames.CommonName, ownerRef, metricsSelector, constants.MetricsPortName, metrics.CollectorMinimalRelabelConfigs, constants.MetricsCollectionProfileMinimal); err != nil {
161+
log.Error(err, "collector.ReconcileServiceMonitor minimal")
158162
return err
159163
}
160164

internal/controller/observability/collector_test.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,22 @@ var _ = Describe("Reconciling the Collector", func() {
225225
service := &corev1.Service{}
226226
Expect(client.Get(context.TODO(), key, service)).Should(Succeed(), "Exp. to create a Service for metrics")
227227

228-
sm := &monitoringv1.ServiceMonitor{}
229-
Expect(client.Get(context.TODO(), key, sm)).Should(Succeed(), "Exp. to create a ServiceMonitor for metrics")
228+
By("verifying the full profile ServiceMonitor")
229+
fullSM := &monitoringv1.ServiceMonitor{}
230+
Expect(client.Get(context.TODO(), key, fullSM)).Should(Succeed(), "Exp. to create a full profile ServiceMonitor")
231+
Expect(fullSM.Labels[constants.LabelMetricsCollectionProfile]).To(Equal(constants.MetricsCollectionProfileFull))
232+
Expect(fullSM.Spec.Endpoints).ToNot(BeEmpty())
233+
Expect(fullSM.Spec.Endpoints[0].MetricRelabelConfigs).To(HaveLen(1), "full profile should only have the rename rule")
234+
235+
By("verifying the minimal profile ServiceMonitor")
236+
minimalKey := types.NamespacedName{Name: constants.MetricsCollectionProfileMinimal + "-" + clfName, Namespace: namespaceName}
237+
minimalSM := &monitoringv1.ServiceMonitor{}
238+
Expect(client.Get(context.TODO(), minimalKey, minimalSM)).Should(Succeed(), "Exp. to create a minimal profile ServiceMonitor")
239+
Expect(minimalSM.Labels[constants.LabelMetricsCollectionProfile]).To(Equal(constants.MetricsCollectionProfileMinimal))
240+
Expect(minimalSM.Spec.Endpoints).ToNot(BeEmpty())
241+
Expect(minimalSM.Spec.Endpoints[0].MetricRelabelConfigs).To(HaveLen(3), "minimal profile should have rename + keep + drop")
242+
Expect(string(minimalSM.Spec.Endpoints[0].MetricRelabelConfigs[1].Action)).To(Equal("keep"))
243+
Expect(string(minimalSM.Spec.Endpoints[0].MetricRelabelConfigs[2].Action)).To(Equal("drop"))
230244

231245
},
232246
Entry("when deployed as a DaemonSet", forwarder),

internal/metrics/logfilemetricexporter/metric_exporter.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,12 @@ func Reconcile(lfmeInstance *loggingv1alpha1.LogFileMetricExporter,
7676
}
7777

7878
metricsSelector := metrics.BuildSelector(constants.LogfilesmetricexporterName, lfmeInstance.Name)
79-
if err := metrics.ReconcileServiceMonitor(requestClient, lfmeInstance.Namespace, resNames.CommonName, owner, metricsSelector, constants.MetricsPortName); err != nil {
80-
log.Error(err, "logfilemetricexporter.ReconcileServiceMonitor")
79+
if err := metrics.ReconcileServiceMonitor(requestClient, lfmeInstance.Namespace, resNames.CommonName, resNames.CommonName, owner, metricsSelector, constants.MetricsPortName, metrics.FullRelabelConfigs, constants.MetricsCollectionProfileFull); err != nil {
80+
log.Error(err, "logfilemetricexporter.ReconcileServiceMonitor full")
81+
return err
82+
}
83+
if err := metrics.ReconcileServiceMonitor(requestClient, lfmeInstance.Namespace, constants.MetricsCollectionProfileMinimal+"-"+resNames.CommonName, resNames.CommonName, owner, metricsSelector, constants.MetricsPortName, metrics.LFMEMinimalRelabelConfigs, constants.MetricsCollectionProfileMinimal); err != nil {
84+
log.Error(err, "logfilemetricexporter.ReconcileServiceMonitor minimal")
8185
return err
8286
}
8387

internal/metrics/logfilemetricexporter/metric_exporter_test.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,11 @@ var _ = Describe("Reconcile LogFileMetricExporter", func() {
115115
Expect(serviceInstance.Annotations[constants.AnnotationServingCertSecretName]).
116116
To(Equal(ExporterMetricsSecretName))
117117

118-
// ServiceMonitor
119-
// Get and check the ServiceMonitor
118+
// ServiceMonitor (full profile)
120119
Expect(reqClient.Get(context.TODO(), serviceMonitorKey, smInstance)).Should(Succeed())
121120

122121
Expect(smInstance.Name).To(Equal(constants.LogfilesmetricexporterName))
122+
Expect(smInstance.Labels[constants.LabelMetricsCollectionProfile]).To(Equal(constants.MetricsCollectionProfileFull))
123123

124124
expJobLabel := fmt.Sprintf("monitor-%s", constants.LogfilesmetricexporterName)
125125
Expect(smInstance.Spec.JobLabel).To(Equal(expJobLabel))
@@ -132,6 +132,17 @@ var _ = Describe("Reconcile LogFileMetricExporter", func() {
132132

133133
Expect(smInstance.Spec.Endpoints[0].BearerTokenFile).
134134
To(Equal("/var/run/secrets/kubernetes.io/serviceaccount/token"))
135+
Expect(smInstance.Spec.Endpoints[0].MetricRelabelConfigs).To(HaveLen(1), "full profile should only have the rename rule")
136+
137+
// ServiceMonitor (minimal profile)
138+
minimalName := constants.MetricsCollectionProfileMinimal + "-" + constants.LogfilesmetricexporterName
139+
minimalSM := &monitoringv1.ServiceMonitor{}
140+
Expect(reqClient.Get(context.TODO(), types.NamespacedName{Name: minimalName, Namespace: namespace.Name}, minimalSM)).Should(Succeed())
141+
Expect(minimalSM.Labels[constants.LabelMetricsCollectionProfile]).To(Equal(constants.MetricsCollectionProfileMinimal))
142+
Expect(minimalSM.Spec.Endpoints).ToNot(BeEmpty())
143+
Expect(minimalSM.Spec.Endpoints[0].TLSConfig.SafeTLSConfig.ServerName).To(Equal(svcURL))
144+
Expect(minimalSM.Spec.Endpoints[0].MetricRelabelConfigs).To(HaveLen(2), "LFME minimal profile should have rename + keep")
145+
Expect(string(minimalSM.Spec.Endpoints[0].MetricRelabelConfigs[1].Action)).To(Equal("keep"))
135146

136147
// Metrics Auth RBAC
137148
// Verify the metrics auth ClusterRoleBinding exists and references system:auth-delegator

internal/metrics/relabel.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package metrics
2+
3+
import (
4+
"strings"
5+
6+
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
7+
)
8+
9+
// metricAllowlistConfig names the metrics a collection profile retains.
// buildRelabelConfigs turns a non-empty list into a single "keep" rule on the
// __name__ label.
type metricAllowlistConfig struct {
	// allowedMetrics holds the metric names that are joined with "|" to form
	// the keep regex; their order is preserved in the generated pattern.
	allowedMetrics []string
}
12+
13+
// metricDropConfig describes one "drop" rule produced by buildRelabelConfigs:
// the listed metrics are discarded for series whose labelName label matches
// labelValue.
type metricDropConfig struct {
	// labelName is the first source label of the drop rule (the second is
	// always __name__).
	labelName string
	// labelValue is placed verbatim before the ";" in the rule's regex.
	labelValue string
	// excludeMetrics are the metric names alternated with "|" inside the
	// regex group that follows the ";".
	excludeMetrics []string
}
18+
19+
var collectorMinimalAllowlist = &metricAllowlistConfig{
20+
allowedMetrics: []string{
21+
// Metrics used in alerts (collector_alerts.yaml)
22+
"logcollector_component_event_unmatched_count",
23+
"vector_http_client_errors_total",
24+
"vector_http_client_requests_sent_total",
25+
"vector_http_client_responses_total",
26+
"vector_buffer_byte_size",
27+
"vector_component_errors_total",
28+
"vector_component_received_events_total",
29+
30+
// Metrics used in recording rules (collector_alerts.yaml, telemetry_rules.yaml)
31+
"vector_component_received_bytes_total",
32+
33+
// Metrics used in dashboards (openshift-logging-dashboard.json)
34+
"vector_component_sent_bytes_total",
35+
"vector_component_received_event_bytes_total",
36+
"vector_open_files",
37+
"vector_component_discarded_events_total",
38+
39+
// Additional buffer and event metrics
40+
"vector_buffer_discarded_events_total",
41+
"vector_buffer_events",
42+
"vector_buffer_sent_events_total",
43+
"vector_events_in_total",
44+
},
45+
}
46+
47+
var collectorMinimalDropConfigs = []metricDropConfig{
48+
{
49+
labelName: "component_kind",
50+
labelValue: "transform",
51+
excludeMetrics: []string{
52+
"vector_component_received_bytes_total",
53+
"vector_component_received_event_bytes_total",
54+
"vector_component_received_events_total",
55+
"vector_component_sent_bytes_total",
56+
},
57+
},
58+
}
59+
60+
var lfmeMinimalAllowlist = &metricAllowlistConfig{
61+
allowedMetrics: []string{
62+
// Used in recording rule (collector_alerts.yaml) and dashboard
63+
"log_logged_bytes_total",
64+
},
65+
}
66+
67+
var CollectorMinimalRelabelConfigs = buildRelabelConfigs(collectorMinimalAllowlist, collectorMinimalDropConfigs)
68+
var LFMEMinimalRelabelConfigs = buildRelabelConfigs(lfmeMinimalAllowlist, nil)
69+
var FullRelabelConfigs = buildRelabelConfigs(nil, nil)
70+
71+
func buildRelabelConfigs(allowlist *metricAllowlistConfig, dropConfigs []metricDropConfig) []*monitoringv1.RelabelConfig {
72+
configs := []*monitoringv1.RelabelConfig{
73+
{
74+
SourceLabels: []monitoringv1.LabelName{"__name__"},
75+
TargetLabel: "__name__",
76+
Regex: "(.*)-(.*)",
77+
Replacement: "${1}_${2}",
78+
},
79+
}
80+
81+
if allowlist != nil && len(allowlist.allowedMetrics) > 0 {
82+
configs = append(configs, &monitoringv1.RelabelConfig{
83+
Action: "keep",
84+
SourceLabels: []monitoringv1.LabelName{"__name__"},
85+
Regex: strings.Join(allowlist.allowedMetrics, "|"),
86+
})
87+
}
88+
89+
for _, drop := range dropConfigs {
90+
configs = append(configs, &monitoringv1.RelabelConfig{
91+
Action: "drop",
92+
SourceLabels: []monitoringv1.LabelName{monitoringv1.LabelName(drop.labelName), "__name__"},
93+
Regex: drop.labelValue + ";(" + strings.Join(drop.excludeMetrics, "|") + ")",
94+
})
95+
}
96+
97+
return configs
98+
}

internal/metrics/relabel_test.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package metrics
2+
3+
import (
4+
. "github.com/onsi/ginkgo/v2"
5+
. "github.com/onsi/gomega"
6+
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
7+
)
8+
9+
var _ = Describe("buildRelabelConfigs", func() {
10+
It("should return only the rename rule when no allowlist or drops are provided", func() {
11+
configs := buildRelabelConfigs(nil, nil)
12+
Expect(configs).To(HaveLen(1))
13+
Expect(configs[0].TargetLabel).To(Equal("__name__"))
14+
Expect(configs[0].Regex).To(Equal("(.*)-(.*)"))
15+
Expect(configs[0].Replacement).To(Equal("${1}_${2}"))
16+
})
17+
18+
It("should return rename + keep when an allowlist is provided", func() {
19+
allowlist := &metricAllowlistConfig{
20+
allowedMetrics: []string{"metric_a", "metric_b"},
21+
}
22+
configs := buildRelabelConfigs(allowlist, nil)
23+
Expect(configs).To(HaveLen(2))
24+
25+
Expect(configs[0].Regex).To(Equal("(.*)-(.*)"))
26+
27+
Expect(string(configs[1].Action)).To(Equal("keep"))
28+
Expect(configs[1].SourceLabels).To(Equal([]monitoringv1.LabelName{"__name__"}))
29+
Expect(configs[1].Regex).To(Equal("metric_a|metric_b"))
30+
})
31+
32+
It("should return rename + keep + drop when allowlist and drops are provided", func() {
33+
allowlist := &metricAllowlistConfig{
34+
allowedMetrics: []string{"metric_a", "metric_b", "metric_c"},
35+
}
36+
drops := []metricDropConfig{
37+
{
38+
labelName: "component_kind",
39+
labelValue: "transform",
40+
excludeMetrics: []string{"metric_a", "metric_b"},
41+
},
42+
}
43+
configs := buildRelabelConfigs(allowlist, drops)
44+
Expect(configs).To(HaveLen(3))
45+
46+
Expect(string(configs[1].Action)).To(Equal("keep"))
47+
Expect(configs[1].Regex).To(Equal("metric_a|metric_b|metric_c"))
48+
49+
Expect(string(configs[2].Action)).To(Equal("drop"))
50+
Expect(configs[2].SourceLabels).To(Equal([]monitoringv1.LabelName{"component_kind", "__name__"}))
51+
Expect(configs[2].Regex).To(Equal("transform;(metric_a|metric_b)"))
52+
})
53+
54+
It("should build valid CollectorMinimalRelabelConfigs", func() {
55+
Expect(CollectorMinimalRelabelConfigs).To(HaveLen(3))
56+
Expect(string(CollectorMinimalRelabelConfigs[1].Action)).To(Equal("keep"))
57+
Expect(string(CollectorMinimalRelabelConfigs[2].Action)).To(Equal("drop"))
58+
59+
keepRegex := CollectorMinimalRelabelConfigs[1].Regex
60+
for _, m := range collectorMinimalAllowlist.allowedMetrics {
61+
Expect(keepRegex).To(ContainSubstring(m), "missing metric in keep regex: %s", m)
62+
}
63+
})
64+
65+
It("should build valid LFMEMinimalRelabelConfigs", func() {
66+
Expect(LFMEMinimalRelabelConfigs).To(HaveLen(2))
67+
Expect(string(LFMEMinimalRelabelConfigs[1].Action)).To(Equal("keep"))
68+
Expect(LFMEMinimalRelabelConfigs[1].Regex).To(Equal("log_logged_bytes_total"))
69+
})
70+
71+
It("should build FullRelabelConfigs with only the rename rule", func() {
72+
Expect(FullRelabelConfigs).To(HaveLen(1))
73+
Expect(FullRelabelConfigs[0].Regex).To(Equal("(.*)-(.*)"))
74+
})
75+
76+
It("should return only the rename rule when allowlist has empty metrics", func() {
77+
allowlist := &metricAllowlistConfig{
78+
allowedMetrics: []string{},
79+
}
80+
configs := buildRelabelConfigs(allowlist, nil)
81+
Expect(configs).To(HaveLen(1))
82+
Expect(configs[0].TargetLabel).To(Equal("__name__"))
83+
})
84+
85+
It("should return rename + drop when only drop configs are provided", func() {
86+
drops := []metricDropConfig{
87+
{
88+
labelName: "component_kind",
89+
labelValue: "transform",
90+
excludeMetrics: []string{"metric_a"},
91+
},
92+
}
93+
configs := buildRelabelConfigs(nil, drops)
94+
Expect(configs).To(HaveLen(2))
95+
96+
Expect(configs[0].Regex).To(Equal("(.*)-(.*)"))
97+
Expect(string(configs[1].Action)).To(Equal("drop"))
98+
Expect(configs[1].SourceLabels).To(Equal([]monitoringv1.LabelName{"component_kind", "__name__"}))
99+
Expect(configs[1].Regex).To(Equal("transform;(metric_a)"))
100+
})
101+
})

internal/metrics/service_monitor.go

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ const (
1717
prometheusBearerTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token"
1818
)
1919

20-
func newServiceMonitor(namespace, name string, owner metav1.OwnerReference, selector map[string]string, portName string) *monitoringv1.ServiceMonitor {
20+
func newServiceMonitor(namespace, name, serviceName string, owner metav1.OwnerReference, selector map[string]string, portName string, metricRelabelConfigs []*monitoringv1.RelabelConfig, profile string) *monitoringv1.ServiceMonitor {
2121
var endpoint = []monitoringv1.Endpoint{
2222
{
2323
Port: portName,
@@ -27,26 +27,18 @@ func newServiceMonitor(namespace, name string, owner metav1.OwnerReference, sele
2727
TLSConfig: &monitoringv1.TLSConfig{
2828
CAFile: prometheusCAFile,
2929
SafeTLSConfig: monitoringv1.SafeTLSConfig{
30-
ServerName: fmt.Sprintf("%s.%s.svc", name, namespace),
31-
},
32-
},
33-
// Replaces labels that have `-` with `_`
34-
// Example:
35-
// app_kubernetes_io_part-of -> app_kubernetes_io_part_of
36-
MetricRelabelConfigs: []*monitoringv1.RelabelConfig{
37-
{
38-
SourceLabels: []monitoringv1.LabelName{
39-
"__name__",
40-
},
41-
TargetLabel: "__name__",
42-
Regex: "(.*)-(.*)",
43-
Replacement: "${1}_${2}",
30+
ServerName: fmt.Sprintf("%s.%s.svc", serviceName, namespace),
4431
},
4532
},
33+
MetricRelabelConfigs: metricRelabelConfigs,
4634
},
4735
}
4836

4937
desired := runtime.NewServiceMonitor(namespace, name)
38+
if desired.Labels == nil {
39+
desired.Labels = map[string]string{}
40+
}
41+
desired.Labels[constants.LabelMetricsCollectionProfile] = profile
5042
desired.Spec = monitoringv1.ServiceMonitorSpec{
5143
JobLabel: fmt.Sprintf("monitor-%s", name),
5244
Endpoints: endpoint,
@@ -77,7 +69,7 @@ func BuildSelector(component, instance string) map[string]string {
7769
}
7870
}
7971

80-
func ReconcileServiceMonitor(k8sClient client.Client, namespace, name string, owner metav1.OwnerReference, selector map[string]string, portName string) error {
81-
desired := newServiceMonitor(namespace, name, owner, selector, portName)
72+
func ReconcileServiceMonitor(k8sClient client.Client, namespace, name, serviceName string, owner metav1.OwnerReference, selector map[string]string, portName string, metricRelabelConfigs []*monitoringv1.RelabelConfig, profile string) error {
73+
desired := newServiceMonitor(namespace, name, serviceName, owner, selector, portName, metricRelabelConfigs, profile)
8274
return reconcile.ServiceMonitor(k8sClient, desired)
8375
}

0 commit comments

Comments
 (0)