Skip to content

Commit cbd6ba3

Browse files
jan--f (Jan Fajerski) and claude
authored
COO-1687: feat: migrate to EndpointSlice service discovery (#1028)
* feat: migrate to EndpointSlice service discovery

  Prometheus Operator defaults to watching the deprecated Endpoints API for service discovery. Switch the operator's own ServiceMonitors to use EndpointSlice explicitly, which eliminates the deprecation log noise from the operator's internal components.

  Changes:
  - Set serviceDiscoveryRole: EndpointSlice on the ServiceMonitors we own (observability-operator, health-analyzer, thanos-querier) so that prometheus-operator uses the EndpointSlice role for these jobs.
  - Add discovery.k8s.io/endpointslices to all Prometheus RBAC roles and ClusterRoles (alongside the existing endpoints permission) so that Prometheus can serve both kinds of ServiceMonitors simultaneously.
  - Add discovery.k8s.io/endpointslices to the korrel8r ClusterRole so the correlation tool can read both endpoint representations.
  - Add the corresponding kubebuilder markers and update the generated cluster role YAML and CSV.

  The Prometheus CR's global serviceDiscoveryRole is intentionally left unset (defaulting to Endpoints) so that user-created ServiceMonitors continue to work without modification. Users can opt individual ServiceMonitors into EndpointSlice by setting serviceDiscoveryRole: EndpointSlice on them.

  Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
  Signed-off-by: Jan Fajerski <jan@fajerski.name>

* fix: revert serviceDiscoveryRole from monitoring.coreos.com ServiceMonitors

  The operator's self-monitoring ServiceMonitor and the health-analyzer ServiceMonitor are monitoring.coreos.com objects processed by the platform prometheus-operator on OpenShift, which we don't control. Setting serviceDiscoveryRole: EndpointSlice on them requires the platform Prometheus to have endpointslices access and the platform prometheus-operator to correctly generate TLS-aware scrape configs for the endpointslice role — neither of which is guaranteed across OCP versions.

  The thanos-querier ServiceMonitor (monitoring.rhobs) is handled by the obo-prometheus-operator we manage, so it retains the EndpointSlice setting safely.

  Fixes TestOperatorMetrics/metrics_ingested_in_Prometheus on OCP clusters.

  Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>

---------

Signed-off-by: Jan Fajerski <jan@fajerski.name>
Co-authored-by: Jan Fajerski <jan@fajerski.name>
Co-authored-by: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent d844495 commit cbd6ba3

9 files changed

Lines changed: 38 additions & 1 deletion

File tree

bundle/manifests/observability-operator.clusterserviceversion.yaml

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ metadata:
4343
certified: "false"
4444
console.openshift.io/operator-monitoring-default: "true"
4545
containerImage: observability-operator:1.3.0
46-
createdAt: "2026-03-03T14:08:32Z"
46+
createdAt: "2026-03-09T07:53:46Z"
4747
description: A Go based Kubernetes operator to setup and manage highly available
4848
Monitoring Stack using Prometheus, Alertmanager and Thanos Querier.
4949
operatorframework.io/cluster-monitoring: "true"
@@ -436,6 +436,14 @@ spec:
436436
- patch
437437
- update
438438
- watch
439+
- apiGroups:
440+
- discovery.k8s.io
441+
resources:
442+
- endpointslices
443+
verbs:
444+
- get
445+
- list
446+
- watch
439447
- apiGroups:
440448
- extensions
441449
- networking.k8s.io

deploy/operator/observability-operator-cluster-role.yaml

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,14 @@ rules:
119119
- patch
120120
- update
121121
- watch
122+
- apiGroups:
123+
- discovery.k8s.io
124+
resources:
125+
- endpointslices
126+
verbs:
127+
- get
128+
- list
129+
- watch
122130
- apiGroups:
123131
- extensions
124132
- networking.k8s.io

pkg/controllers/monitoring/monitoring-stack/components.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,10 @@ func newPrometheusClusterRole(rbacResourceName string, rbacVerbs []string) *rbac
100100
APIGroups: []string{""},
101101
Resources: []string{"services", "endpoints", "pods"},
102102
Verbs: rbacVerbs,
103+
}, {
104+
APIGroups: []string{"discovery.k8s.io"},
105+
Resources: []string{"endpointslices"},
106+
Verbs: rbacVerbs,
103107
}, {
104108
APIGroups: []string{"extensions", "networking.k8s.io"},
105109
Resources: []string{"ingresses"},

pkg/controllers/monitoring/monitoring-stack/controller.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ const finalizerName = "monitoring.observability.openshift.io/finalizer"
8080

8181
// RBAC for delegating permissions to Prometheus
8282
//+kubebuilder:rbac:groups="",resources=pods;services;endpoints,verbs=get;list;watch
83+
//+kubebuilder:rbac:groups=discovery.k8s.io,resources=endpointslices,verbs=get;list;watch
8384
//+kubebuilder:rbac:groups=extensions;networking.k8s.io,resources=ingresses,verbs=get;list;watch
8485

8586
// RBAC for delegating the use of SCC nonroot-v2 (for OpenShift >= 4.11) and nonroot (for OpenShift < 4.11)

pkg/controllers/monitoring/thanos-querier/components.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -249,6 +249,7 @@ func newServiceMonitor(name string, namespace string, thanos *msoapi.ThanosQueri
249249
Labels: componentLabels(name),
250250
},
251251
Spec: monv1.ServiceMonitorSpec{
252+
ServiceDiscoveryRole: ptr.To(monv1.EndpointSliceRole),
252253
Endpoints: []monv1.Endpoint{
253254
{
254255
Port: "http",

pkg/controllers/operator/components.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,10 @@ func newPrometheusRole(namespace string) *rbacv1.Role {
8686
APIGroups: []string{""},
8787
Resources: []string{"services", "endpoints", "pods"},
8888
Verbs: []string{"get", "list", "watch"},
89+
}, {
90+
APIGroups: []string{"discovery.k8s.io"},
91+
Resources: []string{"endpointslices"},
92+
Verbs: []string{"get", "list", "watch"},
8993
}},
9094
}
9195
}

pkg/controllers/uiplugin/controller.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@ const (
9595
//+kubebuilder:rbac:groups=apps,resources=daemonsets;deployments;replicasets;statefulsets,verbs=get;list;watch
9696
//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings;clusterroles;clusterrolebindings,verbs=get;list;watch
9797
//+kubebuilder:rbac:groups="",resources=configmaps;endpoints;events;namespaces;nodes;persistentvolumeclaims;persistentvolumes;pods;replicationcontrollers;secrets;serviceaccounts;services,verbs=get;list;watch
98+
//+kubebuilder:rbac:groups=discovery.k8s.io,resources=endpointslices,verbs=get;list;watch
9899
//+kubebuilder:rbac:groups=batch,resources=cronjobs;jobs,verbs=get;list;watch
99100
//+kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch
100101
//+kubebuilder:rbac:groups=policy,resources=poddisruptionbudgets,verbs=get;list;watch

pkg/controllers/uiplugin/health_analyzer.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,11 @@ func newHealthAnalyzerPrometheusRole(namespace string) *rbacv1.Role {
3939
Resources: []string{"services", "endpoints", "pods"},
4040
Verbs: []string{"get", "list", "watch"},
4141
},
42+
{
43+
APIGroups: []string{"discovery.k8s.io"},
44+
Resources: []string{"endpointslices"},
45+
Verbs: []string{"get", "list", "watch"},
46+
},
4247
},
4348
}
4449
return role

pkg/controllers/uiplugin/troubleshooting_panel.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,11 @@ func korrel8rClusterRole(name string) *rbacv1.ClusterRole {
194194
Resources: []string{"configmaps", "endpoints", "events", "namespaces", "nodes", "pods", "persistentvolumeclaims", "persistentvolumes", "replicationcontrollers", "secrets", "serviceaccounts", "services"},
195195
Verbs: []string{"get", "list", "watch"},
196196
},
197+
{
198+
APIGroups: []string{"discovery.k8s.io"},
199+
Resources: []string{"endpointslices"},
200+
Verbs: []string{"get", "list", "watch"},
201+
},
197202
{
198203
APIGroups: []string{"rbac.authorization.k8s.io"},
199204
Resources: []string{"roles", "rolebindings", "clusterroles", "clusterrolebindings"},

0 commit comments

Comments
 (0)