Skip to content

Commit a804406

Browse files
committed
Refinements on metrics
- deleted redundant delete events - adjusted metrics: we now have queue and active reconciliations (with the intuitive semantics) Signed-off-by: Attila Mészáros <a_meszaros@apple.com>
1 parent cf114e3 commit a804406

File tree

3 files changed

+24
-118
lines changed

3 files changed

+24
-118
lines changed

docs/content/en/docs/documentation/observability.md

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -112,18 +112,17 @@ Metrics metrics = MicrometerMetricsV2.newMicrometerMetricsV2Builder(registry)
112112
All meters use `controller.name` as their primary tag. Counters optionally carry a `namespace` tag when
113113
`withNamespaceAsTag()` is enabled.
114114

115-
| Meter name (Micrometer) | Type | Tags | Description |
116-
|------------------------------------------|---------|-----------------------------------|----------------------------------------------------------------------|
117-
| `reconciliations.executions` | gauge | `controller.name` | Number of reconciler executions currently in progress |
118-
| `reconciliations.active` | gauge | `controller.name` | Number of resources currently queued for reconciliation |
119-
| `custom_resources` | gauge | `controller.name` | Number of custom resources tracked by the controller |
120-
| `reconciliations.execution.duration` | timer | `controller.name` | Reconciliation execution duration with explicit SLO bucket histogram |
121-
| `reconciliations.started.total` | counter | `controller.name`, `namespace`* | Number of reconciliations started (including retries) |
122-
| `reconciliations.success.total` | counter | `controller.name`, `namespace`* | Number of successfully finished reconciliations |
123-
| `reconciliations.failure.total` | counter | `controller.name`, `namespace`* | Number of failed reconciliations |
124-
| `reconciliations.retries.total` | counter | `controller.name`, `namespace`* | Number of reconciliation retries |
125-
| `events.received` | counter | `controller.name`, `event`, `action`, `namespace`* | Number of Kubernetes events received by the controller |
126-
| `events.delete` | counter | `controller.name`, `namespace`* | Number of resource deletion events processed |
115+
| Meter name (Micrometer) | Type | Tags | Description |
116+
|--------------------------------------|---------|---------------------------------------------------|------------------------------------------------------------------------|
117+
| `reconciliations.active` | gauge | `controller.name` | Number of reconciler executions currently in progress |
118+
| `reconciliations.queue` | gauge | `controller.name` | Number of resources currently queued for reconciliation or being reconciled |
119+
| `custom_resources` | gauge | `controller.name` | Number of custom resources tracked by the controller |
120+
| `reconciliations.execution.duration` | timer | `controller.name` | Reconciliation execution duration with explicit SLO bucket histogram |
121+
| `reconciliations.started.total` | counter | `controller.name`, `namespace`* | Number of reconciliations started (including retries) |
122+
| `reconciliations.success.total` | counter | `controller.name`, `namespace`* | Number of successfully finished reconciliations |
123+
| `reconciliations.failure.total` | counter | `controller.name`, `namespace`* | Number of failed reconciliations |
124+
| `reconciliations.retries.total` | counter | `controller.name`, `namespace`* | Number of reconciliation retries |
125+
| `events.received` | counter | `controller.name`, `event`, `action`, `namespace` | Number of Kubernetes events received by the controller |
127126

128127
\* `namespace` tag is only included when `withNamespaceAsTag()` is enabled.
129128

micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetricsV2.java

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,27 +48,26 @@ public class MicrometerMetricsV2 implements Metrics {
4848
private static final String EVENT = "event";
4949
private static final String ACTION = "action";
5050
private static final String EVENTS_RECEIVED = "events.received";
51-
private static final String EVENTS_DELETE = "events.delete";
5251
private static final String UNKNOWN_ACTION = "UNKNOWN";
5352
public static final String TOTAL_SUFFIX = ".total";
5453
private static final String SUCCESS_SUFFIX = "success";
5554
private static final String FAILURE_SUFFIX = "failure";
5655

5756
private static final String RECONCILIATIONS = "reconciliations.";
5857

59-
private static final String RECONCILIATIONS_FAILED =
58+
public static final String RECONCILIATIONS_FAILED =
6059
RECONCILIATIONS + FAILURE_SUFFIX + TOTAL_SUFFIX;
61-
private static final String RECONCILIATIONS_SUCCESS =
60+
public static final String RECONCILIATIONS_SUCCESS =
6261
RECONCILIATIONS + SUCCESS_SUFFIX + TOTAL_SUFFIX;
63-
private static final String RECONCILIATIONS_RETRIES_NUMBER =
62+
public static final String RECONCILIATIONS_RETRIES_NUMBER =
6463
RECONCILIATIONS + "retries" + TOTAL_SUFFIX;
65-
private static final String RECONCILIATIONS_STARTED = RECONCILIATIONS + "started" + TOTAL_SUFFIX;
64+
public static final String RECONCILIATIONS_STARTED = RECONCILIATIONS + "started" + TOTAL_SUFFIX;
6665

67-
private static final String RECONCILIATIONS_EXECUTIONS_GAUGE = RECONCILIATIONS + "executions";
68-
private static final String RECONCILIATIONS_QUEUE_SIZE_GAUGE = RECONCILIATIONS + "active";
69-
private static final String NUMBER_OF_RESOURCE_GAUGE = "custom_resources";
66+
public static final String RECONCILIATIONS_EXECUTIONS_GAUGE = RECONCILIATIONS + "active";
67+
public static final String RECONCILIATIONS_QUEUE_SIZE_GAUGE = RECONCILIATIONS + "queue";
68+
public static final String NUMBER_OF_RESOURCE_GAUGE = "custom_resources";
7069

71-
private static final String RECONCILIATION_EXECUTION_DURATION =
70+
public static final String RECONCILIATION_EXECUTION_DURATION =
7271
RECONCILIATIONS + "execution.duration";
7372

7473
private final MeterRegistry registry;
@@ -195,7 +194,6 @@ public void eventReceived(Event event, Map<String, Object> metadata) {
195194
@Override
196195
public void cleanupDone(ResourceID resourceID, Map<String, Object> metadata) {
197196
gauges.get(numberOfResourcesRefName(getControllerName(metadata))).decrementAndGet();
198-
incrementCounter(EVENTS_DELETE, resourceID.getNamespace().orElse(null), metadata);
199197
}
200198

201199
@Override
@@ -226,6 +224,9 @@ public void reconciliationStarted(HasMetadata resource, Map<String, Object> meta
226224
var reconcilerExecutions =
227225
gauges.get(reconciliationExecutionGaugeRefKey(getControllerName(metadata)));
228226
reconcilerExecutions.incrementAndGet();
227+
var controllerQueueSize =
228+
gauges.get(controllerQueueSizeGaugeRefKey(metadata.get(CONTROLLER_NAME).toString()));
229+
controllerQueueSize.decrementAndGet();
229230
}
230231

231232
@Override
@@ -234,10 +235,6 @@ public void reconciliationFinished(
234235
var reconcilerExecutions =
235236
gauges.get(reconciliationExecutionGaugeRefKey(metadata.get(CONTROLLER_NAME).toString()));
236237
reconcilerExecutions.decrementAndGet();
237-
238-
var controllerQueueSize =
239-
gauges.get(controllerQueueSizeGaugeRefKey(metadata.get(CONTROLLER_NAME).toString()));
240-
controllerQueueSize.decrementAndGet();
241238
}
242239

243240
@Override

observability/josdk-operator-metrics-dashboard.json

Lines changed: 2 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@
328328
"uid": "prometheus"
329329
},
330330
"editorMode": "code",
331-
"expr": "sum(reconciliations_executions{service_name=~\"$service_name\"})",
331+
"expr": "sum(reconciliations_active{service_name=~\"$service_name\"})",
332332
"legendFormat": "Executing",
333333
"range": true,
334334
"refId": "A"
@@ -395,7 +395,7 @@
395395
"uid": "prometheus"
396396
},
397397
"editorMode": "code",
398-
"expr": "sum(reconciliations_active{service_name=~\"$service_name\"})",
398+
"expr": "sum(reconciliations_queue{service_name=~\"$service_name\"})",
399399
"legendFormat": "Active",
400400
"range": true,
401401
"refId": "A"
@@ -876,96 +876,6 @@
876876
"title": "Failures by Controller",
877877
"type": "timeseries"
878878
},
879-
{
880-
"datasource": {
881-
"type": "prometheus",
882-
"uid": "prometheus"
883-
},
884-
"description": "Rate of delete events received",
885-
"fieldConfig": {
886-
"defaults": {
887-
"color": {
888-
"mode": "palette-classic"
889-
},
890-
"custom": {
891-
"axisCenteredZero": false,
892-
"axisColorMode": "text",
893-
"axisLabel": "",
894-
"axisPlacement": "auto",
895-
"barAlignment": 0,
896-
"drawStyle": "line",
897-
"fillOpacity": 10,
898-
"gradientMode": "none",
899-
"hideFrom": {
900-
"tooltip": false,
901-
"viz": false,
902-
"legend": false
903-
},
904-
"lineInterpolation": "linear",
905-
"lineWidth": 1,
906-
"pointSize": 5,
907-
"scaleDistribution": {
908-
"type": "linear"
909-
},
910-
"showPoints": "never",
911-
"spanNulls": false,
912-
"stacking": {
913-
"group": "A",
914-
"mode": "none"
915-
},
916-
"thresholdsStyle": {
917-
"mode": "off"
918-
}
919-
},
920-
"mappings": [],
921-
"thresholds": {
922-
"mode": "absolute",
923-
"steps": [
924-
{
925-
"color": "green",
926-
"value": null
927-
}
928-
]
929-
},
930-
"unit": "ops"
931-
},
932-
"overrides": []
933-
},
934-
"gridPos": {
935-
"h": 8,
936-
"w": 12,
937-
"x": 12,
938-
"y": 32
939-
},
940-
"id": 11,
941-
"options": {
942-
"legend": {
943-
"calcs": ["last", "sum"],
944-
"displayMode": "table",
945-
"placement": "bottom",
946-
"showLegend": true
947-
},
948-
"tooltip": {
949-
"mode": "single",
950-
"sort": "none"
951-
}
952-
},
953-
"targets": [
954-
{
955-
"datasource": {
956-
"type": "prometheus",
957-
"uid": "prometheus"
958-
},
959-
"editorMode": "code",
960-
"expr": "sum(rate(events_delete_total{service_name=~\"$service_name\"}[5m])) by (controller_name)",
961-
"legendFormat": "{{controller_name}}",
962-
"range": true,
963-
"refId": "A"
964-
}
965-
],
966-
"title": "Delete Event Rate",
967-
"type": "timeseries"
968-
},
969879
{
970880
"datasource": {
971881
"type": "prometheus",

0 commit comments

Comments
 (0)