Skip to content

Commit 87fd299

Browse files
committed
wip
Signed-off-by: Attila Mészáros <a_meszaros@apple.com>
1 parent 4f38ca9 commit 87fd299

File tree

2 files changed

+169
-23
lines changed

2 files changed

+169
-23
lines changed

micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java

Lines changed: 56 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.concurrent.ScheduledExecutorService;
2222
import java.util.concurrent.TimeUnit;
2323
import java.util.concurrent.atomic.AtomicInteger;
24+
import java.util.function.Consumer;
2425

2526
import org.jspecify.annotations.NonNull;
2627

@@ -75,6 +76,7 @@ public class MicrometerMetrics implements Metrics {
7576
private final MeterRegistry registry;
7677
private final Map<String, AtomicInteger> gauges = new ConcurrentHashMap<>();
7778
private final Cleaner cleaner;
79+
private final Consumer<Timer.Builder> timerConfig;
7880

7981
/**
8082
* Creates a MicrometerMetrics instance configured to not collect per-resource metrics, just
@@ -84,7 +86,7 @@ public class MicrometerMetrics implements Metrics {
8486
* @return a MicrometerMetrics instance configured to not collect per-resource metrics
8587
*/
8688
public static MicrometerMetrics withoutPerResourceMetrics(MeterRegistry registry) {
87-
return new MicrometerMetrics(registry, Cleaner.NOOP, false);
89+
return new MicrometerMetrics(registry, Cleaner.NOOP, false, null);
8890
}
8991

9092
/**
@@ -108,7 +110,7 @@ public static MicrometerMetricsBuilder newMicrometerMetricsBuilder(MeterRegistry
108110
*/
109111
public static PerResourceCollectingMicrometerMetricsBuilder
110112
newPerResourceCollectingMicrometerMetricsBuilder(MeterRegistry registry) {
111-
return new PerResourceCollectingMicrometerMetricsBuilder(registry);
113+
return new PerResourceCollectingMicrometerMetricsBuilder(registry, null);
112114
}
113115

114116
/**
@@ -119,12 +121,21 @@ public static MicrometerMetricsBuilder newMicrometerMetricsBuilder(MeterRegistry
119121
* @param registry the {@link MeterRegistry} instance to use for metrics recording
120122
* @param cleaner the {@link Cleaner} to use
121123
* @param collectingPerResourceMetrics whether to collect per resource metrics
124+
* @param timerConfig optional configuration for timers; when {@code null}, defaults to
125+
* publishing percentiles 0.5, 0.95, 0.99 and a percentile histogram
122126
*/
123127
private MicrometerMetrics(
124-
MeterRegistry registry, Cleaner cleaner, boolean collectingPerResourceMetrics) {
128+
MeterRegistry registry,
129+
Cleaner cleaner,
130+
boolean collectingPerResourceMetrics,
131+
Consumer<Timer.Builder> timerConfig) {
125132
this.registry = registry;
126133
this.cleaner = cleaner;
127134
this.collectPerResourceMetrics = collectingPerResourceMetrics;
135+
this.timerConfig =
136+
timerConfig != null
137+
? timerConfig
138+
: builder -> builder.publishPercentiles(0.5, 0.95, 0.99).publishPercentileHistogram();
128139
}
129140

130141
@Override
@@ -163,12 +174,9 @@ public <T> T timeControllerExecution(ControllerExecution<T> execution) {
163174
final var tags = new ArrayList<Tag>(16);
164175
tags.add(Tag.of(CONTROLLER, name));
165176
addMetadataTags(resourceID, metadata, tags, true);
166-
final var timer =
167-
Timer.builder(execName)
168-
.tags(tags)
169-
.publishPercentiles(0.3, 0.5, 0.95)
170-
.publishPercentileHistogram()
171-
.register(registry);
177+
final var timerBuilder = Timer.builder(execName).tags(tags);
178+
timerConfig.accept(timerBuilder);
179+
final var timer = timerBuilder.register(registry);
172180
try {
173181
final var result =
174182
timer.record(
@@ -379,8 +387,27 @@ public static class PerResourceCollectingMicrometerMetricsBuilder
379387
private int cleaningThreadsNumber;
380388
private int cleanUpDelayInSeconds;
381389

382-
private PerResourceCollectingMicrometerMetricsBuilder(MeterRegistry registry) {
390+
private PerResourceCollectingMicrometerMetricsBuilder(
391+
MeterRegistry registry, Consumer<Timer.Builder> timerConfig) {
383392
super(registry);
393+
this.executionTimerConfig = timerConfig;
394+
}
395+
396+
/**
397+
* Configures the Timer used for timing controller executions. By default, timers are configured
398+
* to publish percentiles 0.5, 0.95, 0.99 and a percentile histogram. For example, set {@code
398+
* .minimumExpectedValue(Duration.ofMillis(...)).maximumExpectedValue(Duration.ofSeconds(...)) }
399+
* so that Micrometer can derive the histogram buckets for you.
401+
*
402+
* @param executionTimerConfig a consumer that will configure the Timer.Builder. The builder
403+
* will already have the metric name and tags set.
404+
* @return this builder for method chaining
405+
*/
406+
@Override
407+
public PerResourceCollectingMicrometerMetricsBuilder withExecutionTimerConfig(
408+
Consumer<Timer.Builder> executionTimerConfig) {
409+
this.executionTimerConfig = executionTimerConfig;
410+
return this;
384411
}
385412

386413
/**
@@ -412,23 +439,38 @@ public PerResourceCollectingMicrometerMetricsBuilder withCleanUpDelayInSeconds(
412439
public MicrometerMetrics build() {
413440
final var cleaner =
414441
new DelayedCleaner(registry, cleanUpDelayInSeconds, cleaningThreadsNumber);
415-
return new MicrometerMetrics(registry, cleaner, true);
442+
return new MicrometerMetrics(registry, cleaner, true, executionTimerConfig);
416443
}
417444
}
418445

419446
public static class MicrometerMetricsBuilder {
420447
protected final MeterRegistry registry;
421448
private boolean collectingPerResourceMetrics = true;
449+
protected Consumer<Timer.Builder> executionTimerConfig = null;
422450

423451
private MicrometerMetricsBuilder(MeterRegistry registry) {
424452
this.registry = registry;
425453
}
426454

455+
/**
456+
* Configures the Timer used for timing controller executions. When not set, timers publish
457+
* percentiles 0.5, 0.95, 0.99 and a percentile histogram; a supplied consumer replaces these.
458+
*
459+
* @param executionTimerConfig a consumer that will configure the Timer.Builder. The builder
460+
* will already have the metric name and tags set.
461+
* @return this builder for method chaining
462+
*/
463+
public MicrometerMetricsBuilder withExecutionTimerConfig(
464+
Consumer<Timer.Builder> executionTimerConfig) {
465+
this.executionTimerConfig = executionTimerConfig;
466+
return this;
467+
}
468+
427469
/** Configures the instance to collect metrics on a per-resource basis. */
428470
@SuppressWarnings("unused")
429471
public PerResourceCollectingMicrometerMetricsBuilder collectingMetricsPerResource() {
430472
collectingPerResourceMetrics = true;
431-
return new PerResourceCollectingMicrometerMetricsBuilder(registry);
473+
return new PerResourceCollectingMicrometerMetricsBuilder(registry, executionTimerConfig);
432474
}
433475

434476
/**
@@ -442,7 +484,8 @@ public MicrometerMetricsBuilder notCollectingMetricsPerResource() {
442484
}
443485

444486
public MicrometerMetrics build() {
445-
return new MicrometerMetrics(registry, Cleaner.NOOP, collectingPerResourceMetrics);
487+
return new MicrometerMetrics(
488+
registry, Cleaner.NOOP, collectingPerResourceMetrics, executionTimerConfig);
446489
}
447490
}
448491

observability/josdk-operator-metrics-dashboard.json

Lines changed: 113 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,7 @@
992992
"type": "prometheus",
993993
"uid": "prometheus"
994994
},
995-
"description": "Reconciliation retry information",
995+
"description": "Current retry attempt number for resources being retried",
996996
"fieldConfig": {
997997
"defaults": {
998998
"color": {
@@ -1018,7 +1018,7 @@
10181018
"scaleDistribution": {
10191019
"type": "linear"
10201020
},
1021-
"showPoints": "never",
1021+
"showPoints": "auto",
10221022
"spanNulls": false,
10231023
"stacking": {
10241024
"group": "A",
@@ -1035,10 +1035,18 @@
10351035
{
10361036
"color": "green",
10371037
"value": null
1038+
},
1039+
{
1040+
"color": "yellow",
1041+
"value": 1
1042+
},
1043+
{
1044+
"color": "red",
1045+
"value": 3
10381046
}
10391047
]
10401048
},
1041-
"unit": "ops"
1049+
"unit": "short"
10421050
},
10431051
"overrides": []
10441052
},
@@ -1051,7 +1059,7 @@
10511059
"id": 12,
10521060
"options": {
10531061
"legend": {
1054-
"calcs": ["last", "mean"],
1062+
"calcs": ["last", "max"],
10551063
"displayMode": "table",
10561064
"placement": "bottom",
10571065
"showLegend": true
@@ -1068,24 +1076,119 @@
10681076
"uid": "prometheus"
10691077
},
10701078
"editorMode": "code",
1071-
"expr": "sum(rate(operator_sdk_reconciliations_started_total{service_name=\"josdk\", operator_sdk_reconciliations_retries_last=\"true\"}[5m]))",
1072-
"legendFormat": "Last Retry Attempts",
1079+
"expr": "operator_sdk_reconciliations_retries_number{service_name=\"josdk\"}",
1080+
"legendFormat": "{{kind}}/{{name}} ({{namespace}})",
10731081
"range": true,
10741082
"refId": "A"
1083+
}
1084+
],
1085+
"title": "Reconciliation Retry Attempts",
1086+
"type": "timeseries"
1087+
},
1088+
{
1089+
"datasource": {
1090+
"type": "prometheus",
1091+
"uid": "prometheus"
1092+
},
1093+
"description": "Resources currently on their last retry attempt (1 = last attempt, 0 = not last or no retry)",
1094+
"fieldConfig": {
1095+
"defaults": {
1096+
"color": {
1097+
"mode": "palette-classic"
1098+
},
1099+
"custom": {
1100+
"axisCenteredZero": false,
1101+
"axisColorMode": "text",
1102+
"axisLabel": "",
1103+
"axisPlacement": "auto",
1104+
"barAlignment": 0,
1105+
"drawStyle": "line",
1106+
"fillOpacity": 10,
1107+
"gradientMode": "none",
1108+
"hideFrom": {
1109+
"tooltip": false,
1110+
"viz": false,
1111+
"legend": false
1112+
},
1113+
"lineInterpolation": "stepAfter",
1114+
"lineWidth": 1,
1115+
"pointSize": 5,
1116+
"scaleDistribution": {
1117+
"type": "linear"
1118+
},
1119+
"showPoints": "auto",
1120+
"spanNulls": false,
1121+
"stacking": {
1122+
"group": "A",
1123+
"mode": "none"
1124+
},
1125+
"thresholdsStyle": {
1126+
"mode": "off"
1127+
}
1128+
},
1129+
"mappings": [
1130+
{
1131+
"options": {
1132+
"0": {
1133+
"text": "No"
1134+
},
1135+
"1": {
1136+
"text": "Yes"
1137+
}
1138+
},
1139+
"type": "value"
1140+
}
1141+
],
1142+
"thresholds": {
1143+
"mode": "absolute",
1144+
"steps": [
1145+
{
1146+
"color": "green",
1147+
"value": null
1148+
},
1149+
{
1150+
"color": "red",
1151+
"value": 1
1152+
}
1153+
]
1154+
},
1155+
"unit": "short"
10751156
},
1157+
"overrides": []
1158+
},
1159+
"gridPos": {
1160+
"h": 8,
1161+
"w": 12,
1162+
"x": 0,
1163+
"y": 40
1164+
},
1165+
"id": 13,
1166+
"options": {
1167+
"legend": {
1168+
"calcs": ["last"],
1169+
"displayMode": "table",
1170+
"placement": "bottom",
1171+
"showLegend": true
1172+
},
1173+
"tooltip": {
1174+
"mode": "single",
1175+
"sort": "none"
1176+
}
1177+
},
1178+
"targets": [
10761179
{
10771180
"datasource": {
10781181
"type": "prometheus",
10791182
"uid": "prometheus"
10801183
},
10811184
"editorMode": "code",
1082-
"expr": "sum(rate(operator_sdk_reconciliations_started_total{service_name=\"josdk\", operator_sdk_reconciliations_retries_last=\"false\"}[5m]))",
1083-
"legendFormat": "Retries (Not Last)",
1185+
"expr": "operator_sdk_reconciliations_retries_last{service_name=\"josdk\"}",
1186+
"legendFormat": "{{kind}}/{{name}} ({{namespace}})",
10841187
"range": true,
1085-
"refId": "B"
1188+
"refId": "A"
10861189
}
10871190
],
1088-
"title": "Reconciliation Retry Rate",
1191+
"title": "Resources on Last Retry Attempt",
10891192
"type": "timeseries"
10901193
}
10911194
],

0 commit comments

Comments
 (0)