Skip to content

Commit 2b71277

Browse files
csvirimetacosm
andcommitted
improve: logging for resource filter cache (#3167)
Signed-off-by: Attila Mészáros <a_meszaros@apple.com> Co-authored-by: Chris Laprun <metacosm@gmail.com>
1 parent 6cb03b0 commit 2b71277

File tree

16 files changed

+242
-149
lines changed

16 files changed

+242
-149
lines changed

docs/content/en/docs/documentation/observability.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,28 @@ parts of reconciliation logic and during the execution of the controller:
3333

3434
For more information about MDC see this [link](https://www.baeldung.com/mdc-in-log4j-2-logback).
3535

36+
### MDC entries during event handling
37+
38+
Although, usually users might not require it in their day-to-day workflow, it is worth mentioning that
39+
there are additional MDC entries managed for event handling. Typically, you might be interested in it
40+
in your `SecondaryToPrimaryMapper` related logs.
41+
For `InformerEventSource` and `ControllerEventSource` the following information is present:
42+
43+
| MDC Key | Value from Resource from the Event |
44+
|:-----------------------------------------------|:-------------------------------------------------|
45+
| `eventsource.event.resource.name` | `.metadata.name` |
46+
| `eventsource.event.resource.uid` | `.metadata.uid` |
47+
| `eventsource.event.resource.namespace` | `.metadata.namespace` |
48+
| `eventsource.event.resource.kind` | resource kind |
49+
| `eventsource.event.resource.resourceVersion` | `.metadata.resourceVersion` |
50+
| `eventsource.event.action` | action name (e.g. `ADDED`, `UPDATED`, `DELETED`) |
51+
| `eventsource.name` | name of the event source |
52+
53+
### Disabling MDC support
54+
55+
MDC support is enabled by default. If you want to disable it, you can set the `JAVA_OPERATOR_SDK_USE_MDC` environment
56+
variable to `false` when you start your operator.
57+
3658
## Metrics
3759

3860
JOSDK provides built-in support for metrics reporting on what is happening with your reconcilers in the form of

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/MDCUtils.java

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import io.fabric8.kubernetes.api.model.HasMetadata;
2121
import io.javaoperatorsdk.operator.api.config.Utils;
2222
import io.javaoperatorsdk.operator.processing.event.ResourceID;
23+
import io.javaoperatorsdk.operator.processing.event.source.ResourceAction;
2324

2425
public class MDCUtils {
2526

@@ -34,6 +35,72 @@ public class MDCUtils {
3435
private static final boolean enabled =
3536
Utils.getBooleanFromSystemPropsOrDefault(Utils.USE_MDC_ENV_KEY, true);
3637

38+
private static final String EVENT_RESOURCE_NAME = "eventsource.event.resource.name";
39+
private static final String EVENT_RESOURCE_UID = "eventsource.event.resource.uid";
40+
private static final String EVENT_RESOURCE_NAMESPACE = "eventsource.event.resource.namespace";
41+
private static final String EVENT_RESOURCE_KIND = "eventsource.event.resource.kind";
42+
private static final String EVENT_RESOURCE_VERSION = "eventsource.event.resource.resourceVersion";
43+
private static final String EVENT_ACTION = "eventsource.event.action";
44+
private static final String EVENT_SOURCE_NAME = "eventsource.name";
45+
46+
public static void addInformerEventInfo(
47+
HasMetadata resource, ResourceAction action, String eventSourceName) {
48+
if (enabled) {
49+
MDC.put(EVENT_RESOURCE_NAME, resource.getMetadata().getName());
50+
MDC.put(EVENT_RESOURCE_NAMESPACE, resource.getMetadata().getNamespace());
51+
MDC.put(EVENT_RESOURCE_KIND, HasMetadata.getKind(resource.getClass()));
52+
MDC.put(EVENT_RESOURCE_VERSION, resource.getMetadata().getResourceVersion());
53+
MDC.put(EVENT_RESOURCE_UID, resource.getMetadata().getUid());
54+
MDC.put(EVENT_ACTION, action == null ? null : action.name());
55+
MDC.put(EVENT_SOURCE_NAME, eventSourceName);
56+
}
57+
}
58+
59+
public static void removeInformerEventInfo() {
60+
if (enabled) {
61+
MDC.remove(EVENT_RESOURCE_NAME);
62+
MDC.remove(EVENT_RESOURCE_NAMESPACE);
63+
MDC.remove(EVENT_RESOURCE_KIND);
64+
MDC.remove(EVENT_RESOURCE_VERSION);
65+
MDC.remove(EVENT_RESOURCE_UID);
66+
MDC.remove(EVENT_ACTION);
67+
MDC.remove(EVENT_SOURCE_NAME);
68+
}
69+
}
70+
71+
public static void withMDCForEvent(
72+
HasMetadata resource, Runnable runnable, String eventSourceName) {
73+
withMDCForEvent(resource, null, runnable, eventSourceName);
74+
}
75+
76+
public static void withMDCForEvent(
77+
HasMetadata resource, ResourceAction action, Runnable runnable, String eventSourceName) {
78+
try {
79+
MDCUtils.addInformerEventInfo(resource, action, eventSourceName);
80+
runnable.run();
81+
} finally {
82+
MDCUtils.removeInformerEventInfo();
83+
}
84+
}
85+
86+
public static void withMDCForResourceID(ResourceID resourceID, Runnable runnable) {
87+
try {
88+
MDCUtils.addResourceIDInfo(resourceID);
89+
runnable.run();
90+
} finally {
91+
MDCUtils.removeResourceIDInfo();
92+
}
93+
}
94+
95+
public static void withMDCForPrimary(HasMetadata primary, Runnable runnable) {
96+
try {
97+
MDCUtils.addResourceInfo(primary);
98+
runnable.run();
99+
} finally {
100+
MDCUtils.removeResourceInfo();
101+
}
102+
}
103+
37104
public static void addResourceIDInfo(ResourceID resourceID) {
38105
if (enabled) {
39106
MDC.put(NAME, resourceID.getName());

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventProcessor.java

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@
4444
import io.javaoperatorsdk.operator.processing.retry.Retry;
4545
import io.javaoperatorsdk.operator.processing.retry.RetryExecution;
4646

47-
import static io.javaoperatorsdk.operator.processing.KubernetesResourceUtils.getName;
48-
4947
public class EventProcessor<P extends HasMetadata> implements EventHandler, LifecycleAware {
5048

5149
private static final Logger log = LoggerFactory.getLogger(EventProcessor.class);
@@ -187,9 +185,8 @@ private void submitReconciliationExecution(ResourceState state) {
187185
executor.execute(new ReconcilerExecutor(resourceID, executionScope));
188186
} else {
189187
log.debug(
190-
"Skipping executing controller for resource id: {}. Controller in execution: {}. Latest"
188+
"Skipping executing controller. Controller in execution: {}. Latest"
191189
+ " Resource present: {}",
192-
resourceID,
193190
controllerUnderExecution,
194191
maybeLatest.isPresent());
195192
if (maybeLatest.isEmpty()) {
@@ -198,7 +195,7 @@ private void submitReconciliationExecution(ResourceState state) {
198195
// resource. Other is that simply there is no primary resource present for an event, this
199196
// might indicate issue with the implementation, but could happen also naturally, thus
200197
// this is not necessarily a problem.
201-
log.debug("no primary resource found in cache with resource id: {}", resourceID);
198+
log.debug("No primary resource found in cache with resource id: {}", resourceID);
202199
}
203200
}
204201
} finally {
@@ -209,7 +206,7 @@ private void submitReconciliationExecution(ResourceState state) {
209206
@SuppressWarnings("unchecked")
210207
private P getResourceFromState(ResourceState state) {
211208
if (triggerOnAllEvents()) {
212-
log.debug("Getting resource from state for {}", state.getId());
209+
log.debug("Getting resource from state");
213210
return (P) state.getLastKnownResource();
214211
} else {
215212
throw new IllegalStateException(
@@ -218,19 +215,17 @@ private P getResourceFromState(ResourceState state) {
218215
}
219216

220217
private void handleEventMarking(Event event, ResourceState state) {
221-
final var relatedCustomResourceID = event.getRelatedCustomResourceID();
222218
if (event instanceof ResourceEvent resourceEvent) {
223219
if (resourceEvent.getAction() == ResourceAction.DELETED) {
224-
log.debug("Marking delete event received for: {}", relatedCustomResourceID);
220+
log.debug("Marking delete event received");
225221
state.markDeleteEventReceived(
226222
resourceEvent.getResource().orElseThrow(),
227223
((ResourceDeleteEvent) resourceEvent).isDeletedFinalStateUnknown());
228224
} else {
229225
if (state.processedMarkForDeletionPresent() && isResourceMarkedForDeletion(resourceEvent)) {
230226
log.debug(
231227
"Skipping mark of event received, since already processed mark for deletion and"
232-
+ " resource marked for deletion: {}",
233-
relatedCustomResourceID);
228+
+ " resource marked for deletion");
234229
return;
235230
}
236231
// Normally when eventMarker is in state PROCESSED_MARK_FOR_DELETION it is expected to
@@ -260,8 +255,7 @@ private boolean isResourceMarkedForDeletion(ResourceEvent resourceEvent) {
260255

261256
private void handleRateLimitedSubmission(ResourceID resourceID, Duration minimalDuration) {
262257
var minimalDurationMillis = minimalDuration.toMillis();
263-
log.debug(
264-
"Rate limited resource: {}, rescheduled in {} millis", resourceID, minimalDurationMillis);
258+
log.debug("Rate limited resource; rescheduled in {} millis", minimalDurationMillis);
265259
retryEventSource()
266260
.scheduleOnce(
267261
resourceID, Math.max(minimalDurationMillis, MINIMAL_RATE_LIMIT_RESCHEDULE_DURATION));
@@ -334,7 +328,7 @@ private void reScheduleExecutionIfInstructed(
334328
.ifPresentOrElse(
335329
delay -> {
336330
var resourceID = ResourceID.fromResource(customResource);
337-
log.debug("Rescheduling event for resource: {} with delay: {}", resourceID, delay);
331+
log.debug("Rescheduling event with delay: {}", delay);
338332
retryEventSource().scheduleOnce(resourceID, delay);
339333
},
340334
() -> scheduleExecutionForMaxReconciliationInterval(customResource));
@@ -347,11 +341,7 @@ private void scheduleExecutionForMaxReconciliationInterval(P customResource) {
347341
m -> {
348342
var resourceID = ResourceID.fromResource(customResource);
349343
var delay = m.toMillis();
350-
log.debug(
351-
"Rescheduling event for max reconciliation interval for resource: {} : "
352-
+ "with delay: {}",
353-
resourceID,
354-
delay);
344+
log.debug("Rescheduling event for max reconciliation interval with delay: {}", delay);
355345
retryEventSource().scheduleOnce(resourceID, delay);
356346
});
357347
}
@@ -375,16 +365,15 @@ private void handleRetryOnException(ExecutionScope<P> executionScope, Exception
375365

376366
retryAwareErrorLogging(state.getRetry(), eventPresent, exception, executionScope);
377367
if (eventPresent) {
378-
log.debug("New events exists for for resource id: {}", resourceID);
368+
log.debug("New events exist for resource id");
379369
submitReconciliationExecution(state);
380370
return;
381371
}
382372
Optional<Long> nextDelay = state.getRetry().nextDelay();
383373

384374
nextDelay.ifPresentOrElse(
385375
delay -> {
386-
log.debug(
387-
"Scheduling timer event for retry with delay:{} for resource: {}", delay, resourceID);
376+
log.debug("Scheduling timer event for retry with delay:{}", delay);
388377
metrics.failedReconciliation(executionScope.getResource(), exception, metricsMetadata);
389378
retryEventSource().scheduleOnce(resourceID, delay);
390379
},
@@ -425,8 +414,7 @@ private void retryAwareErrorLogging(
425414
}
426415

427416
private void cleanupOnSuccessfulExecution(ExecutionScope<P> executionScope) {
428-
log.debug(
429-
"Cleanup for successful execution for resource: {}", getName(executionScope.getResource()));
417+
log.debug("Cleanup for successful execution");
430418
if (isRetryConfigured()) {
431419
resourceStateManager.getOrCreate(executionScope.getResourceID()).setRetry(null);
432420
}
@@ -444,7 +432,7 @@ private ResourceState getOrInitRetryExecution(ExecutionScope<P> executionScope)
444432
}
445433

446434
private void cleanupForDeletedEvent(ResourceID resourceID) {
447-
log.debug("Cleaning up for delete event for: {}", resourceID);
435+
log.debug("Cleaning up for delete event");
448436
resourceStateManager.remove(resourceID);
449437
metrics.cleanupDoneFor(resourceID, metricsMetadata);
450438
}
@@ -509,6 +497,7 @@ public void run() {
509497
log.debug("Event processor not running skipping resource processing: {}", resourceID);
510498
return;
511499
}
500+
MDCUtils.addResourceIDInfo(resourceID);
512501
log.debug("Running reconcile executor for: {}", executionScope);
513502
// change thread name for easier debugging
514503
final var thread = Thread.currentThread();
@@ -518,9 +507,7 @@ public void run() {
518507
var actualResource = cache.get(resourceID);
519508
if (actualResource.isEmpty()) {
520509
if (triggerOnAllEvents()) {
521-
log.debug(
522-
"Resource not found in the cache, checking for delete event resource: {}",
523-
resourceID);
510+
log.debug("Resource not found in the cache, checking for delete event resource");
524511
if (executionScope.isDeleteEvent()) {
525512
var state = resourceStateManager.get(resourceID);
526513
actualResource =
@@ -538,7 +525,7 @@ public void run() {
538525
return;
539526
}
540527
} else {
541-
log.debug("Skipping execution; primary resource missing from cache: {}", resourceID);
528+
log.debug("Skipping execution; primary resource missing from cache");
542529
return;
543530
}
544531
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/ReconciliationDispatcher.java

Lines changed: 9 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -83,19 +83,13 @@ PostExecutionControl<P> handleDispatch(ExecutionScope<P> executionScope, Context
8383
throws Exception {
8484
P originalResource = executionScope.getResource();
8585
var resourceForExecution = cloneResource(originalResource);
86-
log.debug(
87-
"Handling dispatch for resource name: {} namespace: {}",
88-
getName(originalResource),
89-
originalResource.getMetadata().getNamespace());
86+
log.debug("Handling dispatch");
9087

9188
final var markedForDeletion = originalResource.isMarkedForDeletion();
9289
if (!triggerOnAllEvents()
9390
&& markedForDeletion
9491
&& shouldNotDispatchToCleanupWhenMarkedForDeletion(originalResource)) {
95-
log.debug(
96-
"Skipping cleanup of resource {} because finalizer(s) {} don't allow processing yet",
97-
getName(originalResource),
98-
originalResource.getMetadata().getFinalizers());
92+
log.debug("Skipping cleanup because finalizer(s) don't allow processing yet");
9993
return PostExecutionControl.defaultDispatch();
10094
}
10195
// context can be provided only for testing purposes
@@ -165,11 +159,7 @@ private PostExecutionControl<P> reconcileExecution(
165159
P originalResource,
166160
Context<P> context)
167161
throws Exception {
168-
log.debug(
169-
"Reconciling resource {} with version: {} with execution scope: {}",
170-
getName(resourceForExecution),
171-
getVersion(resourceForExecution),
172-
executionScope);
162+
log.debug("Reconciling resource execution scope: {}", executionScope);
173163

174164
UpdateControl<P> updateControl = controller.reconcile(resourceForExecution, context);
175165

@@ -246,9 +236,8 @@ public boolean isLastAttempt() {
246236
exceptionLevel = Level.DEBUG;
247237
failedMessage = " due to conflict";
248238
log.info(
249-
"ErrorStatusUpdateControl.patchStatus of {} failed due to a conflict, but the next"
250-
+ " reconciliation is imminent.",
251-
ResourceID.fromResource(originalResource));
239+
"ErrorStatusUpdateControl.patchStatus failed due to a conflict, but the next"
240+
+ " reconciliation is imminent");
252241
} else {
253242
exceptionLevel = Level.WARN;
254243
failedMessage = ", but will be retried soon,";
@@ -312,10 +301,7 @@ private void updatePostExecutionControlWithReschedule(
312301
private PostExecutionControl<P> handleCleanup(
313302
P resourceForExecution, Context<P> context, ExecutionScope<P> executionScope) {
314303
if (log.isDebugEnabled()) {
315-
log.debug(
316-
"Executing delete for resource: {} with version: {}",
317-
ResourceID.fromResource(resourceForExecution),
318-
getVersion(resourceForExecution));
304+
log.debug("Executing delete for resource");
319305
}
320306
DeleteControl deleteControl = controller.cleanup(resourceForExecution, context);
321307
final var useFinalizer = controller.useFinalizer();
@@ -329,10 +315,7 @@ private PostExecutionControl<P> handleCleanup(
329315
}
330316
}
331317
log.debug(
332-
"Skipping finalizer remove for resource: {} with version: {}. delete control: {}, uses"
333-
+ " finalizer: {}",
334-
getUID(resourceForExecution),
335-
getVersion(resourceForExecution),
318+
"Skipping finalizer remove for resource. Delete control: {}, uses finalizer: {}",
336319
deleteControl,
337320
useFinalizer);
338321
PostExecutionControl<P> postExecutionControl = PostExecutionControl.defaultDispatch();
@@ -342,11 +325,7 @@ private PostExecutionControl<P> handleCleanup(
342325

343326
private P patchResource(Context<P> context, P resource, P originalResource) {
344327
if (log.isDebugEnabled()) {
345-
log.debug(
346-
"Updating resource: {} with version: {}; SSA: {}",
347-
resource.getMetadata().getName(),
348-
getVersion(resource),
349-
useSSA);
328+
log.debug("Updating resource; with SSA: {}", useSSA);
350329
}
351330
log.trace("Resource before update: {}", resource);
352331

@@ -384,10 +363,7 @@ public CustomResourceFacade(ControllerConfiguration<R> configuration, Cloner clo
384363

385364
public R patchResource(Context<R> context, R resource, R originalResource) {
386365
if (log.isDebugEnabled()) {
387-
log.debug(
388-
"Trying to replace resource {}, version: {}",
389-
ResourceID.fromResource(resource),
390-
resource.getMetadata().getResourceVersion());
366+
log.debug("Trying to replace resource");
391367
}
392368
if (useSSA) {
393369
return context.resourceOperations().serverSideApplyPrimary(resource);

0 commit comments

Comments
 (0)