diff --git a/internal/controller/auto_namespace_monitoring_controller.go b/internal/controller/auto_namespace_monitoring_controller.go index b39508da..cae4e1df 100644 --- a/internal/controller/auto_namespace_monitoring_controller.go +++ b/internal/controller/auto_namespace_monitoring_controller.go @@ -247,7 +247,7 @@ func (r *AutoNamespaceMonitoringReconciler) ensureNamespaceWatchIsActive(labelSe if labelSelectorPredicate != nil { watchPredicates = append(watchPredicates, labelSelectorPredicate) } - logger.Debug("(re)creating the namespace watch") + logger.Debug("(re)creating the the namespace controller's namespace watch") if err = namespaceController.Watch( source.TypedKind[*corev1.Namespace, reconcile.Request]( r.manager.GetCache(), @@ -261,6 +261,28 @@ func (r *AutoNamespaceMonitoringReconciler) ensureNamespaceWatchIsActive(labelSe } logger.Info("successfully created a new watch for namespaces") + // Also watch for deletion of Dash0Monitoring resources. When a manually managed monitoring resource is removed from + // a namespace that should be auto-monitored, we want to create the auto-monitoring resource immediately rather than + // waiting for the next namespace reconcile. When an auto-monitoring is deleted by a user, we also want to recreate + // it. + logger.Debug("(re)creating the namespace controller's watch for monitoring resource deletions") + if err = namespaceController.Watch( + source.TypedKind[*dash0v1beta1.Dash0Monitoring, reconcile.Request]( + r.manager.GetCache(), + &dash0v1beta1.Dash0Monitoring{}, + handler.TypedEnqueueRequestsFromMapFunc[*dash0v1beta1.Dash0Monitoring, reconcile.Request]( + func(_ context.Context, mr *dash0v1beta1.Dash0Monitoring) []reconcile.Request { + return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: mr.Namespace}}} + }, + ), + &monitoringResourceDeletePredicate{}, + ), + ); err != nil { + logger.Error(err, "unable to create a new watch for monitoring resource deletions") + return + } + logger.Info("successfully created a new watch for monitoring resource deletions") + // start the controller backgroundCtx := context.Background() childContextForNamespaceController, stopNamespaceController := context.WithCancel(backgroundCtx) @@ -970,3 +992,26 @@ func namespaceMatchesLabelSelector(ns *corev1.Namespace, selectorStr string) boo } return selector.Matches(labels.Set(ns.Labels)) } + +// monitoringResourceDeletePredicate fires on the deletion of any Dash0Monitoring resource. Create/Update/Generic +// events are ignored. The triggered namespace reconcile is idempotent: it (re)creates the auto-managed resource +// only when the namespace should be auto-monitored, otherwise it is a no-op. Deletes initiated by the controller +// itself in deleteAllAutoMonitoringResourcesInCluster happen after the namespace watch has been stopped, so they +// do not feed back into this predicate. +type monitoringResourceDeletePredicate struct{} + +func (p *monitoringResourceDeletePredicate) Create(_ event.TypedCreateEvent[*dash0v1beta1.Dash0Monitoring]) bool { + return false +} + +func (p *monitoringResourceDeletePredicate) Update(_ event.TypedUpdateEvent[*dash0v1beta1.Dash0Monitoring]) bool { + return false +} + +func (p *monitoringResourceDeletePredicate) Delete(_ event.TypedDeleteEvent[*dash0v1beta1.Dash0Monitoring]) bool { + return true +} + +func (p *monitoringResourceDeletePredicate) Generic(_ event.TypedGenericEvent[*dash0v1beta1.Dash0Monitoring]) bool { + return false +} diff --git a/internal/controller/auto_namespace_monitoring_controller_test.go b/internal/controller/auto_namespace_monitoring_controller_test.go index 14717ee2..b4d38da3 100644 --- a/internal/controller/auto_namespace_monitoring_controller_test.go +++ b/internal/controller/auto_namespace_monitoring_controller_test.go @@ -13,6 +13,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/reconcile" dash0common "github.com/dash0hq/dash0-operator/api/operator/common" @@ -299,7 +300,7 @@ var _ = Describe("The auto-namespace-monitoring controller", Ordered, func() { verifyNamespaceHasAutoMonitoringResource(ctx, Default, testAutoNamespace1) }) - It("does not create a Dash0Monitoring with dash0.com/auto-monitored-namespace when the namespace already contains non-auto monitoring resource", func() { + It("does not create a Dash0Monitoring resource with dash0.com/auto-monitored-namespace when the namespace already contains a non-auto monitoring resource", func() { createOperatorConfigurationResourceWithAutoMonitorNamespaces(ctx, new(true), "", nil) EnsureMonitoringResourceWithSpecExistsInNamespace( ctx, @@ -317,6 +318,49 @@ var _ = Describe("The auto-namespace-monitoring controller", Ordered, func() { Expect(hasAutoMonitoredLabel).To(BeFalse()) }) + It("creates the auto-monitoring resource when a manually managed monitoring resource is deleted", func() { + createOperatorConfigurationResourceWithAutoMonitorNamespaces(ctx, new(true), "", nil) + EnsureMonitoringResourceWithSpecExistsInNamespace( + ctx, + k8sClient, + MonitoringResourceDefaultSpec, + manualMonitoringResourceName, + ) + triggerNamespaceWatcherReconcile(ctx, namespaceWatcher, testAutoNamespace1) + + // While the manual resource is in place, no auto-monitoring resource is created. + monitoringResources := listMonitoringResources(ctx, Default, testAutoNamespace1) + Expect(monitoringResources).To(HaveLen(1)) + Expect(monitoringResources[0].Name).To(Equal(manualMonitoringResourceName.Name)) + + // Simulate the user deleting the manually managed monitoring resource. Once the watch on Dash0Monitoring + // deletions observes this, it enqueues a reconcile for the namespace; this test exercises that reconcile + // directly. + DeleteMonitoringResourceByName(ctx, k8sClient, manualMonitoringResourceName, false) + triggerNamespaceWatcherReconcile(ctx, namespaceWatcher, testAutoNamespace1) + + verifyNamespaceHasAutoMonitoringResource(ctx, Default, testAutoNamespace1) + }) + + It("recreates the auto-monitoring resource when the auto-managed resource is deleted by the user", func() { + createOperatorConfigurationResourceWithAutoMonitorNamespaces(ctx, new(true), "", nil) + triggerNamespaceWatcherReconcile(ctx, namespaceWatcher, testAutoNamespace1) + verifyNamespaceHasAutoMonitoringResource(ctx, Default, testAutoNamespace1) + + // Simulate the user deleting the auto-managed monitoring resource. The monitoringResourceDeletePredicate + // triggers a namespace reconcile, which should recreate the auto-managed resource. + autoMonitoringResourceName := types.NamespacedName{ + Name: util.MonitoringAutoResourceDefaultName, + Namespace: testAutoNamespace1, + } + DeleteMonitoringResourceByName(ctx, k8sClient, autoMonitoringResourceName, false) + Expect(listMonitoringResources(ctx, Default, testAutoNamespace1)).To(BeEmpty()) + + triggerNamespaceWatcherReconcile(ctx, namespaceWatcher, testAutoNamespace1) + + verifyNamespaceHasAutoMonitoringResource(ctx, Default, testAutoNamespace1) + }) + It("uses custom settings from the MonitoringTemplate when set", func() { createOperatorConfigurationResourceWithAutoMonitorNamespaces( ctx, @@ -1021,6 +1065,55 @@ var _ = Describe("The auto-namespace-monitoring controller", Ordered, func() { Expect(t2.Labels).To(Equal(map[string]string{"key": "value-2"})) }) }) + + Context("monitoringResourceDeletePredicate", func() { + var p monitoringResourceDeletePredicate + + It("ignores Create events", func() { + Expect(p.Create(event.TypedCreateEvent[*dash0v1beta1.Dash0Monitoring]{ + Object: &dash0v1beta1.Dash0Monitoring{}, + })).To(BeFalse()) + }) + + It("ignores Update events", func() { + Expect(p.Update(event.TypedUpdateEvent[*dash0v1beta1.Dash0Monitoring]{ + ObjectOld: &dash0v1beta1.Dash0Monitoring{}, + ObjectNew: &dash0v1beta1.Dash0Monitoring{}, + })).To(BeFalse()) + }) + + It("ignores Generic events", func() { + Expect(p.Generic(event.TypedGenericEvent[*dash0v1beta1.Dash0Monitoring]{ + Object: &dash0v1beta1.Dash0Monitoring{}, + })).To(BeFalse()) + }) + + It("fires on Delete of a manually managed monitoring resource (no auto-monitored label)", func() { + Expect(p.Delete(event.TypedDeleteEvent[*dash0v1beta1.Dash0Monitoring]{ + Object: &dash0v1beta1.Dash0Monitoring{}, + })).To(BeTrue()) + }) + + It("fires on Delete of a monitoring resource with unrelated labels", func() { + Expect(p.Delete(event.TypedDeleteEvent[*dash0v1beta1.Dash0Monitoring]{ + Object: &dash0v1beta1.Dash0Monitoring{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"some": "label"}, + }, + }, + })).To(BeTrue()) + }) + + It("fires on Delete of an auto-managed monitoring resource", func() { + Expect(p.Delete(event.TypedDeleteEvent[*dash0v1beta1.Dash0Monitoring]{ + Object: &dash0v1beta1.Dash0Monitoring{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{util.AutoMonitoredNamespaceLabel: util.TrueString}, + }, + }, + })).To(BeTrue()) + }) + }) }) func createOperatorConfigurationResourceWithAutoMonitorNamespaces( diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 427f47d0..767f0538 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -3202,10 +3202,26 @@ trace_statements: false, ) - // Part Five: Disable automatic namespace monitoring entirely. This deletes all remaining auto-monitoring + // Part Five: Delete the auto-monitoring resource in a namespace and verify that it is recreated automatically. + + By("auto-monitoring test, part V: deleting the auto-monitoring resource in namespaceExisting") + Expect(runAndIgnoreOutput( + exec.Command( + "kubectl", + "delete", + "--namespace", + namespaceExisting, + "dash0monitoring", + util.MonitoringAutoResourceDefaultName, + "--wait", + ))).To(Succeed()) + + waitForMonitoringResourceToBecomeAvailable(namespaceExisting, util.MonitoringAutoResourceDefaultName) + + // Part Six: Disable automatic namespace monitoring entirely. This deletes all remaining auto-monitoring // resources, and the workloads in those namespaces will be uninstrumented. - By("auto-monitoring test, part V: disabling automatic namespace monitoring") + By("auto-monitoring test, part VI: disabling automatic namespace monitoring") updateOperatorConfigurationAutoNamespaceMonitoringEnabled(false) // Namespaces that still had auto-monitoring resources at the end of part IV, which now should become unmonitored.