From bcb06a5eb29adea74b4c8649241c40cc83cb0e79 Mon Sep 17 00:00:00 2001 From: Tarun Chinmai Sekar Date: Thu, 14 May 2026 19:09:41 -0700 Subject: [PATCH 1/3] [feat]: implement maintenance watcher for linodeCluster --- clients/clients.go | 6 +- docs/src/topics/health-checking.md | 93 ++++ .../controller/linodecluster_controller.go | 77 ++++ ...nodecluster_controller_maintenance_test.go | 431 ++++++++++++++++++ mock/client.go | 54 +++ .../wrappers/linodeclient/linodeclient.gen.go | 25 + 6 files changed, 685 insertions(+), 1 deletion(-) create mode 100644 internal/controller/linodecluster_controller_maintenance_test.go diff --git a/clients/clients.go b/clients/clients.go index ce954fe8d..c321fb396 100644 --- a/clients/clients.go +++ b/clients/clients.go @@ -24,7 +24,7 @@ type LinodeClient interface { LinodeFirewallClient LinodeTokenClient LinodeInterfacesClient - + LinodeMaintenanceClient OnAfterResponse(m func(response *resty.Response) error) } @@ -134,6 +134,10 @@ type LinodeInterfacesClient interface { ListInterfaceFirewalls(ctx context.Context, linodeID int, interfaceID int, opts *linodego.ListOptions) ([]linodego.Firewall, error) } +type LinodeMaintenanceClient interface { + ListMaintenances(ctx context.Context, opts *linodego.ListOptions) ([]linodego.AccountMaintenance, error) +} + type K8sClient interface { client.Client } diff --git a/docs/src/topics/health-checking.md b/docs/src/topics/health-checking.md index 254c6fee7..381ac7bf5 100644 --- a/docs/src/topics/health-checking.md +++ b/docs/src/topics/health-checking.md @@ -23,3 +23,96 @@ on the infrastructure provider. Refer to the [Cluster API documentation](https://cluster-api.sigs.k8s.io/tasks/automated-machine-management/healthchecking) for further information on configuring and using `MachineHealthChecks`. + +## Replacing Machines Scheduled for Maintenance + +CAPL detects upcoming Linode infrastructure maintenance windows and sets a `MaintenanceScheduled` condition on +the corresponding CAPI `Machine` objects. This condition can be used as a trigger for `MachineHealthCheck` to +automatically replace machines before their maintenance window begins. + +### How it works + +During each `LinodeCluster` reconciliation, CAPL queries the Linode API for maintenance events scheduled within +the next 72 hours. For each Linode instance that matches a `LinodeMachine` in the cluster, CAPL sets: + +``` +condition: + type: MaintenanceScheduled + status: "True" +``` + +on the owning CAPI `Machine` object. A `MachineHealthCheck` with `unhealthyMachineConditions` targeting this +condition will then trigger remediation — replacing the machine before the maintenance window starts. + +### Example MachineHealthCheck + +The following `MachineHealthCheck` replaces worker machines when `MaintenanceScheduled=True` has been set for +more than 1 hour: + +```yaml +apiVersion: cluster.x-k8s.io/v1beta2 +kind: MachineHealthCheck +metadata: + name: ${CLUSTER_NAME}-maintenance +spec: + clusterName: ${CLUSTER_NAME} + selector: + matchLabels: + cluster.x-k8s.io/deployment-name: ${CLUSTER_NAME} + checks: + unhealthyMachineConditions: + - type: MaintenanceScheduled + status: "True" + timeoutSeconds: 3600 + remediation: + triggerIf: + unhealthyLessThanOrEqualTo: 1 +``` + +For control plane machines managed by `KubeadmControlPlane`: + +```yaml +apiVersion: cluster.x-k8s.io/v1beta2 +kind: MachineHealthCheck +metadata: + name: ${CLUSTER_NAME}-cp-maintenance +spec: + clusterName: ${CLUSTER_NAME} + selector: + matchLabels: + cluster.x-k8s.io/control-plane: "" + checks: + unhealthyMachineConditions: + - type: MaintenanceScheduled + status: "True" + timeoutSeconds: 3600 + remediation: + triggerIf: + unhealthyLessThanOrEqualTo: 1 +``` + +### Field reference + +| Field | Description | +|-------|-------------| +| `checks.unhealthyMachineConditions` | Conditions checked on the CAPI `Machine` object (not the Node). `MaintenanceScheduled` is set here by CAPL. | +| `type: MaintenanceScheduled` | The condition type set by CAPL when a Linode maintenance event is scheduled within 72 hours. | +| `status: "True"` | The condition status that indicates maintenance is scheduled. | +| `timeoutSeconds` | How long the condition must be present before remediation is triggered. Set this to a value less than the expected lead time before the maintenance window starts. | +| `remediation.triggerIf.unhealthyLessThanOrEqualTo` | Prevents remediation if too many machines are already unhealthy. For control plane clusters, set to `1` to avoid remediating multiple control plane nodes simultaneously and losing etcd quorum. | + +### Choosing a timeout + +CAPL sets `MaintenanceScheduled` up to 72 hours before the maintenance window. A `timeoutSeconds` of `3600` +(1 hour) means remediation begins 71 hours before the window at the earliest. Adjust this value based on +how much lead time your workloads require for graceful draining. + +### Limitations + +- Only machines owned by a `MachineSet` or `KubeadmControlPlane` can be remediated by a `MachineHealthCheck`. + Standalone machines are not eligible. +- The `MaintenanceScheduled` condition is never explicitly cleared by CAPL. Machines will be replaced by the + `MachineHealthCheck` before the condition is removed, which is the intended behavior. +- Control plane remediation preserves etcd quorum: CAPI will not remediate a second control plane machine + until the replacement for the first is healthy. Set `unhealthyLessThanOrEqualTo: 1` for control plane + `MachineHealthChecks` to prevent simultaneous replacements. diff --git a/internal/controller/linodecluster_controller.go b/internal/controller/linodecluster_controller.go index c40c8a292..2a49038d1 100644 --- a/internal/controller/linodecluster_controller.go +++ b/internal/controller/linodecluster_controller.go @@ -24,6 +24,7 @@ import ( "time" "github.com/go-logr/logr" + "github.com/linode/linodego" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -31,6 +32,8 @@ import ( "k8s.io/client-go/tools/events" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" kutil "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/paused" "sigs.k8s.io/cluster-api/util/predicates" ctrl "sigs.k8s.io/controller-runtime" @@ -54,6 +57,7 @@ const ( lbTypeNB string = "NodeBalancer" ConditionPreflightLinodeVPCReady string = "PreflightLinodeVPCReady" ConditionPreflightLinodeNBFirewallReady string = "PreflightLinodeNBFirewallReady" + ConditionMaintenanceScheduled string = "MaintenanceScheduled" ) // LinodeClusterReconciler reconciles a LinodeCluster object @@ -218,9 +222,82 @@ func (r *LinodeClusterReconciler) reconcile( return retryIfTransient(err, logger) } + if err := r.setMaintenanceConditions(ctx, clusterScope, logger); err != nil { + return retryIfTransient(err, logger) + } + return res, nil } +func (r *LinodeClusterReconciler) setMaintenanceConditions(ctx context.Context, clusterScope *scope.ClusterScope, logger logr.Logger) error { + linodeMachines, err := r.collectMaintenanceInfo(ctx, clusterScope, logger) + if err != nil { + return err + } + var errs []error + for i := range linodeMachines { + capiMachine, err := kutil.GetOwnerMachine(ctx, clusterScope.Client, linodeMachines[i].ObjectMeta) + if err != nil { + errs = append(errs, fmt.Errorf("failed to get owner Machine for LinodeMachine %s: %w", linodeMachines[i].Name, err)) + continue + } + if capiMachine == nil { + logger.Info("no owner Machine found for LinodeMachine, skipping", "LinodeMachine", linodeMachines[i].Name) + continue + } + patchHelper, err := patch.NewHelper(capiMachine, clusterScope.Client) + if err != nil { + errs = append(errs, fmt.Errorf("failed to create patch helper for Machine %s: %w", capiMachine.Name, err)) + continue + } + conditions.Set(capiMachine, metav1.Condition{ + Type: ConditionMaintenanceScheduled, + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Now(), + Reason: ConditionMaintenanceScheduled, + }) + if err := patchHelper.Patch(ctx, capiMachine); err != nil { + errs = append(errs, fmt.Errorf("failed to patch Machine %s: %w", capiMachine.Name, err)) + continue + } + } + return utilerrors.NewAggregate(errs) +} + +func (r *LinodeClusterReconciler) collectMaintenanceInfo(ctx context.Context, clusterScope *scope.ClusterScope, logger logr.Logger) ([]infrav1alpha2.LinodeMachine, error) { + // Fetch all maintenance information + threeDaysLater := time.Now().Add(72 * time.Hour).UTC().Format("2006-01-02T15:04:05") // API doesn't like RFC3339 + f := linodego.Filter{} + f.AddField(linodego.Eq, "status", "scheduled") + f.AddField(linodego.Lte, "when", threeDaysLater) + filter, err := f.MarshalJSON() + if err != nil { + return nil, fmt.Errorf("could not marshal filter: %w", err) + } + maintenances, err := clusterScope.LinodeClient.ListMaintenances(ctx, &linodego.ListOptions{Filter: string(filter)}) + if err != nil { + logger.Error(err, "Failed to fetch maintenance information from Linode API") + return nil, err + } + + maintenanceLabels := make(map[string]struct{}, len(maintenances)) + for _, maint := range maintenances { + if maint.Entity.Type != "linode" { + continue + } + maintenanceLabels[maint.Entity.Label] = struct{}{} + } + + var machinesForMaintenance []infrav1alpha2.LinodeMachine + for _, lm := range clusterScope.LinodeMachines.Items { + if _, ok := maintenanceLabels[lm.Name]; ok { + logger.Info("Found maintenance information for", "LinodeMachine", lm.Name) + machinesForMaintenance = append(machinesForMaintenance, lm) + } + } + return machinesForMaintenance, nil +} + func (r *LinodeClusterReconciler) performPreflightChecks(ctx context.Context, logger logr.Logger, clusterScope *scope.ClusterScope) (ctrl.Result, error) { // Check VPC configuration - either direct ID or reference if clusterScope.LinodeCluster.Spec.VPCID != nil || clusterScope.LinodeCluster.Spec.VPCRef != nil { diff --git a/internal/controller/linodecluster_controller_maintenance_test.go b/internal/controller/linodecluster_controller_maintenance_test.go new file mode 100644 index 000000000..de81608a2 --- /dev/null +++ b/internal/controller/linodecluster_controller_maintenance_test.go @@ -0,0 +1,431 @@ +/* +Copyright 2023 Akamai Technologies, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "errors" + "testing" + + "github.com/go-logr/logr/testr" + "github.com/linode/linodego" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/controller-runtime/pkg/client" + fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake" + + infrav1alpha2 "github.com/linode/cluster-api-provider-linode/api/v1alpha2" + "github.com/linode/cluster-api-provider-linode/cloud/scope" + "github.com/linode/cluster-api-provider-linode/mock" +) + +func TestCollectMaintenanceInfo(t *testing.T) { + t.Parallel() + + const clusterName = "test-cluster" + const ns = defaultNamespace + + linodeMachine := func(name string) infrav1alpha2.LinodeMachine { + return infrav1alpha2.LinodeMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Machine", + Name: name, + UID: types.UID("uid-" + name), + }, + }, + }, + } + } + + maintenanceFor := func(label string) linodego.AccountMaintenance { + return linodego.AccountMaintenance{ + Entity: &linodego.Entity{ + Type: "linode", + Label: label, + }, + Status: "scheduled", + } + } + + tests := []struct { + name string + linodeMachines []infrav1alpha2.LinodeMachine + apiMaintenances []linodego.AccountMaintenance + apiError error + expectedNames []string + expectError bool + }{ + { + name: "no maintenance scheduled", + linodeMachines: []infrav1alpha2.LinodeMachine{ + linodeMachine("machine-1"), + linodeMachine("machine-2"), + }, + apiMaintenances: []linodego.AccountMaintenance{}, + expectedNames: nil, + }, + { + name: "one machine has maintenance", + linodeMachines: []infrav1alpha2.LinodeMachine{ + linodeMachine("machine-1"), + linodeMachine("machine-2"), + }, + apiMaintenances: []linodego.AccountMaintenance{ + maintenanceFor("machine-1"), + }, + expectedNames: []string{"machine-1"}, + }, + { + name: "multiple machines have maintenance", + linodeMachines: []infrav1alpha2.LinodeMachine{ + linodeMachine("machine-1"), + linodeMachine("machine-2"), + linodeMachine("machine-3"), + }, + apiMaintenances: []linodego.AccountMaintenance{ + maintenanceFor("machine-1"), + maintenanceFor("machine-3"), + }, + expectedNames: []string{"machine-1", "machine-3"}, + }, + { + name: "maintenance label from different cluster is ignored (exact match)", + linodeMachines: []infrav1alpha2.LinodeMachine{ + linodeMachine("machine-1"), + }, + apiMaintenances: []linodego.AccountMaintenance{ + maintenanceFor("other-cluster-machine-1"), + }, + expectedNames: nil, + }, + { + name: "non-linode entity type is ignored", + linodeMachines: []infrav1alpha2.LinodeMachine{ + linodeMachine("machine-1"), + }, + apiMaintenances: []linodego.AccountMaintenance{ + { + Entity: &linodego.Entity{ + Type: "volume", + Label: "machine-1", + }, + Status: "scheduled", + }, + }, + expectedNames: nil, + }, + { + name: "maintenance API call fails", + linodeMachines: []infrav1alpha2.LinodeMachine{ + linodeMachine("machine-1"), + }, + apiError: errors.New("linode API unavailable"), + expectError: true, + }, + { + name: "no LinodeMachines in cluster", + linodeMachines: []infrav1alpha2.LinodeMachine{}, + apiMaintenances: []linodego.AccountMaintenance{maintenanceFor("orphan-machine")}, + expectedNames: nil, + }, + } + + for _, tt := range tests { + tc := tt + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + mockLinodeClient := mock.NewMockLinodeClient(mockCtrl) + mockLinodeClient.EXPECT(). + ListMaintenances(gomock.Any(), gomock.Any()). + Return(tc.apiMaintenances, tc.apiError) + + clusterScope := &scope.ClusterScope{ + LinodeCluster: &infrav1alpha2.LinodeCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: ns, + }, + }, + LinodeMachines: infrav1alpha2.LinodeMachineList{ + Items: tc.linodeMachines, + }, + LinodeClient: mockLinodeClient, + } + + reconciler := LinodeClusterReconciler{} + logger := testr.New(t) + + result, err := reconciler.collectMaintenanceInfo(context.Background(), clusterScope, logger) + + if tc.expectError { + require.Error(t, err) + return + } + require.NoError(t, err) + + var resultNames []string + for _, lm := range result { + resultNames = append(resultNames, lm.Name) + } + assert.ElementsMatch(t, tc.expectedNames, resultNames) + }) + } +} + +func TestSetMaintenanceConditions(t *testing.T) { + t.Parallel() + + const clusterName = "test-cluster" + const ns = defaultNamespace + + testScheme := runtime.NewScheme() + require.NoError(t, clientgoscheme.AddToScheme(testScheme)) + require.NoError(t, clusterv1.AddToScheme(testScheme)) + require.NoError(t, infrav1alpha2.AddToScheme(testScheme)) + + linodeMachineWithOwner := func(name string) infrav1alpha2.LinodeMachine { + return infrav1alpha2.LinodeMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Machine", + Name: name, + UID: types.UID("uid-" + name), + }, + }, + }, + } + } + + linodeMachineNoOwner := func(name string) infrav1alpha2.LinodeMachine { + return infrav1alpha2.LinodeMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + }, + } + } + + capMachine := func(name string) *clusterv1.Machine { + return &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + UID: types.UID("uid-" + name), + }, + } + } + + t.Run("no maintenance — no conditions set", func(t *testing.T) { + t.Parallel() + mockCtrl := gomock.NewController(t) + ml := mock.NewMockLinodeClient(mockCtrl) + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{}, nil) + + fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).Build() + cs := &scope.ClusterScope{ + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineWithOwner("machine-1")}}, + LinodeClient: ml, + Client: fakeClient, + } + err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) + require.NoError(t, err) + }) + + t.Run("one machine in maintenance — MaintenanceScheduled condition set", func(t *testing.T) { + t.Parallel() + mockCtrl := gomock.NewController(t) + ml := mock.NewMockLinodeClient(mockCtrl) + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ + {Entity: &linodego.Entity{Type: "linode", Label: "machine-1"}, Status: "scheduled"}, + }, nil) + + machine := capMachine("machine-1") + fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).WithObjects(machine).WithStatusSubresource(machine).Build() + cs := &scope.ClusterScope{ + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineWithOwner("machine-1")}}, + LinodeClient: ml, + Client: fakeClient, + } + err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) + require.NoError(t, err) + + updated := &clusterv1.Machine{} + require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{Name: "machine-1", Namespace: ns}, updated)) + var found bool + for _, c := range updated.Status.Conditions { + if c.Type == ConditionMaintenanceScheduled { + found = true + assert.Equal(t, metav1.ConditionTrue, c.Status) + } + } + assert.True(t, found, "expected MaintenanceScheduled condition to be set") + }) + + t.Run("two machines in maintenance — both get conditions set", func(t *testing.T) { + t.Parallel() + mockCtrl := gomock.NewController(t) + ml := mock.NewMockLinodeClient(mockCtrl) + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ + {Entity: &linodego.Entity{Type: "linode", Label: "machine-1"}, Status: "scheduled"}, + {Entity: &linodego.Entity{Type: "linode", Label: "machine-2"}, Status: "scheduled"}, + }, nil) + + m1, m2 := capMachine("machine-1"), capMachine("machine-2") + fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).WithObjects(m1, m2).WithStatusSubresource(m1, m2).Build() + cs := &scope.ClusterScope{ + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{ + linodeMachineWithOwner("machine-1"), + linodeMachineWithOwner("machine-2"), + }}, + LinodeClient: ml, + Client: fakeClient, + } + require.NoError(t, (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t))) + + for _, name := range []string{"machine-1", "machine-2"} { + updated := &clusterv1.Machine{} + require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{Name: name, Namespace: ns}, updated)) + var found bool + for _, c := range updated.Status.Conditions { + if c.Type == ConditionMaintenanceScheduled { + found = true + } + } + assert.Truef(t, found, "expected MaintenanceScheduled condition on %s", name) + } + }) + + t.Run("only machines matching maintenance are patched — other machines unaffected", func(t *testing.T) { + t.Parallel() + mockCtrl := gomock.NewController(t) + ml := mock.NewMockLinodeClient(mockCtrl) + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ + {Entity: &linodego.Entity{Type: "linode", Label: "machine-1"}, Status: "scheduled"}, + }, nil) + + m1, m2 := capMachine("machine-1"), capMachine("machine-2") + fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).WithObjects(m1, m2).WithStatusSubresource(m1, m2).Build() + cs := &scope.ClusterScope{ + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{ + linodeMachineWithOwner("machine-1"), + linodeMachineWithOwner("machine-2"), + }}, + LinodeClient: ml, + Client: fakeClient, + } + require.NoError(t, (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t))) + + m1Updated := &clusterv1.Machine{} + require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{Name: "machine-1", Namespace: ns}, m1Updated)) + var m1HasCondition bool + for _, c := range m1Updated.Status.Conditions { + if c.Type == ConditionMaintenanceScheduled { + m1HasCondition = true + } + } + assert.True(t, m1HasCondition, "machine-1 should have MaintenanceScheduled condition") + + m2Updated := &clusterv1.Machine{} + require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{Name: "machine-2", Namespace: ns}, m2Updated)) + for _, c := range m2Updated.Status.Conditions { + assert.NotEqual(t, ConditionMaintenanceScheduled, c.Type, "machine-2 should not have MaintenanceScheduled condition") + } + }) + + t.Run("LinodeMachine has no owner reference — skipped without error", func(t *testing.T) { + t.Parallel() + mockCtrl := gomock.NewController(t) + ml := mock.NewMockLinodeClient(mockCtrl) + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ + {Entity: &linodego.Entity{Type: "linode", Label: "machine-orphan"}, Status: "scheduled"}, + }, nil) + + fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).Build() + cs := &scope.ClusterScope{ + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineNoOwner("machine-orphan")}}, + LinodeClient: ml, + Client: fakeClient, + } + err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) + require.NoError(t, err) + }) + + t.Run("GetOwnerMachine fails — error aggregated", func(t *testing.T) { + t.Parallel() + mockCtrl := gomock.NewController(t) + ml := mock.NewMockLinodeClient(mockCtrl) + mk := mock.NewMockK8sClient(mockCtrl) + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ + {Entity: &linodego.Entity{Type: "linode", Label: "machine-1"}, Status: "scheduled"}, + }, nil) + mk.EXPECT(). + Get(gomock.Any(), gomock.Any(), gomock.AssignableToTypeOf(&clusterv1.Machine{}), gomock.Any()). + Return(errors.New("API server unavailable")) + + cs := &scope.ClusterScope{ + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineWithOwner("machine-1")}}, + LinodeClient: ml, + Client: mk, + } + err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to get owner Machine") + }) + + t.Run("ListMaintenances API error — returned immediately", func(t *testing.T) { + t.Parallel() + mockCtrl := gomock.NewController(t) + ml := mock.NewMockLinodeClient(mockCtrl) + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return(nil, errors.New("API down")) + + fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).Build() + cs := &scope.ClusterScope{ + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineWithOwner("machine-1")}}, + LinodeClient: ml, + Client: fakeClient, + } + err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) + require.Error(t, err) + assert.Contains(t, err.Error(), "API down") + }) +} diff --git a/mock/client.go b/mock/client.go index 62c7ef285..c1ea18c5f 100644 --- a/mock/client.go +++ b/mock/client.go @@ -772,6 +772,21 @@ func (mr *MockLinodeClientMockRecorder) ListInterfaces(ctx, linodeID, opts any) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListInterfaces", reflect.TypeOf((*MockLinodeClient)(nil).ListInterfaces), ctx, linodeID, opts) } +// ListMaintenances mocks base method. +func (m *MockLinodeClient) ListMaintenances(ctx context.Context, opts *linodego.ListOptions) ([]linodego.AccountMaintenance, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListMaintenances", ctx, opts) + ret0, _ := ret[0].([]linodego.AccountMaintenance) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListMaintenances indicates an expected call of ListMaintenances. +func (mr *MockLinodeClientMockRecorder) ListMaintenances(ctx, opts any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListMaintenances", reflect.TypeOf((*MockLinodeClient)(nil).ListMaintenances), ctx, opts) +} + // ListNodeBalancerNodes mocks base method. func (m *MockLinodeClient) ListNodeBalancerNodes(ctx context.Context, nodebalancerID, configID int, opts *linodego.ListOptions) ([]linodego.NodeBalancerNode, error) { m.ctrl.T.Helper() @@ -2291,6 +2306,45 @@ func (mr *MockLinodeInterfacesClientMockRecorder) ListInterfaces(ctx, linodeID, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListInterfaces", reflect.TypeOf((*MockLinodeInterfacesClient)(nil).ListInterfaces), ctx, linodeID, opts) } +// MockLinodeMaintenanceClient is a mock of LinodeMaintenanceClient interface. +type MockLinodeMaintenanceClient struct { + ctrl *gomock.Controller + recorder *MockLinodeMaintenanceClientMockRecorder + isgomock struct{} +} + +// MockLinodeMaintenanceClientMockRecorder is the mock recorder for MockLinodeMaintenanceClient. +type MockLinodeMaintenanceClientMockRecorder struct { + mock *MockLinodeMaintenanceClient +} + +// NewMockLinodeMaintenanceClient creates a new mock instance. +func NewMockLinodeMaintenanceClient(ctrl *gomock.Controller) *MockLinodeMaintenanceClient { + mock := &MockLinodeMaintenanceClient{ctrl: ctrl} + mock.recorder = &MockLinodeMaintenanceClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockLinodeMaintenanceClient) EXPECT() *MockLinodeMaintenanceClientMockRecorder { + return m.recorder +} + +// ListMaintenances mocks base method. +func (m *MockLinodeMaintenanceClient) ListMaintenances(ctx context.Context, opts *linodego.ListOptions) ([]linodego.AccountMaintenance, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListMaintenances", ctx, opts) + ret0, _ := ret[0].([]linodego.AccountMaintenance) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListMaintenances indicates an expected call of ListMaintenances. +func (mr *MockLinodeMaintenanceClientMockRecorder) ListMaintenances(ctx, opts any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListMaintenances", reflect.TypeOf((*MockLinodeMaintenanceClient)(nil).ListMaintenances), ctx, opts) +} + // MockK8sClient is a mock of K8sClient interface. type MockK8sClient struct { ctrl *gomock.Controller diff --git a/observability/wrappers/linodeclient/linodeclient.gen.go b/observability/wrappers/linodeclient/linodeclient.gen.go index f2bc15fa3..b21678987 100644 --- a/observability/wrappers/linodeclient/linodeclient.gen.go +++ b/observability/wrappers/linodeclient/linodeclient.gen.go @@ -1273,6 +1273,31 @@ func (_d LinodeClientWithTracing) ListInterfaces(ctx context.Context, linodeID i return _d.LinodeClient.ListInterfaces(ctx, linodeID, opts) } +// ListMaintenances implements _sourceClients.LinodeClient +func (_d LinodeClientWithTracing) ListMaintenances(ctx context.Context, opts *linodego.ListOptions) (aa1 []linodego.AccountMaintenance, err error) { + ctx, _span := tracing.Start(ctx, "_sourceClients.LinodeClient.ListMaintenances") + defer func() { + if _d._spanDecorator != nil { + _d._spanDecorator(_span, map[string]interface{}{ + "ctx": ctx, + "opts": opts}, map[string]interface{}{ + "aa1": aa1, + "err": err}) + } + + if err != nil { + _span.RecordError(err) + _span.SetAttributes( + attribute.String("event", "error"), + attribute.String("message", err.Error()), + ) + } + + _span.End() + }() + return _d.LinodeClient.ListMaintenances(ctx, opts) +} + // ListNodeBalancerNodes implements _sourceClients.LinodeClient func (_d LinodeClientWithTracing) ListNodeBalancerNodes(ctx context.Context, nodebalancerID int, configID int, opts *linodego.ListOptions) (na1 []linodego.NodeBalancerNode, err error) { ctx, _span := tracing.Start(ctx, "_sourceClients.LinodeClient.ListNodeBalancerNodes") From f7e9aec8f2fbb972ddb1175439f654b673e50af1 Mon Sep 17 00:00:00 2001 From: Tarun Chinmai Sekar Date: Fri, 15 May 2026 12:13:45 -0700 Subject: [PATCH 2/3] fixes from review --- .../controller/linodecluster_controller.go | 31 +- ...nodecluster_controller_maintenance_test.go | 409 ++++++++++-------- util/helpers.go | 18 + 3 files changed, 265 insertions(+), 193 deletions(-) diff --git a/internal/controller/linodecluster_controller.go b/internal/controller/linodecluster_controller.go index 2a49038d1..77ebaf58e 100644 --- a/internal/controller/linodecluster_controller.go +++ b/internal/controller/linodecluster_controller.go @@ -73,7 +73,8 @@ type LinodeClusterReconciler struct { // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=linodeclusters,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=linodeclusters/status,verbs=get;update;patch // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=linodeclusters/finalizers,verbs=update - +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;watch;list +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines/status,verbs=get;update;patch // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -251,10 +252,9 @@ func (r *LinodeClusterReconciler) setMaintenanceConditions(ctx context.Context, continue } conditions.Set(capiMachine, metav1.Condition{ - Type: ConditionMaintenanceScheduled, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.Now(), - Reason: ConditionMaintenanceScheduled, + Type: ConditionMaintenanceScheduled, + Status: metav1.ConditionTrue, + Reason: ConditionMaintenanceScheduled, }) if err := patchHelper.Patch(ctx, capiMachine); err != nil { errs = append(errs, fmt.Errorf("failed to patch Machine %s: %w", capiMachine.Name, err)) @@ -280,18 +280,29 @@ func (r *LinodeClusterReconciler) collectMaintenanceInfo(ctx context.Context, cl return nil, err } - maintenanceLabels := make(map[string]struct{}, len(maintenances)) + maintenanceLabels := make(map[int]struct{}, len(maintenances)) for _, maint := range maintenances { + if maint.Entity == nil { + continue + } if maint.Entity.Type != "linode" { continue } - maintenanceLabels[maint.Entity.Label] = struct{}{} + maintenanceLabels[maint.Entity.ID] = struct{}{} } var machinesForMaintenance []infrav1alpha2.LinodeMachine - for _, lm := range clusterScope.LinodeMachines.Items { - if _, ok := maintenanceLabels[lm.Name]; ok { - logger.Info("Found maintenance information for", "LinodeMachine", lm.Name) + linodeMachines, err := util.GetLinodeMachinesForCluster(ctx, clusterScope.Client, clusterScope.Cluster) + if err != nil { + return nil, err + } + + for _, lm := range linodeMachines.Items { + if lm.Spec.InstanceID == nil { + continue + } + if _, ok := maintenanceLabels[*lm.Spec.InstanceID]; ok { + logger.Info("Found maintenance information for", "LinodeMachine", lm.Name, "id", *lm.Spec.InstanceID) machinesForMaintenance = append(machinesForMaintenance, lm) } } diff --git a/internal/controller/linodecluster_controller_maintenance_test.go b/internal/controller/linodecluster_controller_maintenance_test.go index de81608a2..ef1b8e6b3 100644 --- a/internal/controller/linodecluster_controller_maintenance_test.go +++ b/internal/controller/linodecluster_controller_maintenance_test.go @@ -19,6 +19,7 @@ package controller import ( "context" "errors" + "fmt" "testing" "github.com/go-logr/logr/testr" @@ -37,120 +38,139 @@ import ( infrav1alpha2 "github.com/linode/cluster-api-provider-linode/api/v1alpha2" "github.com/linode/cluster-api-provider-linode/cloud/scope" "github.com/linode/cluster-api-provider-linode/mock" + "github.com/linode/cluster-api-provider-linode/util" ) -func TestCollectMaintenanceInfo(t *testing.T) { - t.Parallel() +const clusterLabelKey = "cluster.x-k8s.io/cluster-name" - const clusterName = "test-cluster" - const ns = defaultNamespace +func testSchemeForMaintenance(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + require.NoError(t, clientgoscheme.AddToScheme(s)) + require.NoError(t, clusterv1.AddToScheme(s)) + require.NoError(t, infrav1alpha2.AddToScheme(s)) + return s +} - linodeMachine := func(name string) infrav1alpha2.LinodeMachine { - return infrav1alpha2.LinodeMachine{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: ns, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: clusterv1.GroupVersion.String(), - Kind: "Machine", - Name: name, - UID: types.UID("uid-" + name), - }, +func newLinodeMachineWithID(name, ns, clusterName string, instanceID int) infrav1alpha2.LinodeMachine { + return infrav1alpha2.LinodeMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + Labels: map[string]string{ + clusterLabelKey: clusterName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Machine", + Name: name, + UID: types.UID("uid-" + name), }, }, - } + }, + Spec: infrav1alpha2.LinodeMachineSpec{ + InstanceID: util.Pointer(instanceID), + }, + } +} + +func maintenanceForID(id int) linodego.AccountMaintenance { + return linodego.AccountMaintenance{ + Entity: &linodego.Entity{ + Type: "linode", + ID: id, + }, + Status: "scheduled", } +} - maintenanceFor := func(label string) linodego.AccountMaintenance { - return linodego.AccountMaintenance{ - Entity: &linodego.Entity{ - Type: "linode", - Label: label, - }, - Status: "scheduled", - } +func newCapiCluster(name, ns string) *clusterv1.Cluster { + return &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, } +} + +// newCapiMachine UID must match the OwnerReference UID set by newLinodeMachineWithID. +func newCapiMachine(name, ns string) *clusterv1.Machine { + return &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + UID: types.UID("uid-" + name), + }, + } +} + +func TestCollectMaintenanceInfo(t *testing.T) { + t.Parallel() + + const clusterName = "test-cluster" + const ns = defaultNamespace + + scheme := testSchemeForMaintenance(t) tests := []struct { - name string - linodeMachines []infrav1alpha2.LinodeMachine - apiMaintenances []linodego.AccountMaintenance - apiError error - expectedNames []string - expectError bool + name string + instanceIDs []int + apiMaintenances []linodego.AccountMaintenance + apiError error + expectedNames []string + expectError bool }{ { - name: "no maintenance scheduled", - linodeMachines: []infrav1alpha2.LinodeMachine{ - linodeMachine("machine-1"), - linodeMachine("machine-2"), - }, + name: "no maintenance scheduled", + instanceIDs: []int{101, 102}, apiMaintenances: []linodego.AccountMaintenance{}, expectedNames: nil, }, { - name: "one machine has maintenance", - linodeMachines: []infrav1alpha2.LinodeMachine{ - linodeMachine("machine-1"), - linodeMachine("machine-2"), - }, - apiMaintenances: []linodego.AccountMaintenance{ - maintenanceFor("machine-1"), - }, - expectedNames: []string{"machine-1"}, + name: "one machine has maintenance", + instanceIDs: []int{101, 102}, + apiMaintenances: []linodego.AccountMaintenance{maintenanceForID(101)}, + expectedNames: []string{"machine-0"}, }, { - name: "multiple machines have maintenance", - linodeMachines: []infrav1alpha2.LinodeMachine{ - linodeMachine("machine-1"), - linodeMachine("machine-2"), - linodeMachine("machine-3"), - }, + name: "multiple machines have maintenance", + instanceIDs: []int{101, 102, 103}, apiMaintenances: []linodego.AccountMaintenance{ - maintenanceFor("machine-1"), - maintenanceFor("machine-3"), + maintenanceForID(101), + maintenanceForID(103), }, - expectedNames: []string{"machine-1", "machine-3"}, + expectedNames: []string{"machine-0", "machine-2"}, }, { - name: "maintenance label from different cluster is ignored (exact match)", - linodeMachines: []infrav1alpha2.LinodeMachine{ - linodeMachine("machine-1"), - }, + name: "maintenance entity ID not matching any machine is ignored", + instanceIDs: []int{101}, + apiMaintenances: []linodego.AccountMaintenance{maintenanceForID(999)}, + expectedNames: nil, + }, + { + name: "non-linode entity type is ignored", + instanceIDs: []int{101}, apiMaintenances: []linodego.AccountMaintenance{ - maintenanceFor("other-cluster-machine-1"), + {Entity: &linodego.Entity{Type: "volume", ID: 101}, Status: "scheduled"}, }, expectedNames: nil, }, { - name: "non-linode entity type is ignored", - linodeMachines: []infrav1alpha2.LinodeMachine{ - linodeMachine("machine-1"), - }, + name: "nil entity is ignored", + instanceIDs: []int{101}, apiMaintenances: []linodego.AccountMaintenance{ - { - Entity: &linodego.Entity{ - Type: "volume", - Label: "machine-1", - }, - Status: "scheduled", - }, + {Entity: nil, Status: "scheduled"}, }, expectedNames: nil, }, { - name: "maintenance API call fails", - linodeMachines: []infrav1alpha2.LinodeMachine{ - linodeMachine("machine-1"), - }, + name: "maintenance API call fails", + instanceIDs: []int{101}, apiError: errors.New("linode API unavailable"), expectError: true, }, { name: "no LinodeMachines in cluster", - linodeMachines: []infrav1alpha2.LinodeMachine{}, - apiMaintenances: []linodego.AccountMaintenance{maintenanceFor("orphan-machine")}, + instanceIDs: []int{}, + apiMaintenances: []linodego.AccountMaintenance{maintenanceForID(101)}, expectedNames: nil, }, } @@ -168,23 +188,22 @@ func TestCollectMaintenanceInfo(t *testing.T) { ListMaintenances(gomock.Any(), gomock.Any()). Return(tc.apiMaintenances, tc.apiError) - clusterScope := &scope.ClusterScope{ - LinodeCluster: &infrav1alpha2.LinodeCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: clusterName, - Namespace: ns, - }, - }, - LinodeMachines: infrav1alpha2.LinodeMachineList{ - Items: tc.linodeMachines, - }, - LinodeClient: mockLinodeClient, + cluster := newCapiCluster(clusterName, ns) + objs := []client.Object{cluster} + for i, id := range tc.instanceIDs { + lm := newLinodeMachineWithID(fmt.Sprintf("machine-%d", i), ns, clusterName, id) + objs = append(objs, &lm) } + fakeClient := fakeclient.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() - reconciler := LinodeClusterReconciler{} - logger := testr.New(t) + clusterScope := &scope.ClusterScope{ + Cluster: cluster, + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeClient: mockLinodeClient, + Client: fakeClient, + } - result, err := reconciler.collectMaintenanceInfo(context.Background(), clusterScope, logger) + result, err := (&LinodeClusterReconciler{}).collectMaintenanceInfo(context.Background(), clusterScope, testr.New(t)) if tc.expectError { require.Error(t, err) @@ -207,46 +226,7 @@ func TestSetMaintenanceConditions(t *testing.T) { const clusterName = "test-cluster" const ns = defaultNamespace - testScheme := runtime.NewScheme() - require.NoError(t, clientgoscheme.AddToScheme(testScheme)) - require.NoError(t, clusterv1.AddToScheme(testScheme)) - require.NoError(t, infrav1alpha2.AddToScheme(testScheme)) - - linodeMachineWithOwner := func(name string) infrav1alpha2.LinodeMachine { - return infrav1alpha2.LinodeMachine{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: ns, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: clusterv1.GroupVersion.String(), - Kind: "Machine", - Name: name, - UID: types.UID("uid-" + name), - }, - }, - }, - } - } - - linodeMachineNoOwner := func(name string) infrav1alpha2.LinodeMachine { - return infrav1alpha2.LinodeMachine{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: ns, - }, - } - } - - capMachine := func(name string) *clusterv1.Machine { - return &clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: ns, - UID: types.UID("uid-" + name), - }, - } - } + scheme := testSchemeForMaintenance(t) t.Run("no maintenance — no conditions set", func(t *testing.T) { t.Parallel() @@ -254,15 +234,16 @@ func TestSetMaintenanceConditions(t *testing.T) { ml := mock.NewMockLinodeClient(mockCtrl) ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{}, nil) - fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).Build() + cluster := newCapiCluster(clusterName, ns) + lm := newLinodeMachineWithID("machine-1", ns, clusterName, 101) + fakeClient := fakeclient.NewClientBuilder().WithScheme(scheme).WithObjects(cluster, &lm).Build() cs := &scope.ClusterScope{ - LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, - LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineWithOwner("machine-1")}}, - LinodeClient: ml, - Client: fakeClient, + Cluster: cluster, + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeClient: ml, + Client: fakeClient, } - err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) - require.NoError(t, err) + require.NoError(t, (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t))) }) t.Run("one machine in maintenance — MaintenanceScheduled condition set", func(t *testing.T) { @@ -270,19 +251,24 @@ func TestSetMaintenanceConditions(t *testing.T) { mockCtrl := gomock.NewController(t) ml := mock.NewMockLinodeClient(mockCtrl) ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ - {Entity: &linodego.Entity{Type: "linode", Label: "machine-1"}, Status: "scheduled"}, + maintenanceForID(101), }, nil) - machine := capMachine("machine-1") - fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).WithObjects(machine).WithStatusSubresource(machine).Build() + cluster := newCapiCluster(clusterName, ns) + lm := newLinodeMachineWithID("machine-1", ns, clusterName, 101) + machine := newCapiMachine("machine-1", ns) + fakeClient := fakeclient.NewClientBuilder(). + WithScheme(scheme). + WithObjects(cluster, &lm, machine). + WithStatusSubresource(machine). + Build() cs := &scope.ClusterScope{ - LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, - LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineWithOwner("machine-1")}}, - LinodeClient: ml, - Client: fakeClient, + Cluster: cluster, + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeClient: ml, + Client: fakeClient, } - err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) - require.NoError(t, err) + require.NoError(t, (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t))) updated := &clusterv1.Machine{} require.NoError(t, fakeClient.Get(context.Background(), client.ObjectKey{Name: "machine-1", Namespace: ns}, updated)) @@ -301,20 +287,24 @@ func TestSetMaintenanceConditions(t *testing.T) { mockCtrl := gomock.NewController(t) ml := mock.NewMockLinodeClient(mockCtrl) ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ - {Entity: &linodego.Entity{Type: "linode", Label: "machine-1"}, Status: "scheduled"}, - {Entity: &linodego.Entity{Type: "linode", Label: "machine-2"}, Status: "scheduled"}, + maintenanceForID(101), + maintenanceForID(102), }, nil) - m1, m2 := capMachine("machine-1"), capMachine("machine-2") - fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).WithObjects(m1, m2).WithStatusSubresource(m1, m2).Build() + cluster := newCapiCluster(clusterName, ns) + lm1 := newLinodeMachineWithID("machine-1", ns, clusterName, 101) + lm2 := newLinodeMachineWithID("machine-2", ns, clusterName, 102) + m1, m2 := newCapiMachine("machine-1", ns), newCapiMachine("machine-2", ns) + fakeClient := fakeclient.NewClientBuilder(). + WithScheme(scheme). + WithObjects(cluster, &lm1, &lm2, m1, m2). + WithStatusSubresource(m1, m2). + Build() cs := &scope.ClusterScope{ + Cluster: cluster, LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, - LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{ - linodeMachineWithOwner("machine-1"), - linodeMachineWithOwner("machine-2"), - }}, - LinodeClient: ml, - Client: fakeClient, + LinodeClient: ml, + Client: fakeClient, } require.NoError(t, (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t))) @@ -336,19 +326,23 @@ func TestSetMaintenanceConditions(t *testing.T) { mockCtrl := gomock.NewController(t) ml := mock.NewMockLinodeClient(mockCtrl) ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ - {Entity: &linodego.Entity{Type: "linode", Label: "machine-1"}, Status: "scheduled"}, + maintenanceForID(101), }, nil) - m1, m2 := capMachine("machine-1"), capMachine("machine-2") - fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).WithObjects(m1, m2).WithStatusSubresource(m1, m2).Build() + cluster := newCapiCluster(clusterName, ns) + lm1 := newLinodeMachineWithID("machine-1", ns, clusterName, 101) + lm2 := newLinodeMachineWithID("machine-2", ns, clusterName, 102) + m1, m2 := newCapiMachine("machine-1", ns), newCapiMachine("machine-2", ns) + fakeClient := fakeclient.NewClientBuilder(). + WithScheme(scheme). + WithObjects(cluster, &lm1, &lm2, m1, m2). + WithStatusSubresource(m1, m2). + Build() cs := &scope.ClusterScope{ + Cluster: cluster, LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, - LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{ - linodeMachineWithOwner("machine-1"), - linodeMachineWithOwner("machine-2"), - }}, - LinodeClient: ml, - Client: fakeClient, + LinodeClient: ml, + Client: fakeClient, } require.NoError(t, (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t))) @@ -369,23 +363,59 @@ func TestSetMaintenanceConditions(t *testing.T) { } }) + t.Run("LinodeMachine has no InstanceID — skipped without error", func(t *testing.T) { + t.Parallel() + mockCtrl := gomock.NewController(t) + ml := mock.NewMockLinodeClient(mockCtrl) + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ + maintenanceForID(101), + }, nil) + + cluster := newCapiCluster(clusterName, ns) + lmNoID := infrav1alpha2.LinodeMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-noid", + Namespace: ns, + Labels: map[string]string{clusterLabelKey: clusterName}, + }, + } + fakeClient := fakeclient.NewClientBuilder().WithScheme(scheme).WithObjects(cluster, &lmNoID).Build() + cs := &scope.ClusterScope{ + Cluster: cluster, + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeClient: ml, + Client: fakeClient, + } + require.NoError(t, (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t))) + }) + t.Run("LinodeMachine has no owner reference — skipped without error", func(t *testing.T) { t.Parallel() mockCtrl := gomock.NewController(t) ml := mock.NewMockLinodeClient(mockCtrl) ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ - {Entity: &linodego.Entity{Type: "linode", Label: "machine-orphan"}, Status: "scheduled"}, + maintenanceForID(101), }, nil) - fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).Build() + cluster := newCapiCluster(clusterName, ns) + lmNoOwner := infrav1alpha2.LinodeMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-orphan", + Namespace: ns, + Labels: map[string]string{clusterLabelKey: clusterName}, + }, + Spec: infrav1alpha2.LinodeMachineSpec{ + InstanceID: util.Pointer(101), + }, + } + fakeClient := fakeclient.NewClientBuilder().WithScheme(scheme).WithObjects(cluster, &lmNoOwner).Build() cs := &scope.ClusterScope{ - LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, - LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineNoOwner("machine-orphan")}}, - LinodeClient: ml, - Client: fakeClient, + Cluster: cluster, + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeClient: ml, + Client: fakeClient, } - err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) - require.NoError(t, err) + require.NoError(t, (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t))) }) t.Run("GetOwnerMachine fails — error aggregated", func(t *testing.T) { @@ -393,18 +423,28 @@ func TestSetMaintenanceConditions(t *testing.T) { mockCtrl := gomock.NewController(t) ml := mock.NewMockLinodeClient(mockCtrl) mk := mock.NewMockK8sClient(mockCtrl) + + lm := newLinodeMachineWithID("machine-1", ns, clusterName, 101) + lmList := infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{lm}} + ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{ - {Entity: &linodego.Entity{Type: "linode", Label: "machine-1"}, Status: "scheduled"}, + maintenanceForID(101), }, nil) + mk.EXPECT(). + List(gomock.Any(), gomock.AssignableToTypeOf(&infrav1alpha2.LinodeMachineList{}), gomock.Any()). + DoAndReturn(func(_ context.Context, list *infrav1alpha2.LinodeMachineList, _ ...client.ListOption) error { + *list = lmList + return nil + }) mk.EXPECT(). Get(gomock.Any(), gomock.Any(), gomock.AssignableToTypeOf(&clusterv1.Machine{}), gomock.Any()). Return(errors.New("API server unavailable")) cs := &scope.ClusterScope{ - LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, - LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineWithOwner("machine-1")}}, - LinodeClient: ml, - Client: mk, + Cluster: newCapiCluster(clusterName, ns), + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeClient: ml, + Client: mk, } err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) require.Error(t, err) @@ -417,15 +457,18 @@ func TestSetMaintenanceConditions(t *testing.T) { ml := mock.NewMockLinodeClient(mockCtrl) ml.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return(nil, errors.New("API down")) - fakeClient := fakeclient.NewClientBuilder().WithScheme(testScheme).Build() + cluster := newCapiCluster(clusterName, ns) + lm := newLinodeMachineWithID("machine-1", ns, clusterName, 101) + fakeClient := fakeclient.NewClientBuilder().WithScheme(scheme).WithObjects(cluster, &lm).Build() cs := &scope.ClusterScope{ - LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, - LinodeMachines: infrav1alpha2.LinodeMachineList{Items: []infrav1alpha2.LinodeMachine{linodeMachineWithOwner("machine-1")}}, - LinodeClient: ml, - Client: fakeClient, + Cluster: cluster, + LinodeCluster: &infrav1alpha2.LinodeCluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}}, + LinodeClient: ml, + Client: fakeClient, } err := (&LinodeClusterReconciler{}).setMaintenanceConditions(context.Background(), cs, testr.New(t)) require.Error(t, err) assert.Contains(t, err.Error(), "API down") }) } + diff --git a/util/helpers.go b/util/helpers.go index b7affd879..1e2d4a122 100644 --- a/util/helpers.go +++ b/util/helpers.go @@ -3,6 +3,7 @@ package util import ( "context" "errors" + "fmt" "io" "net" "net/http" @@ -11,7 +12,9 @@ import ( "strings" "github.com/linode/linodego" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/selection" "k8s.io/apimachinery/pkg/types" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/controller-runtime/pkg/client" @@ -137,3 +140,18 @@ func SetOwnerReferenceToLinodeCluster(ctx context.Context, k8sclient client.Clie return nil } + +func GetLinodeMachinesForCluster(ctx context.Context, k8sclient client.Client, cluster *clusterv1.Cluster) (*infrav1alpha2.LinodeMachineList, error) { + clusterReq, err := labels.NewRequirement("cluster.x-k8s.io/cluster-name", selection.Equals, []string{cluster.Name}) + if err != nil { + return nil, fmt.Errorf("building label selector: %w", err) + } + + selector := labels.NewSelector() + selector = selector.Add(*clusterReq) + var linodeMachineList infrav1alpha2.LinodeMachineList + if err := k8sclient.List(ctx, &linodeMachineList, &client.ListOptions{Namespace: cluster.Namespace, LabelSelector: selector}); err != nil { + return nil, fmt.Errorf("listing linode machines %w", err) + } + return &linodeMachineList, nil +} From bfdc711f70a11d426432f86f897872c8c4d1fb70 Mon Sep 17 00:00:00 2001 From: Tarun Chinmai Sekar Date: Thu, 18 Jun 2026 10:27:24 -0700 Subject: [PATCH 3/3] fix: tests and re-gen stuff --- config/rbac/role.yaml | 16 ++++++++++++++++ internal/controller/linodecluster_controller.go | 14 ++++++++------ .../linodecluster_controller_maintenance_test.go | 1 - .../controller/linodecluster_controller_test.go | 7 +++++++ 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index a0aa974ef..8f5a5f8b2 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -21,11 +21,27 @@ rules: - cluster.x-k8s.io resources: - clusters + verbs: + - get + - list + - watch +- apiGroups: + - cluster.x-k8s.io + resources: - machines verbs: - get - list + - patch - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machines/status + verbs: + - get + - patch + - update - apiGroups: - infrastructure.cluster.x-k8s.io resources: diff --git a/internal/controller/linodecluster_controller.go b/internal/controller/linodecluster_controller.go index 77ebaf58e..f4131c8a0 100644 --- a/internal/controller/linodecluster_controller.go +++ b/internal/controller/linodecluster_controller.go @@ -60,6 +60,8 @@ const ( ConditionMaintenanceScheduled string = "MaintenanceScheduled" ) +var threeDays = 72 * time.Hour + // LinodeClusterReconciler reconciles a LinodeCluster object type LinodeClusterReconciler struct { client.Client @@ -73,7 +75,7 @@ type LinodeClusterReconciler struct { // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=linodeclusters,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=linodeclusters/status,verbs=get;update;patch // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=linodeclusters/finalizers,verbs=update -// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;watch;list +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;watch;list;patch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines/status,verbs=get;update;patch // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -236,14 +238,14 @@ func (r *LinodeClusterReconciler) setMaintenanceConditions(ctx context.Context, return err } var errs []error - for i := range linodeMachines { - capiMachine, err := kutil.GetOwnerMachine(ctx, clusterScope.Client, linodeMachines[i].ObjectMeta) + for _, lm := range linodeMachines { + capiMachine, err := kutil.GetOwnerMachine(ctx, clusterScope.Client, lm.ObjectMeta) if err != nil { - errs = append(errs, fmt.Errorf("failed to get owner Machine for LinodeMachine %s: %w", linodeMachines[i].Name, err)) + errs = append(errs, fmt.Errorf("failed to get owner Machine for LinodeMachine %s: %w", lm.Name, err)) continue } if capiMachine == nil { - logger.Info("no owner Machine found for LinodeMachine, skipping", "LinodeMachine", linodeMachines[i].Name) + logger.Info("no owner Machine found for LinodeMachine, skipping", "LinodeMachine", lm.Name) continue } patchHelper, err := patch.NewHelper(capiMachine, clusterScope.Client) @@ -266,7 +268,7 @@ func (r *LinodeClusterReconciler) setMaintenanceConditions(ctx context.Context, func (r *LinodeClusterReconciler) collectMaintenanceInfo(ctx context.Context, clusterScope *scope.ClusterScope, logger logr.Logger) ([]infrav1alpha2.LinodeMachine, error) { // Fetch all maintenance information - threeDaysLater := time.Now().Add(72 * time.Hour).UTC().Format("2006-01-02T15:04:05") // API doesn't like RFC3339 + threeDaysLater := time.Now().Add(threeDays).UTC().Format("2006-01-02T15:04:05") // API doesn't like RFC3339 f := linodego.Filter{} f.AddField(linodego.Eq, "status", "scheduled") f.AddField(linodego.Lte, "when", threeDaysLater) diff --git a/internal/controller/linodecluster_controller_maintenance_test.go b/internal/controller/linodecluster_controller_maintenance_test.go index ef1b8e6b3..0b8b65ed5 100644 --- a/internal/controller/linodecluster_controller_maintenance_test.go +++ b/internal/controller/linodecluster_controller_maintenance_test.go @@ -471,4 +471,3 @@ func TestSetMaintenanceConditions(t *testing.T) { assert.Contains(t, err.Error(), "API down") }) } - diff --git a/internal/controller/linodecluster_controller_test.go b/internal/controller/linodecluster_controller_test.go index 7fcd50a70..2f4b204c9 100644 --- a/internal/controller/linodecluster_controller_test.go +++ b/internal/controller/linodecluster_controller_test.go @@ -114,6 +114,7 @@ var _ = Describe("cluster-lifecycle", Ordered, Label("cluster", "cluster-lifecyc reconciler.Recorder = mck.Recorder() Expect(k8sClient.Get(ctx, clusterKey, &linodeCluster)).To(Succeed()) + cScope.Cluster = &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: defaultNamespace}} cScope.LinodeCluster = &linodeCluster // Create patch helper with latest state of resource. @@ -310,6 +311,7 @@ var _ = Describe("cluster-lifecycle", Ordered, Label("cluster", "cluster-lifecyc Call("cluster is created", func(ctx context.Context, mck Mock) { cScope.LinodeClient = mck.LinodeClient cScope.LinodeCluster.Spec.Network.ApiserverNodeBalancerConfigID = nil + mck.LinodeClient.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{}, nil).AnyTimes() mck.LinodeClient.EXPECT().ListNodeBalancerNodes(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return([]linodego.NodeBalancerNode{}, nil).AnyTimes() getNB := mck.LinodeClient.EXPECT().GetNodeBalancer(gomock.Any(), gomock.Any()). Return(&linodego.NodeBalancer{ @@ -499,6 +501,7 @@ var _ = Describe("cluster-lifecycle-dns", Ordered, Label("cluster", "cluster-lif reconciler.Recorder = mck.Recorder() Expect(k8sClient.Get(ctx, clusterKey, &linodeCluster)).To(Succeed()) + cScope.Cluster = &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: defaultNamespace}} cScope.LinodeCluster = &linodeCluster // Create patch helper with latest state of resource. @@ -514,6 +517,7 @@ var _ = Describe("cluster-lifecycle-dns", Ordered, Label("cluster", "cluster-lif Call("cluster with dns loadbalancing is created", func(ctx context.Context, mck Mock) { cScope.LinodeClient = mck.LinodeClient cScope.LinodeDomainsClient = mck.LinodeClient + mck.LinodeClient.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{}, nil).AnyTimes() mck.LinodeClient.EXPECT().ListDomains(gomock.Any(), gomock.Any()).Return([]linodego.Domain{ { ID: 1, @@ -793,6 +797,7 @@ var _ = Describe("dns-override-endpoint", Ordered, Label("cluster", "dns-overrid cScope.LinodeClient = mck.LinodeClient cScope.LinodeDomainsClient = mck.LinodeClient cScope.AkamaiDomainsClient = mck.AkamEdgeDNSClient + mck.LinodeClient.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{}, nil).AnyTimes() linodeMachines := infrav1alpha2.LinodeMachineList{ Items: []infrav1alpha2.LinodeMachine{linodeMachine}, } @@ -879,6 +884,7 @@ var _ = Describe("cluster-with-direct-vpcid", Ordered, Label("cluster", "direct- reconciler.Recorder = mck.Recorder() Expect(k8sClient.Get(ctx, clusterKey, &linodeCluster)).To(Succeed()) + cScope.Cluster = &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: defaultNamespace}} cScope.LinodeCluster = &linodeCluster // Create patch helper with latest state of resource. @@ -892,6 +898,7 @@ var _ = Describe("cluster-with-direct-vpcid", Ordered, Label("cluster", "direct- Path( Call("direct VPCID exists and has subnets", func(ctx context.Context, mck Mock) { cScope.LinodeClient = mck.LinodeClient + mck.LinodeClient.EXPECT().ListMaintenances(gomock.Any(), gomock.Any()).Return([]linodego.AccountMaintenance{}, nil).AnyTimes() // Mock GetVPC call to return a VPC with subnets mck.LinodeClient.EXPECT().GetVPC(gomock.Any(), gomock.Eq(12345)).