Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion internal/controller/cloudscalecluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/predicates"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand Down Expand Up @@ -189,6 +190,17 @@ func (r *CloudscaleClusterReconciler) reconcileNormal(ctx context.Context, clust
provisioned := r.isInfrastructureProvisioned(clusterScope)
clusterScope.CloudscaleCluster.Status.Initialization.Provisioned = new(provisioned)

// The load balancer health monitor is created only once the control plane is
// initialized, which itself requires the infrastructure to be provisioned.
// Running this last lets us poll for ControlPlaneInitialized without deadlocking the control plane we wait on.
result, err = r.reconcileLBHealthMonitor(ctx, clusterScope)
if err != nil {
return ctrl.Result{}, fmt.Errorf("reconciling load balancer health monitor: %w", err)
}
if !result.IsZero() {
return result, nil
}

return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -328,6 +340,12 @@ func (r *CloudscaleClusterReconciler) SetupWithManager(ctx context.Context, mgr
Watches(
&clusterv1.Cluster{},
handler.EnqueueRequestsFromMapFunc(util.ClusterToInfrastructureMapFunc(ctx, infrastructurev1beta2.SchemeGroupVersion.WithKind("CloudscaleCluster"), mgr.GetClient(), &infrastructurev1beta2.CloudscaleCluster{})),
builder.WithPredicates(predicates.Any(r.Scheme, logger,
// Resume reconciliation when the Cluster is paused/unpaused, and when
// the control plane initializes so the LB health monitor gets added.
predicates.ClusterPausedTransitions(r.Scheme, logger),
predicates.ClusterControlPlaneInitialized(r.Scheme, logger),
)),
).
Watches(
&infrastructurev1beta2.CloudscaleMachine{},
Expand All @@ -348,7 +366,7 @@ func (r *CloudscaleClusterReconciler) cloudscaleMachineToCluster(ctx context.Con
}

// Only care about control plane machines
if _, ok := machine.Labels[clusterv1.MachineControlPlaneNameLabel]; !ok {
if _, ok := machine.Labels[clusterv1.MachineControlPlaneLabel]; !ok {
return nil
}

Expand Down
35 changes: 35 additions & 0 deletions internal/controller/cloudscalecluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,43 @@ import (

infrastructurev1beta2 "github.com/cloudscale-ch/cluster-api-provider-cloudscale/api/v1beta2"
"github.com/cloudscale-ch/cluster-api-provider-cloudscale/internal/scope"
"github.com/cloudscale-ch/cluster-api-provider-cloudscale/internal/testutils"
)

// TestCloudscaleMachineToCluster verifies the CloudscaleMachine watch maps a
// control-plane machine to its owning CloudscaleCluster.
func TestCloudscaleMachineToCluster(t *testing.T) {
cluster := &infrastructurev1beta2.CloudscaleCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test-cluster",
Namespace: "default",
Labels: map[string]string{clusterv1.ClusterNameLabel: "test-cluster"},
},
}

t.Run("control-plane machine maps to its cluster", func(t *testing.T) {
g := NewWithT(t)
r := newTestReconciler(cluster)
mapFn := r.cloudscaleMachineToCluster(context.Background(), r.Client)

requests := mapFn(context.Background(), testutils.NewControlPlaneMachine("cp-0", "srv-0"))

g.Expect(requests).To(HaveLen(1))
g.Expect(requests[0].NamespacedName).To(Equal(types.NamespacedName{Name: "test-cluster", Namespace: "default"}))
})

t.Run("non control-plane machine maps to nothing", func(t *testing.T) {
g := NewWithT(t)
r := newTestReconciler(cluster)
mapFn := r.cloudscaleMachineToCluster(context.Background(), r.Client)

worker := testutils.NewControlPlaneMachine("worker-0", "srv-1")
delete(worker.Labels, clusterv1.MachineControlPlaneLabel)

g.Expect(mapFn(context.Background(), worker)).To(BeEmpty())
})
}

// TestCloudscaleClusterReconciler_Reconcile_EntryPoint covers the cheap
// early-exit paths of Reconcile against the envtest API server.
func TestCloudscaleClusterReconciler_Reconcile_EntryPoint(t *testing.T) {
Expand Down
42 changes: 31 additions & 11 deletions internal/controller/cloudscalecluster_loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,21 +142,14 @@ func (r *CloudscaleClusterReconciler) reconcileLoadBalancer(ctx context.Context,
return result, nil
}

// 4. Reconcile the health monitor
if result, err := r.reconcileLBHealthMonitor(ctx, clusterScope); err != nil {
return ctrl.Result{}, fmt.Errorf("reconciling load balancer health monitor: %w", err)
} else if !result.IsZero() {
return result, nil
}

// 5. Reconcile the members
// 4. Reconcile the members
if result, err := r.reconcileLBMembers(ctx, clusterScope); err != nil {
return ctrl.Result{}, fmt.Errorf("reconciling load balancer members: %w", err)
} else if !result.IsZero() {
return result, nil
}

// 6. Set the control plane endpoint from the VIP
// 5. Set the control plane endpoint from the VIP
if clusterScope.CloudscaleCluster.Spec.ControlPlaneEndpoint.Host == "" {
if clusterScope.CloudscaleCluster.Spec.FloatingIP != nil {
// Floating IP is configured — the FIP reconciler will set the endpoint.
Expand All @@ -172,9 +165,21 @@ func (r *CloudscaleClusterReconciler) reconcileLoadBalancer(ctx context.Context,
}
}

// The LB is not running (degraded/error).
// We poll for the LB status since we don't have a way to watch the loadbalancer status otherwise.
if lbPending {
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}

return ctrl.Result{}, nil
}

// controlPlaneInitialized denotes when the control plane is functional enough to accept requests.
// Never goes back to false once true.
func controlPlaneInitialized(clusterScope *scope.ClusterScope) bool {
return ptr.Deref(clusterScope.Cluster.Status.Initialization.ControlPlaneInitialized, false)
}

// reconcileLB ensures the load balancer exists.
func (r *CloudscaleClusterReconciler) reconcileLB(ctx context.Context, clusterScope *scope.ClusterScope) (ctrl.Result, error) {
_, id, err := ensureResource(ctx, clusterScope,
Expand Down Expand Up @@ -338,9 +343,24 @@ func (r *CloudscaleClusterReconciler) reconcileLBListener(ctx context.Context, c
return ctrl.Result{}, nil
}

// reconcileLBHealthMonitor ensures the load balancer health monitor exists.
// reconcileLBHealthMonitor ensures the load balancer health monitor exists once the control plane is initialized.
//
// Without a health monitor, all members are marked as `no_monitor` and traffic will be routed to them regardless
// if they respond or not.
// The health monitor performs TCP health checks on the API server port.
// This is called as the last phase of reconcileNormal, after the cluster is marked provisioned.
func (r *CloudscaleClusterReconciler) reconcileLBHealthMonitor(ctx context.Context, clusterScope *scope.ClusterScope) (ctrl.Result, error) {
// External control plane: no load balancer, so no health monitor.
if !ptr.Deref(clusterScope.CloudscaleCluster.Spec.ControlPlaneLoadBalancer.Enabled, true) {
return ctrl.Result{}, nil
}

// Defer the initial creation until the control plane is initialized.
if clusterScope.CloudscaleCluster.Status.LoadBalancerHealthMonitorID == "" && !controlPlaneInitialized(clusterScope) {
clusterScope.Info("Waiting for control-plane initialized before creating health monitor")
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}

_, id, err := ensureResource(ctx, clusterScope,
clusterScope.CloudscaleCluster.Status.LoadBalancerHealthMonitorID,
"load balancer health monitor",
Expand Down Expand Up @@ -406,7 +426,7 @@ func (r *CloudscaleClusterReconciler) reconcileLBMembers(ctx context.Context, cl
return ctrl.Result{}, fmt.Errorf("failed to get desired load balancer members: %w", err)
}

clusterScope.V(2).Info("reconcileLBMembers", "currentMembers", currentMembers, "desiredMembers", desiredMembers)
clusterScope.V(2).Info("Reconciling LB Members", "currentMembers", currentMembers, "desiredMembers", desiredMembers)

// Build maps keyed by member name for comparison
currentByName := make(map[string]cloudscalesdk.LoadBalancerPoolMember, len(currentMembers))
Expand Down
72 changes: 68 additions & 4 deletions internal/controller/cloudscalecluster_loadbalancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ func TestReconcileLBHealthMonitor(t *testing.T) {
}

clusterScope := newLBClusterScope(
testutils.WithControlPlaneInitialized(true),
testutils.WithHMService(healthMonitorService),
testutils.WithHealthMonitorParams(tc.delayS, tc.timeoutS, tc.upThreshold, tc.downThresh),
)
Expand Down Expand Up @@ -333,7 +334,7 @@ func TestReconcileLoadBalancer_ErrorStatusProceedsWithMemberReconciliation(t *te
result, err := r.reconcileLoadBalancer(context.Background(), clusterScope)

g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.RequeueAfter).To(BeZero(), "error status should not block member reconciliation")
g.Expect(result.RequeueAfter).To(Equal(10*time.Second), "error status should requeue to re-check the LB after member reconciliation")

// LoadBalancerReadyCondition should be False because LB is not running
cond := conditions.Get(clusterScope.CloudscaleCluster, infrastructurev1beta2.LoadBalancerReadyCondition)
Expand Down Expand Up @@ -371,7 +372,7 @@ func TestReconcileLoadBalancer_DegradedStatusProceedsWithMemberReconciliation(t
result, err := r.reconcileLoadBalancer(context.Background(), clusterScope)

g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.RequeueAfter).To(BeZero(), "degraded status should not block member reconciliation")
g.Expect(result.RequeueAfter).To(Equal(10*time.Second), "degraded status should requeue to re-check the LB after member reconciliation")

// Stale member should have been removed
g.Expect(deletedMemberUUID).To(Equal("stale-uuid"))
Expand Down Expand Up @@ -433,17 +434,80 @@ func TestReconcileLoadBalancer_SetsControlPlaneEndpoint(t *testing.T) {
clusterScope.CloudscaleCluster.Status.LoadBalancerID = testLBUUID
clusterScope.CloudscaleCluster.Status.LoadBalancerPoolID = testPoolUUID
clusterScope.CloudscaleCluster.Status.LoadBalancerListenerID = testListenerUUID
clusterScope.CloudscaleCluster.Status.LoadBalancerHealthMonitorID = testHMUUID

r := newTestReconciler()

_, err := r.reconcileLoadBalancer(context.Background(), clusterScope)
result, err := r.reconcileLoadBalancer(context.Background(), clusterScope)

g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.IsZero()).To(BeTrue(), "reconcileLoadBalancer must not requeue for the health monitor")
g.Expect(clusterScope.CloudscaleCluster.Spec.ControlPlaneEndpoint.Host).To(Equal("203.0.113.10"))
g.Expect(clusterScope.CloudscaleCluster.Spec.ControlPlaneEndpoint.Port).To(Equal(int32(6443)))
}

// hmServiceWithCreate returns a health-monitor mock whose List is empty and
// whose Create flips *created and returns a fixed UUID.
func hmServiceWithCreate(created *bool) *testutils.MockLoadBalancerHealthMonitorService {
return &testutils.MockLoadBalancerHealthMonitorService{
ListFn: func(ctx context.Context, modifiers ...cloudscalesdk.ListRequestModifier) ([]cloudscalesdk.LoadBalancerHealthMonitor, error) {
return nil, nil
},
CreateFn: func(ctx context.Context, req *cloudscalesdk.LoadBalancerHealthMonitorRequest) (*cloudscalesdk.LoadBalancerHealthMonitor, error) {
*created = true
return &cloudscalesdk.LoadBalancerHealthMonitor{UUID: "hm-uuid"}, nil
},
}
}

func TestReconcileLBHealthMonitor_RequeuesUntilControlPlaneInitialized(t *testing.T) {
g := NewWithT(t)

var created bool
clusterScope := newLBClusterScope(testutils.WithHMService(hmServiceWithCreate(&created)))
clusterScope.CloudscaleCluster.Status.LoadBalancerPoolID = testPoolUUID

result, err := newTestReconciler().reconcileLBHealthMonitor(context.Background(), clusterScope)

g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.RequeueAfter).To(Equal(10*time.Second), "must poll until the control plane is initialized")
g.Expect(created).To(BeFalse(), "health monitor must not be created before the control plane is initialized")
g.Expect(clusterScope.CloudscaleCluster.Status.LoadBalancerHealthMonitorID).To(BeEmpty())
}

func TestReconcileLBHealthMonitor_CreatesWhenControlPlaneInitialized(t *testing.T) {
g := NewWithT(t)

var created bool
clusterScope := newLBClusterScope(
testutils.WithControlPlaneInitialized(true),
testutils.WithHMService(hmServiceWithCreate(&created)),
)
clusterScope.CloudscaleCluster.Status.LoadBalancerPoolID = testPoolUUID

result, err := newTestReconciler().reconcileLBHealthMonitor(context.Background(), clusterScope)

g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.IsZero()).To(BeTrue())
g.Expect(created).To(BeTrue(), "health monitor must be created once the control plane is initialized")
g.Expect(clusterScope.CloudscaleCluster.Status.LoadBalancerHealthMonitorID).To(Equal("hm-uuid"))
}

func TestReconcileLBHealthMonitor_SkipsWhenLBDisabled(t *testing.T) {
g := NewWithT(t)

var created bool
clusterScope := newLBClusterScope(
testutils.WithLBEnabled(false),
testutils.WithHMService(hmServiceWithCreate(&created)),
)

result, err := newTestReconciler().reconcileLBHealthMonitor(context.Background(), clusterScope)

g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.IsZero()).To(BeTrue())
g.Expect(created).To(BeFalse(), "external control plane has no health monitor")
}

// ============================================================================
// Tests for deleteLoadBalancer
// ============================================================================
Expand Down
27 changes: 27 additions & 0 deletions internal/controller/cloudscalecluster_reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ package controller
import (
"context"
"testing"
"time"

cloudscalesdk "github.com/cloudscale-ch/cloudscale-go-sdk/v9"
. "github.com/onsi/gomega"
Expand Down Expand Up @@ -126,6 +127,32 @@ func TestReconcileNormal_FullyProvisionedCluster(t *testing.T) {
g.Expect(readyCond.Status).To(Equal(metav1.ConditionTrue))
}

// TestReconcileNormal_RequeuesForHealthMonitorWithoutBlockingProvisioning ensures a provisioned cluster will poll
// for control plane initialized (in reconcile health monitor).
func TestReconcileNormal_RequeuesForHealthMonitorWithoutBlockingProvisioning(t *testing.T) {
g := NewWithT(t)

clusterScope := defaultProvisionedScope()
clusterScope.CloudscaleCluster.Status.Networks = []infrastructurev1beta2.NetworkStatus{
{Name: "test", NetworkID: "net-123", SubnetID: "subnet-123", Managed: true},
}
clusterScope.CloudscaleCluster.Status.LoadBalancerID = "lb-123"
clusterScope.CloudscaleCluster.Status.LoadBalancerPoolID = "pool-123"
clusterScope.CloudscaleCluster.Status.LoadBalancerListenerID = "listener-123"

r := newTestReconciler()

result, err := r.reconcileNormal(context.Background(), clusterScope)

g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.RequeueAfter).To(Equal(10*time.Second), "must keep polling for control-plane initialization")

// provisioning and controlPlaneEndpoint should be set, even though we requeue due to the logic in reconciling the health monitor.
g.Expect(clusterScope.CloudscaleCluster.Status.Initialization).ToNot(BeNil())
g.Expect(*clusterScope.CloudscaleCluster.Status.Initialization.Provisioned).To(BeTrue())
g.Expect(clusterScope.CloudscaleCluster.Spec.ControlPlaneEndpoint.Host).ToNot(BeEmpty())
}

// TestReconcileDelete_SuccessfulDeletion is the top-level happy-path smoke test
// for reconcileDelete: deletes the LB and the network, clears all status IDs,
// removes the finalizer, and sets the Deleting/Ready conditions.
Expand Down
9 changes: 9 additions & 0 deletions internal/testutils/cluster_scope.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,15 @@ func WithGeneration(gen int64) ClusterScopeOption {
}
}

// WithControlPlaneInitialized sets the CAPI Cluster ControlPlaneInitialized flag.
func WithControlPlaneInitialized(v bool) ClusterScopeOption {
return func(cs *scope.ClusterScope) {
cs.Cluster.Status.Initialization = clusterv1.ClusterInitializationStatus{
ControlPlaneInitialized: new(v),
}
}
}

// MachineScopeOption configures a MachineScope for testing.
type MachineScopeOption func(*scope.MachineScope)

Expand Down
4 changes: 2 additions & 2 deletions internal/testutils/fixtures.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import (
)

// NewControlPlaneMachine builds a control-plane CloudscaleMachine fixture in
// the "default" namespace, labelled as control-plane of "test-cluster" and
// with the given ServerID. Used heavily by the FIP and LB controller tests.
// the "default" namespace, labeled as control-plane of "test-cluster" and
// with the given ServerID.
func NewControlPlaneMachine(name, serverID string) *infrastructurev1beta2.CloudscaleMachine {
m := &infrastructurev1beta2.CloudscaleMachine{
ObjectMeta: metav1.ObjectMeta{
Expand Down