@@ -28,6 +28,7 @@ import (
2828 hcmetrics "github.com/openshift/hypershift/hypershift-operator/controllers/hostedcluster/metrics"
2929 hcpmanifests "github.com/openshift/hypershift/hypershift-operator/controllers/manifests"
3030 "github.com/openshift/hypershift/hypershift-operator/controllers/manifests/controlplaneoperator"
31+ etcdrecoverymanifests "github.com/openshift/hypershift/hypershift-operator/controllers/manifests/etcdrecovery"
3132 kvinfra "github.com/openshift/hypershift/kubevirtexternalinfra"
3233 "github.com/openshift/hypershift/support/api"
3334 "github.com/openshift/hypershift/support/azureutil"
@@ -47,9 +48,11 @@ import (
4748 configv1 "github.com/openshift/api/config/v1"
4849
4950 appsv1 "k8s.io/api/apps/v1"
51+ batchv1 "k8s.io/api/batch/v1"
5052 corev1 "k8s.io/api/core/v1"
5153 "k8s.io/apimachinery/pkg/api/equality"
5254 errors2 "k8s.io/apimachinery/pkg/api/errors"
55+ "k8s.io/apimachinery/pkg/api/meta"
5356 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
5457 "k8s.io/apimachinery/pkg/types"
5558 "k8s.io/apimachinery/pkg/util/intstr"
@@ -6568,3 +6571,165 @@ func TestComputeEndpointServiceCondition(t *testing.T) {
65686571 })
65696572 }
65706573}
6574+
6575+ func TestReconcileETCDMemberRecovery (t * testing.T ) {
6576+ hcpNS := "clusters-test-hc"
6577+
6578+ healthyEtcdPods := func () []crclient.Object {
6579+ var pods []crclient.Object
6580+ for i := 0 ; i < 3 ; i ++ {
6581+ pods = append (pods , & corev1.Pod {
6582+ ObjectMeta : metav1.ObjectMeta {
6583+ Name : fmt .Sprintf ("etcd-%d" , i ),
6584+ Namespace : hcpNS ,
6585+ Labels : map [string ]string {"app" : "etcd" },
6586+ },
6587+ Status : corev1.PodStatus {
6588+ ContainerStatuses : []corev1.ContainerStatus {
6589+ {
6590+ Name : "etcd" ,
6591+ State : corev1.ContainerState {Running : & corev1.ContainerStateRunning {}},
6592+ },
6593+ },
6594+ },
6595+ })
6596+ }
6597+ return pods
6598+ }
6599+
6600+ healthyStatefulSet := & appsv1.StatefulSet {
6601+ ObjectMeta : metav1.ObjectMeta {
6602+ Name : "etcd" ,
6603+ Namespace : hcpNS ,
6604+ },
6605+ Status : appsv1.StatefulSetStatus {
6606+ ReadyReplicas : 3 ,
6607+ AvailableReplicas : 3 ,
6608+ },
6609+ }
6610+
6611+ unhealthyStatefulSet := & appsv1.StatefulSet {
6612+ ObjectMeta : metav1.ObjectMeta {
6613+ Name : "etcd" ,
6614+ Namespace : hcpNS ,
6615+ },
6616+ Status : appsv1.StatefulSetStatus {
6617+ ReadyReplicas : 2 ,
6618+ AvailableReplicas : 2 ,
6619+ },
6620+ }
6621+
6622+ staleCondition := metav1.Condition {
6623+ Type : string (hyperv1 .EtcdRecoveryActive ),
6624+ Status : metav1 .ConditionFalse ,
6625+ Reason : hyperv1 .EtcdRecoveryJobFailedReason ,
6626+ Message : "Error in Etcd Recovery job: the Etcd cluster requires manual intervention." ,
6627+ LastTransitionTime : metav1 .Now (),
6628+ }
6629+
6630+ failedJob := etcdrecoverymanifests .EtcdRecoveryJob (hcpNS )
6631+ failedJob .Status = batchv1.JobStatus {
6632+ Conditions : []batchv1.JobCondition {
6633+ {
6634+ Type : batchv1 .JobFailed ,
6635+ Status : corev1 .ConditionTrue ,
6636+ },
6637+ },
6638+ }
6639+
6640+ testCases := []struct {
6641+ name string
6642+ objects []crclient.Object
6643+ conditions []metav1.Condition
6644+ expectedReason string
6645+ conditionExists bool
6646+ expectJobDeleted bool
6647+ }{
6648+ {
6649+ name : "When etcd is healthy and stale EtcdRecoveryJobFailed condition exists it should clear the condition" ,
6650+ conditions : []metav1.Condition {staleCondition },
6651+ objects : append (healthyEtcdPods (), healthyStatefulSet ),
6652+ expectedReason : hyperv1 .AsExpectedReason ,
6653+ conditionExists : true ,
6654+ },
6655+ {
6656+ name : "When etcd is healthy and no EtcdRecoveryActive condition exists it should not add one" ,
6657+ conditions : []metav1.Condition {},
6658+ objects : append (healthyEtcdPods (), healthyStatefulSet ),
6659+ conditionExists : false ,
6660+ },
6661+ {
6662+ name : "When failed job exists but etcd recovered it should cleanup job and clear condition" ,
6663+ conditions : []metav1.Condition {staleCondition },
6664+ objects : append (healthyEtcdPods (), healthyStatefulSet , failedJob ),
6665+ expectedReason : hyperv1 .AsExpectedReason ,
6666+ conditionExists : true ,
6667+ expectJobDeleted : true ,
6668+ },
6669+ {
6670+ name : "When failed job exists and etcd is still unhealthy it should keep the failure condition" ,
6671+ conditions : []metav1.Condition {staleCondition },
6672+ objects : append (healthyEtcdPods (), unhealthyStatefulSet , failedJob ),
6673+ expectedReason : hyperv1 .EtcdRecoveryJobFailedReason ,
6674+ conditionExists : true ,
6675+ },
6676+ }
6677+
6678+ for _ , tc := range testCases {
6679+ t .Run (tc .name , func (t * testing.T ) {
6680+ g := NewGomegaWithT (t )
6681+
6682+ hcluster := & hyperv1.HostedCluster {
6683+ ObjectMeta : metav1.ObjectMeta {
6684+ Name : "test-hc" ,
6685+ Namespace : "clusters" ,
6686+ },
6687+ Spec : hyperv1.HostedClusterSpec {
6688+ Etcd : hyperv1.EtcdSpec {
6689+ ManagementType : hyperv1 .Managed ,
6690+ },
6691+ ControllerAvailabilityPolicy : hyperv1 .HighlyAvailable ,
6692+ },
6693+ Status : hyperv1.HostedClusterStatus {
6694+ Conditions : tc .conditions ,
6695+ },
6696+ }
6697+
6698+ objects := append ([]crclient.Object {hcluster }, tc .objects ... )
6699+ client := fake .NewClientBuilder ().
6700+ WithScheme (api .Scheme ).
6701+ WithObjects (objects ... ).
6702+ WithStatusSubresource (hcluster ).
6703+ Build ()
6704+
6705+ r := & HostedClusterReconciler {
6706+ Client : client ,
6707+ now : metav1 .Now ,
6708+ EnableEtcdRecovery : true ,
6709+ }
6710+
6711+ _ , err := r .reconcileETCDMemberRecovery (
6712+ ctrl .LoggerInto (t .Context (), zap .New (zap .UseDevMode (true ))),
6713+ hcluster ,
6714+ upsert .New (false ).CreateOrUpdate ,
6715+ )
6716+ g .Expect (err ).ToNot (HaveOccurred ())
6717+
6718+ updatedHC := & hyperv1.HostedCluster {}
6719+ g .Expect (client .Get (t .Context (), crclient .ObjectKeyFromObject (hcluster ), updatedHC )).To (Succeed ())
6720+
6721+ condition := meta .FindStatusCondition (updatedHC .Status .Conditions , string (hyperv1 .EtcdRecoveryActive ))
6722+ if tc .conditionExists {
6723+ g .Expect (condition ).ToNot (BeNil ())
6724+ g .Expect (condition .Reason ).To (Equal (tc .expectedReason ))
6725+ } else {
6726+ g .Expect (condition ).To (BeNil ())
6727+ }
6728+ if tc .expectJobDeleted {
6729+ job := etcdrecoverymanifests .EtcdRecoveryJob (hcpNS )
6730+ err := client .Get (t .Context (), crclient .ObjectKeyFromObject (job ), job )
6731+ g .Expect (errors2 .IsNotFound (err )).To (BeTrue (), "expected failed recovery job to be deleted" )
6732+ }
6733+ })
6734+ }
6735+ }
0 commit comments