|
| 1 | +/* |
| 2 | +Copyright 2026 The RBG Authors. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package readiness |
| 18 | + |
| 19 | +import ( |
| 20 | + "fmt" |
| 21 | + "testing" |
| 22 | + |
| 23 | + v1 "k8s.io/api/core/v1" |
| 24 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 25 | + "sigs.k8s.io/rbgs/api/workloads/constants" |
| 26 | + podadapter "sigs.k8s.io/rbgs/pkg/inplace/pod/clientadapter" |
| 27 | +) |
| 28 | + |
| 29 | +// fakeAdapter is a minimal in-memory Adapter for unit tests. |
| 30 | +type fakeAdapter struct { |
| 31 | + pod *v1.Pod |
| 32 | +} |
| 33 | + |
| 34 | +func (f *fakeAdapter) GetPod(namespace, name string) (*v1.Pod, error) { |
| 35 | + if f.pod == nil || f.pod.Namespace != namespace || f.pod.Name != name { |
| 36 | + return nil, fmt.Errorf("pod %s/%s not found", namespace, name) |
| 37 | + } |
| 38 | + return f.pod.DeepCopy(), nil |
| 39 | +} |
| 40 | + |
| 41 | +func (f *fakeAdapter) UpdatePod(pod *v1.Pod) error { |
| 42 | + f.pod = pod.DeepCopy() |
| 43 | + return nil |
| 44 | +} |
| 45 | + |
| 46 | +func (f *fakeAdapter) UpdatePodStatus(pod *v1.Pod) error { |
| 47 | + f.pod = pod.DeepCopy() |
| 48 | + return nil |
| 49 | +} |
| 50 | + |
| 51 | +var _ podadapter.Adapter = &fakeAdapter{} |
| 52 | + |
| 53 | +// newReadyPod creates a pod that simulates the state after KWOK's pod-ready |
| 54 | +// stage fires: all conditions True, both readiness gates present and True. |
| 55 | +func newReadyPod() *v1.Pod { |
| 56 | + return &v1.Pod{ |
| 57 | + ObjectMeta: metav1.ObjectMeta{ |
| 58 | + Namespace: "test-ns", |
| 59 | + Name: "test-pod", |
| 60 | + }, |
| 61 | + Spec: v1.PodSpec{ |
| 62 | + ReadinessGates: []v1.PodReadinessGate{ |
| 63 | + {ConditionType: constants.InstancePodReadyConditionType}, |
| 64 | + {ConditionType: "InPlaceUpdateReady"}, |
| 65 | + }, |
| 66 | + }, |
| 67 | + Status: v1.PodStatus{ |
| 68 | + Conditions: []v1.PodCondition{ |
| 69 | + { |
| 70 | + Type: v1.PodReady, |
| 71 | + Status: v1.ConditionTrue, |
| 72 | + LastTransitionTime: metav1.Now(), |
| 73 | + }, |
| 74 | + { |
| 75 | + Type: v1.ContainersReady, |
| 76 | + Status: v1.ConditionTrue, |
| 77 | + LastTransitionTime: metav1.Now(), |
| 78 | + }, |
| 79 | + { |
| 80 | + Type: constants.InstancePodReadyConditionType, |
| 81 | + Status: v1.ConditionTrue, |
| 82 | + Message: "[]", |
| 83 | + LastTransitionTime: metav1.Now(), |
| 84 | + }, |
| 85 | + { |
| 86 | + Type: "InPlaceUpdateReady", |
| 87 | + Status: v1.ConditionTrue, |
| 88 | + LastTransitionTime: metav1.Now(), |
| 89 | + }, |
| 90 | + { |
| 91 | + Type: v1.PodScheduled, |
| 92 | + Status: v1.ConditionTrue, |
| 93 | + LastTransitionTime: metav1.Now(), |
| 94 | + }, |
| 95 | + }, |
| 96 | + }, |
| 97 | + } |
| 98 | +} |
| 99 | + |
| 100 | +func getPodConditionStatus(pod *v1.Pod, condType v1.PodConditionType) v1.ConditionStatus { |
| 101 | + for _, c := range pod.Status.Conditions { |
| 102 | + if c.Type == condType { |
| 103 | + return c.Status |
| 104 | + } |
| 105 | + } |
| 106 | + return "" |
| 107 | +} |
| 108 | + |
| 109 | +// TestRemoveNotReadyKey_UpdatesPodReadyCondition reproduces the bug where |
| 110 | +// removeNotReadyKey did not re-evaluate the pod's Ready condition after |
| 111 | +// setting a readiness gate back to True. This caused pods to stay |
| 112 | +// Ready=False permanently in KWOK environments. |
| 113 | +// |
| 114 | +// The sequence: |
| 115 | +// 1. KWOK sets pod Ready=True with all readiness gates True |
| 116 | +// 2. addNotReadyKey sets InstancePodReady=False → Ready becomes False |
| 117 | +// 3. removeNotReadyKey sets InstancePodReady=True → Ready must become True |
| 118 | +// |
| 119 | +// Before the fix, step 3 left Ready=False because UpdatePodReadyCondition |
| 120 | +// was not called. |
| 121 | +func TestRemoveNotReadyKey_UpdatesPodReadyCondition(t *testing.T) { |
| 122 | + pod := newReadyPod() |
| 123 | + adp := &fakeAdapter{pod: pod} |
| 124 | + |
| 125 | + msg := Message{UserAgent: "Lifecycle", Key: "InstanceReady"} |
| 126 | + condType := constants.InstancePodReadyConditionType |
| 127 | + |
| 128 | + // Step 1: Verify initial state — pod is fully ready (simulates KWOK). |
| 129 | + if s := getPodConditionStatus(adp.pod, v1.PodReady); s != v1.ConditionTrue { |
| 130 | + t.Fatalf("precondition: expected Ready=True, got %s", s) |
| 131 | + } |
| 132 | + |
| 133 | + // Step 2: addNotReadyKey — sets InstancePodReady=False and Ready=False. |
| 134 | + modified, err := addNotReadyKey(adp, adp.pod, msg, condType) |
| 135 | + if err != nil { |
| 136 | + t.Fatalf("addNotReadyKey failed: %v", err) |
| 137 | + } |
| 138 | + if !modified { |
| 139 | + t.Fatal("addNotReadyKey should have modified the pod") |
| 140 | + } |
| 141 | + if s := getPodConditionStatus(adp.pod, condType); s != v1.ConditionFalse { |
| 142 | + t.Fatalf("after addNotReadyKey: expected InstancePodReady=False, got %s", s) |
| 143 | + } |
| 144 | + if s := getPodConditionStatus(adp.pod, v1.PodReady); s != v1.ConditionFalse { |
| 145 | + t.Fatalf("after addNotReadyKey: expected Ready=False, got %s", s) |
| 146 | + } |
| 147 | + |
| 148 | + // Step 3: removeNotReadyKey — sets InstancePodReady=True. |
| 149 | + // This is the critical assertion: Ready must also become True. |
| 150 | + modified, err = removeNotReadyKey(adp, adp.pod, msg, condType) |
| 151 | + if err != nil { |
| 152 | + t.Fatalf("removeNotReadyKey failed: %v", err) |
| 153 | + } |
| 154 | + if !modified { |
| 155 | + t.Fatal("removeNotReadyKey should have modified the pod") |
| 156 | + } |
| 157 | + if s := getPodConditionStatus(adp.pod, condType); s != v1.ConditionTrue { |
| 158 | + t.Fatalf("after removeNotReadyKey: expected InstancePodReady=True, got %s", s) |
| 159 | + } |
| 160 | + if s := getPodConditionStatus(adp.pod, v1.PodReady); s != v1.ConditionTrue { |
| 161 | + t.Fatalf("after removeNotReadyKey: expected Ready=True, got %s (bug: UpdatePodReadyCondition not called)", s) |
| 162 | + } |
| 163 | +} |
| 164 | + |
| 165 | +// TestAddRemoveNotReadyKey_MultipleKeys verifies that Ready only becomes True |
| 166 | +// once all not-ready keys are removed, not just one. |
| 167 | +func TestAddRemoveNotReadyKey_MultipleKeys(t *testing.T) { |
| 168 | + pod := newReadyPod() |
| 169 | + adp := &fakeAdapter{pod: pod} |
| 170 | + |
| 171 | + msg1 := Message{UserAgent: "Lifecycle", Key: "InstanceReady"} |
| 172 | + msg2 := Message{UserAgent: "InPlaceUpdate", Key: "Updating"} |
| 173 | + condType := constants.InstancePodReadyConditionType |
| 174 | + |
| 175 | + // Add two not-ready keys. |
| 176 | + if _, err := addNotReadyKey(adp, adp.pod, msg1, condType); err != nil { |
| 177 | + t.Fatalf("addNotReadyKey(msg1) failed: %v", err) |
| 178 | + } |
| 179 | + if _, err := addNotReadyKey(adp, adp.pod, msg2, condType); err != nil { |
| 180 | + t.Fatalf("addNotReadyKey(msg2) failed: %v", err) |
| 181 | + } |
| 182 | + if s := getPodConditionStatus(adp.pod, v1.PodReady); s != v1.ConditionFalse { |
| 183 | + t.Fatalf("after adding two keys: expected Ready=False, got %s", s) |
| 184 | + } |
| 185 | + |
| 186 | + // Remove first key — Ready should remain False (one key still present). |
| 187 | + if _, err := removeNotReadyKey(adp, adp.pod, msg1, condType); err != nil { |
| 188 | + t.Fatalf("removeNotReadyKey(msg1) failed: %v", err) |
| 189 | + } |
| 190 | + if s := getPodConditionStatus(adp.pod, condType); s != v1.ConditionFalse { |
| 191 | + t.Fatalf("after removing one key: expected InstancePodReady=False, got %s", s) |
| 192 | + } |
| 193 | + if s := getPodConditionStatus(adp.pod, v1.PodReady); s != v1.ConditionFalse { |
| 194 | + t.Fatalf("after removing one key: expected Ready=False, got %s", s) |
| 195 | + } |
| 196 | + |
| 197 | + // Remove second key — now Ready should become True. |
| 198 | + if _, err := removeNotReadyKey(adp, adp.pod, msg2, condType); err != nil { |
| 199 | + t.Fatalf("removeNotReadyKey(msg2) failed: %v", err) |
| 200 | + } |
| 201 | + if s := getPodConditionStatus(adp.pod, condType); s != v1.ConditionTrue { |
| 202 | + t.Fatalf("after removing all keys: expected InstancePodReady=True, got %s", s) |
| 203 | + } |
| 204 | + if s := getPodConditionStatus(adp.pod, v1.PodReady); s != v1.ConditionTrue { |
| 205 | + t.Fatalf("after removing all keys: expected Ready=True, got %s", s) |
| 206 | + } |
| 207 | +} |
0 commit comments