Skip to content

Commit 255895b

Browse files
committed
Extend alert risk e2e
There was an issue of duplicated conditional risks when a risk's PromQL could not be evaluated and the risk from the alert was accepted at the same time. The issue is fixed by [cvo#1367](openshift#1367), and this pull extends the current alert risk e2e to cover that case.
1 parent 9679c0f commit 255895b

1 file changed

Lines changed: 62 additions & 10 deletions

File tree

test/cvo/accept_risks.go

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
3838
backup configv1.ClusterVersionSpec
3939
)
4040

41+
const (
42+
prometheusRuleForTesting = "testing"
43+
alertNameForTesting = "TestAlertFeatureE2ETestOTA1813"
44+
)
45+
4146
g.BeforeEach(func() {
4247
c, err = util.GetRestConfig()
4348
o.Expect(err).To(o.BeNil())
@@ -60,6 +65,10 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
6065
})
6166

6267
g.AfterEach(func() {
68+
err := monitoringClient.PrometheusRules(external.DefaultCVONamespace).Delete(ctx, prometheusRuleForTesting, metav1.DeleteOptions{})
69+
if !errors.IsNotFound(err) {
70+
o.Expect(err).To(o.BeNil())
71+
}
6372
if needRecover {
6473
cv, err := configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
6574
o.Expect(err).NotTo(o.HaveOccurred())
@@ -78,7 +87,7 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
7887

7988
g.By("Using fauxinnati as the upstream and its simple channel")
8089
cv.Spec.Upstream = util.FauxinnatiAPIURL
81-
cv.Spec.Channel = "simple"
90+
cv.Spec.Channel = "OTA-1813"
8291

8392
_, err = configClient.ClusterVersions().Update(ctx, cv, metav1.UpdateOptions{})
8493
o.Expect(err).NotTo(o.HaveOccurred())
@@ -87,7 +96,7 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
8796
g.By("Create a critical alert for testing")
8897
prometheusRule := &monitoringv1.PrometheusRule{
8998
ObjectMeta: metav1.ObjectMeta{
90-
Name: "testing",
99+
Name: prometheusRuleForTesting,
91100
Namespace: external.DefaultCVONamespace,
92101
},
93102
Spec: monitoringv1.PrometheusRuleSpec{
@@ -96,7 +105,7 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
96105
Name: "test",
97106
Rules: []monitoringv1.Rule{
98107
{
99-
Alert: "TestAlertFeatureE2ETestOTA1813",
108+
Alert: alertNameForTesting,
100109
Annotations: map[string]string{"summary": "Test summary.", "description": "Test description."},
101110
Expr: intstr.IntOrString{
102111
Type: intstr.String,
@@ -111,19 +120,13 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
111120
}
112121
created, err := monitoringClient.PrometheusRules(external.DefaultCVONamespace).Create(ctx, prometheusRule, metav1.CreateOptions{})
113122
o.Expect(err).NotTo(o.HaveOccurred())
114-
defer func() {
115-
err := monitoringClient.PrometheusRules(external.DefaultCVONamespace).Delete(ctx, created.Name, metav1.DeleteOptions{})
116-
if !errors.IsNotFound(err) {
117-
o.Expect(err).To(o.BeNil())
118-
}
119-
}()
120123

121124
g.By("Checking if the risk shows up in ClusterVersion's status")
122125
o.Expect(wait.PollUntilContextTimeout(ctx, 30*time.Second, 10*time.Minute, true, func(ctx context.Context) (done bool, err error) {
123126
cv, err := configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
124127
o.Expect(err).NotTo(o.HaveOccurred())
125128
for _, risk := range cv.Status.ConditionalUpdateRisks {
126-
if risk.Name == "TestAlertFeatureE2ETestOTA1813" {
129+
if risk.Name == alertNameForTesting {
127130
if c := meta.FindStatusCondition(risk.Conditions, external.ConditionalUpdateRiskConditionTypeApplies); c != nil {
128131
if c.Status == metav1.ConditionTrue && external.IsAlertConditionReason(c.Reason) {
129132
return true, nil
@@ -149,6 +152,55 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
149152
}
150153
return true, nil
151154
})).NotTo(o.HaveOccurred(), "still recommending updates while alert is firing")
155+
156+
g.By("Checking that there are recommended conditional updates after the risk from alert is accepted")
157+
cv, err = configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
158+
o.Expect(err).NotTo(o.HaveOccurred())
159+
if cv.Spec.DesiredUpdate == nil {
160+
cv.Spec.DesiredUpdate = &configv1.Update{}
161+
}
162+
cv.Spec.DesiredUpdate.AcceptRisks = append(cv.Spec.DesiredUpdate.AcceptRisks, configv1.AcceptRisk{Name: alertNameForTesting})
163+
_, err = configClient.ClusterVersions().Update(ctx, cv, metav1.UpdateOptions{})
164+
o.Expect(err).NotTo(o.HaveOccurred())
165+
166+
o.Expect(wait.PollUntilContextTimeout(ctx, 30*time.Second, 5*time.Minute, true, func(ctx context.Context) (done bool, err error) {
167+
cv, err := configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
168+
o.Expect(err).NotTo(o.HaveOccurred())
169+
for _, cu := range cv.Status.ConditionalUpdates {
170+
condition := meta.FindStatusCondition(cu.Conditions, external.ConditionalUpdateConditionTypeRecommended)
171+
if condition != nil && condition.Status == metav1.ConditionTrue {
172+
return true, nil
173+
}
174+
}
175+
return false, nil
176+
})).NotTo(o.HaveOccurred(), "still no recommended conditional updates after the risk from alert is accepted")
177+
178+
g.By("Checking that there are no duplicated versions in the status of Cluster Version")
179+
cv, err = configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
180+
o.Expect(err).NotTo(o.HaveOccurred())
181+
versions := sets.New[string]()
182+
for _, cu := range cv.Status.ConditionalUpdates {
183+
o.Expect(versions.Has(cu.Release.Version)).To(o.Equal(false))
184+
versions.Insert(cu.Release.Version)
185+
}
186+
for _, u := range cv.Status.AvailableUpdates {
187+
o.Expect(versions.Has(u.Version)).To(o.Equal(false))
188+
versions.Insert(u.Version)
189+
}
190+
191+
g.By("Checking that there are available updates after the alert is resolved")
192+
err = monitoringClient.PrometheusRules(external.DefaultCVONamespace).Delete(ctx, created.Name, metav1.DeleteOptions{})
193+
o.Expect(err).NotTo(o.HaveOccurred())
194+
195+
o.Expect(wait.PollUntilContextTimeout(ctx, 30*time.Second, 5*time.Minute, true, func(ctx context.Context) (done bool, err error) {
196+
cv, err := configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
197+
o.Expect(err).NotTo(o.HaveOccurred())
198+
if len(cv.Status.AvailableUpdates) == 0 {
199+
return false, nil
200+
}
201+
return true, nil
202+
})).NotTo(o.HaveOccurred(), "still no available updates after the alert is resolved")
203+
152204
})
153205

154206
g.It("should work with accept risks", g.Label("Serial"), func() {

0 commit comments

Comments
 (0)