Skip to content

Commit 1a42691

Browse files
committed
Extend alert risk e2e
Conditional risks could previously be duplicated when both of the following held: (a) there was a risk whose PromQL could not be evaluated, and (b) the risk from an alert had been accepted. That issue is fixed by [cvo#1367](#1367); this pull request extends the existing alert-risk e2e test to cover that case.
1 parent 9679c0f commit 1a42691

1 file changed

Lines changed: 72 additions & 10 deletions

File tree

test/cvo/accept_risks.go

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
o "github.com/onsi/gomega"
99
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
1010
prometheusoperatorv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1"
11+
"sigs.k8s.io/yaml"
1112

1213
"k8s.io/apimachinery/pkg/api/errors"
1314
"k8s.io/apimachinery/pkg/api/meta"
@@ -38,6 +39,11 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
3839
backup configv1.ClusterVersionSpec
3940
)
4041

42+
const (
43+
prometheusRuleForTesting = "testing"
44+
alertNameForTesting = "TestAlertFeatureE2ETestOTA1813"
45+
)
46+
4147
g.BeforeEach(func() {
4248
c, err = util.GetRestConfig()
4349
o.Expect(err).To(o.BeNil())
@@ -60,6 +66,10 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
6066
})
6167

6268
g.AfterEach(func() {
69+
err := monitoringClient.PrometheusRules(external.DefaultCVONamespace).Delete(ctx, prometheusRuleForTesting, metav1.DeleteOptions{})
70+
if !errors.IsNotFound(err) {
71+
o.Expect(err).To(o.BeNil())
72+
}
6373
if needRecover {
6474
cv, err := configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
6575
o.Expect(err).NotTo(o.HaveOccurred())
@@ -78,7 +88,7 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
7888

7989
g.By("Using fauxinnati as the upstream and its simple channel")
8090
cv.Spec.Upstream = util.FauxinnatiAPIURL
81-
cv.Spec.Channel = "simple"
91+
cv.Spec.Channel = "OTA-1813"
8292

8393
_, err = configClient.ClusterVersions().Update(ctx, cv, metav1.UpdateOptions{})
8494
o.Expect(err).NotTo(o.HaveOccurred())
@@ -87,7 +97,7 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
8797
g.By("Create a critical alert for testing")
8898
prometheusRule := &monitoringv1.PrometheusRule{
8999
ObjectMeta: metav1.ObjectMeta{
90-
Name: "testing",
100+
Name: prometheusRuleForTesting,
91101
Namespace: external.DefaultCVONamespace,
92102
},
93103
Spec: monitoringv1.PrometheusRuleSpec{
@@ -96,7 +106,7 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
96106
Name: "test",
97107
Rules: []monitoringv1.Rule{
98108
{
99-
Alert: "TestAlertFeatureE2ETestOTA1813",
109+
Alert: alertNameForTesting,
100110
Annotations: map[string]string{"summary": "Test summary.", "description": "Test description."},
101111
Expr: intstr.IntOrString{
102112
Type: intstr.String,
@@ -111,19 +121,13 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
111121
}
112122
created, err := monitoringClient.PrometheusRules(external.DefaultCVONamespace).Create(ctx, prometheusRule, metav1.CreateOptions{})
113123
o.Expect(err).NotTo(o.HaveOccurred())
114-
defer func() {
115-
err := monitoringClient.PrometheusRules(external.DefaultCVONamespace).Delete(ctx, created.Name, metav1.DeleteOptions{})
116-
if !errors.IsNotFound(err) {
117-
o.Expect(err).To(o.BeNil())
118-
}
119-
}()
120124

121125
g.By("Checking if the risk shows up in ClusterVersion's status")
122126
o.Expect(wait.PollUntilContextTimeout(ctx, 30*time.Second, 10*time.Minute, true, func(ctx context.Context) (done bool, err error) {
123127
cv, err := configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
124128
o.Expect(err).NotTo(o.HaveOccurred())
125129
for _, risk := range cv.Status.ConditionalUpdateRisks {
126-
if risk.Name == "TestAlertFeatureE2ETestOTA1813" {
130+
if risk.Name == alertNameForTesting {
127131
if c := meta.FindStatusCondition(risk.Conditions, external.ConditionalUpdateRiskConditionTypeApplies); c != nil {
128132
if c.Status == metav1.ConditionTrue && external.IsAlertConditionReason(c.Reason) {
129133
return true, nil
@@ -149,6 +153,55 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
149153
}
150154
return true, nil
151155
})).NotTo(o.HaveOccurred(), "still recommending updates while alert is firing")
156+
157+
g.By("Checking that there are recommended conditional updates after the risk from alert is accepted")
158+
cv, err = configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
159+
o.Expect(err).NotTo(o.HaveOccurred())
160+
if cv.Spec.DesiredUpdate == nil {
161+
cv.Spec.DesiredUpdate = &configv1.Update{}
162+
}
163+
cv.Spec.DesiredUpdate.AcceptRisks = append(cv.Spec.DesiredUpdate.AcceptRisks, configv1.AcceptRisk{Name: alertNameForTesting})
164+
_, err = configClient.ClusterVersions().Update(ctx, cv, metav1.UpdateOptions{})
165+
o.Expect(err).NotTo(o.HaveOccurred())
166+
167+
o.Expect(wait.PollUntilContextTimeout(ctx, 30*time.Second, 5*time.Minute, true, func(ctx context.Context) (done bool, err error) {
168+
cv, err := configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
169+
o.Expect(err).NotTo(o.HaveOccurred())
170+
for _, cu := range cv.Status.ConditionalUpdates {
171+
condition := meta.FindStatusCondition(cu.Conditions, external.ConditionalUpdateConditionTypeRecommended)
172+
if condition != nil && condition.Status == metav1.ConditionTrue {
173+
return true, nil
174+
}
175+
}
176+
return false, nil
177+
})).NotTo(o.HaveOccurred(), "still no recommended conditional updates after the risk from alert is accepted")
178+
179+
g.By("Checking that there are no duplicated versions in the status of Cluster Version")
180+
cv, err = configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
181+
o.Expect(err).NotTo(o.HaveOccurred())
182+
versions := sets.New[string]()
183+
for _, cu := range cv.Status.ConditionalUpdates {
184+
o.Expect(versions.Has(cu.Release.Version)).To(o.BeFalse(), "ConditionalUpdates.Release.Version %s is duplicated. cv/version is:\n%s", cu.Release.Version, getYaml(*cv))
185+
versions.Insert(cu.Release.Version)
186+
}
187+
for _, u := range cv.Status.AvailableUpdates {
188+
o.Expect(versions.Has(u.Version)).To(o.BeFalse(), "AvailableUpdates.Version %s is duplicated. cv/version is:\n%s", u.Version, getYaml(*cv))
189+
versions.Insert(u.Version)
190+
}
191+
192+
g.By("Checking that there are available updates after the alert is resolved")
193+
err = monitoringClient.PrometheusRules(external.DefaultCVONamespace).Delete(ctx, created.Name, metav1.DeleteOptions{})
194+
o.Expect(err).NotTo(o.HaveOccurred())
195+
196+
o.Expect(wait.PollUntilContextTimeout(ctx, 30*time.Second, 5*time.Minute, true, func(ctx context.Context) (done bool, err error) {
197+
cv, err := configClient.ClusterVersions().Get(ctx, external.DefaultClusterVersionName, metav1.GetOptions{})
198+
o.Expect(err).NotTo(o.HaveOccurred())
199+
if len(cv.Status.AvailableUpdates) == 0 {
200+
return false, nil
201+
}
202+
return true, nil
203+
})).NotTo(o.HaveOccurred(), "still no available updates after the alert is resolved")
204+
152205
})
153206

154207
g.It("should work with accept risks", g.Label("Serial"), func() {
@@ -239,3 +292,12 @@ var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`,
239292
}
240293
})
241294
})
295+
296+
func getYaml(cv configv1.ClusterVersion) string {
297+
raw, err := yaml.Marshal(cv)
298+
if err != nil {
299+
logger.Error(err, "failed to marshal ClusterVersion")
300+
return ""
301+
}
302+
return string(raw)
303+
}

0 commit comments

Comments
 (0)