Skip to content

Commit cd8eab2

Browse files
committed
OTA-1546: Reconcile cv.spec.desiredUpdate.acceptRisks
1 parent 8b12479 commit cd8eab2

10 files changed

Lines changed: 190 additions & 32 deletions

File tree

pkg/cvo/availableupdates.go

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ import (
1616
"k8s.io/apimachinery/pkg/api/equality"
1717
"k8s.io/apimachinery/pkg/api/meta"
1818
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
19+
"k8s.io/apimachinery/pkg/util/sets"
1920
"k8s.io/klog/v2"
2021

2122
configv1 "github.com/openshift/api/config/v1"
2223
"github.com/openshift/cluster-version-operator/lib/resourcemerge"
2324
"github.com/openshift/cluster-version-operator/pkg/cincinnati"
2425
"github.com/openshift/cluster-version-operator/pkg/clusterconditions"
26+
"github.com/openshift/cluster-version-operator/pkg/internal"
2527
)
2628

2729
const noArchitecture string = "NoArchitecture"
@@ -50,6 +52,12 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
5052
channel := config.Spec.Channel
5153
desiredArch := optr.getDesiredArchitecture(config.Spec.DesiredUpdate)
5254
currentArch := optr.getCurrentArchitecture()
55+
acceptRisks := sets.New[string]()
56+
if config.Spec.DesiredUpdate != nil {
57+
for _, risk := range config.Spec.DesiredUpdate.AcceptRisks {
58+
acceptRisks.Insert(risk.Name)
59+
}
60+
}
5361

5462
// updates are only checked at most once per minimumUpdateCheckInterval or if the generation changes
5563
optrAvailableUpdates := optr.getAvailableUpdates()
@@ -129,6 +137,8 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
129137
optrAvailableUpdates.UpdateService = updateService
130138
optrAvailableUpdates.Channel = channel
131139
optrAvailableUpdates.Architecture = desiredArch
140+
optrAvailableUpdates.ShouldReconcileAcceptRisks = optr.shouldReconcileAcceptRisks
141+
optrAvailableUpdates.AcceptRisks = acceptRisks
132142
optrAvailableUpdates.ConditionRegistry = optr.conditionRegistry
133143
optrAvailableUpdates.Condition = condition
134144

@@ -167,9 +177,11 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
167177
}
168178

169179
type availableUpdates struct {
170-
UpdateService string
171-
Channel string
172-
Architecture string
180+
UpdateService string
181+
Channel string
182+
Architecture string
183+
ShouldReconcileAcceptRisks func() bool
184+
AcceptRisks sets.Set[string]
173185

174186
// LastAttempt records the time of the most recent attempt at update
175187
// retrieval, regardless of whether it was successful.
@@ -192,6 +204,8 @@ type availableUpdates struct {
192204
ConditionRegistry clusterconditions.ConditionRegistry
193205

194206
Condition configv1.ClusterOperatorStatusCondition
207+
208+
RiskConditions map[string][]metav1.Condition
195209
}
196210

197211
func (u *availableUpdates) RecentlyAttempted(interval time.Duration) bool {
@@ -291,14 +305,16 @@ func (optr *Operator) getAvailableUpdates() *availableUpdates {
291305
}
292306

293307
u := &availableUpdates{
294-
UpdateService: optr.availableUpdates.UpdateService,
295-
Channel: optr.availableUpdates.Channel,
296-
Architecture: optr.availableUpdates.Architecture,
297-
LastAttempt: optr.availableUpdates.LastAttempt,
298-
LastSyncOrConfigChange: optr.availableUpdates.LastSyncOrConfigChange,
299-
Current: *optr.availableUpdates.Current.DeepCopy(),
300-
ConditionRegistry: optr.availableUpdates.ConditionRegistry, // intentionally not a copy, to preserve cache state
301-
Condition: optr.availableUpdates.Condition,
308+
UpdateService: optr.availableUpdates.UpdateService,
309+
Channel: optr.availableUpdates.Channel,
310+
Architecture: optr.availableUpdates.Architecture,
311+
ShouldReconcileAcceptRisks: optr.shouldReconcileAcceptRisks,
312+
AcceptRisks: optr.availableUpdates.AcceptRisks,
313+
LastAttempt: optr.availableUpdates.LastAttempt,
314+
LastSyncOrConfigChange: optr.availableUpdates.LastSyncOrConfigChange,
315+
Current: *optr.availableUpdates.Current.DeepCopy(),
316+
ConditionRegistry: optr.availableUpdates.ConditionRegistry, // intentionally not a copy, to preserve cache state
317+
Condition: optr.availableUpdates.Condition,
302318
}
303319

304320
if optr.availableUpdates.Updates != nil {
@@ -427,7 +443,7 @@ func (u *availableUpdates) evaluateConditionalUpdates(ctx context.Context) {
427443
return vi.GTE(vj)
428444
})
429445
for i, conditionalUpdate := range u.ConditionalUpdates {
430-
condition := evaluateConditionalUpdate(ctx, conditionalUpdate.Risks, u.ConditionRegistry)
446+
condition := evaluateConditionalUpdate(ctx, conditionalUpdate.Risks, u.ConditionRegistry, u.AcceptRisks, u.ShouldReconcileAcceptRisks, u.RiskConditions)
431447

432448
if condition.Status == metav1.ConditionTrue {
433449
u.addUpdate(conditionalUpdate.Release)
@@ -485,6 +501,8 @@ const (
485501
// recommendedReasonExposed is used instead of the original name if it does
486502
// not match the pattern for a valid k8s condition reason.
487503
recommendedReasonExposed = "ExposedToRisks"
504+
505+
riskConditionReasonEvaluationFailed = "EvaluationFailed"
488506
)
489507

490508
// Reasons follow same pattern as k8s Condition Reasons
@@ -502,29 +520,57 @@ func newRecommendedReason(now, want string) string {
502520
}
503521
}
504522

505-
func evaluateConditionalUpdate(ctx context.Context, risks []configv1.ConditionalUpdateRisk, conditionRegistry clusterconditions.ConditionRegistry) metav1.Condition {
523+
func evaluateConditionalUpdate(
524+
ctx context.Context,
525+
risks []configv1.ConditionalUpdateRisk,
526+
conditionRegistry clusterconditions.ConditionRegistry,
527+
acceptRisks sets.Set[string],
528+
shouldReconcileAcceptRisks func() bool,
529+
riskConditions map[string][]metav1.Condition,
530+
) metav1.Condition {
506531
recommended := metav1.Condition{
507-
Type: ConditionalUpdateConditionTypeRecommended,
532+
Type: internal.ConditionalUpdateConditionTypeRecommended,
508533
Status: metav1.ConditionTrue,
509534
// FIXME: ObservedGeneration? That would capture upstream/channel, but not necessarily the currently-reconciling version.
510535
Reason: recommendedReasonRisksNotExposed,
511536
Message: "The update is recommended, because none of the conditional update risks apply to this cluster.",
512537
}
513538

514539
var errorMessages []string
515-
for _, risk := range risks {
540+
for i, risk := range risks {
541+
riskCondition := metav1.Condition{
542+
Type: internal.ConditionalUpdateRiskConditionTypeApplies,
543+
Status: metav1.ConditionFalse,
544+
LastTransitionTime: metav1.Now(),
545+
}
516546
if match, err := conditionRegistry.Match(ctx, risk.MatchingRules); err != nil {
547+
msg := unknownExposureMessage(risk, err)
517548
recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionUnknown)
518549
recommended.Reason = newRecommendedReason(recommended.Reason, recommendedReasonEvaluationFailed)
519-
errorMessages = append(errorMessages, unknownExposureMessage(risk, err))
550+
errorMessages = append(errorMessages, msg)
551+
riskCondition.Status = metav1.ConditionUnknown
552+
riskCondition.Reason = riskConditionReasonEvaluationFailed
553+
riskCondition.Message = msg
520554
} else if match {
521-
recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionFalse)
522-
wantReason := recommendedReasonExposed
523-
if reasonPattern.MatchString(risk.Name) {
524-
wantReason = risk.Name
555+
riskCondition.Status = metav1.ConditionTrue
556+
riskCondition.Message = fmt.Sprintf("The matchingRules[%d] matches", i)
557+
if shouldReconcileAcceptRisks() && acceptRisks.Has(risk.Name) {
558+
klog.V(2).Infof("Risk with name %q is accepted by the cluster admin and thus not in the evaluation of conditional update", risk.Name)
559+
} else {
560+
recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionFalse)
561+
wantReason := recommendedReasonExposed
562+
if reasonPattern.MatchString(risk.Name) {
563+
wantReason = risk.Name
564+
}
565+
recommended.Reason = newRecommendedReason(recommended.Reason, wantReason)
566+
errorMessages = append(errorMessages, fmt.Sprintf("%s %s", risk.Message, risk.URL))
525567
}
526-
recommended.Reason = newRecommendedReason(recommended.Reason, wantReason)
527-
errorMessages = append(errorMessages, fmt.Sprintf("%s %s", risk.Message, risk.URL))
568+
}
569+
if riskConditions == nil {
570+
riskConditions = map[string][]metav1.Condition{}
571+
}
572+
if _, ok := riskConditions[risk.Name]; !ok {
573+
riskConditions[risk.Name] = []metav1.Condition{riskCondition}
528574
}
529575
}
530576
if len(errorMessages) > 0 {

pkg/cvo/availableupdates_test.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@ import (
1919
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2020
"k8s.io/apimachinery/pkg/labels"
2121
"k8s.io/apimachinery/pkg/runtime/schema"
22+
"k8s.io/apimachinery/pkg/util/sets"
2223
"k8s.io/client-go/util/workqueue"
2324

2425
configv1 "github.com/openshift/api/config/v1"
2526
"github.com/openshift/cluster-version-operator/pkg/clusterconditions"
2627
"github.com/openshift/cluster-version-operator/pkg/clusterconditions/always"
2728
"github.com/openshift/cluster-version-operator/pkg/clusterconditions/mock"
29+
"github.com/openshift/cluster-version-operator/pkg/featuregates"
2830
)
2931

3032
// notFoundProxyLister is a stub for ProxyLister
@@ -207,6 +209,7 @@ var cvFixture = &configv1.ClusterVersion{
207209
}
208210

209211
var availableUpdatesCmpOpts = []cmp.Option{
212+
cmpopts.IgnoreFields(availableUpdates{}, "ShouldReconcileAcceptRisks"),
210213
cmpopts.IgnoreTypes(time.Time{}),
211214
cmpopts.IgnoreInterfaces(struct {
212215
clusterconditions.ConditionRegistry
@@ -231,6 +234,7 @@ func TestSyncAvailableUpdates(t *testing.T) {
231234
Status: configv1.ConditionTrue,
232235
}
233236

237+
optr.enabledFeatureGates = featuregates.DefaultCvoGates("version")
234238
err := optr.syncAvailableUpdates(context.Background(), cvFixture)
235239

236240
if err != nil {
@@ -319,6 +323,7 @@ func TestSyncAvailableUpdates_ConditionalUpdateRecommendedConditions(t *testing.
319323
tc.modifyOriginalState(optr)
320324
tc.modifyCV(cv, fixture.expectedConditionalUpdates[0])
321325

326+
optr.enabledFeatureGates = featuregates.DefaultCvoGates("version")
322327
err := optr.syncAvailableUpdates(context.Background(), cv)
323328

324329
if err != nil {
@@ -507,7 +512,9 @@ func TestEvaluateConditionalUpdate(t *testing.T) {
507512
t.Run(tc.name, func(t *testing.T) {
508513
registry := clusterconditions.NewConditionRegistry()
509514
registry.Register("PromQL", tc.mockPromql)
510-
actual := evaluateConditionalUpdate(context.Background(), tc.risks, registry)
515+
actual := evaluateConditionalUpdate(context.Background(), tc.risks, registry, sets.New[string](), func() bool {
516+
return false
517+
}, map[string][]metav1.Condition{})
511518
if diff := cmp.Diff(tc.expected, actual); diff != "" {
512519
t.Errorf("actual condition differs from expected:\n%s", diff)
513520
}
@@ -744,6 +751,7 @@ func TestSyncAvailableUpdatesDesiredUpdate(t *testing.T) {
744751

745752
cv := cvFixture.DeepCopy()
746753
cv.Spec.DesiredUpdate = tt.args.desiredUpdate
754+
optr.enabledFeatureGates = featuregates.DefaultCvoGates("version")
747755
if err := optr.syncAvailableUpdates(context.Background(), cv); err != nil {
748756
t.Fatalf("syncAvailableUpdates() unexpected error: %v", err)
749757
}

pkg/cvo/cvo.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,3 +1091,12 @@ func (optr *Operator) shouldReconcileCVOConfiguration() bool {
10911091
// The relevant CRD and CR are not applied in HyperShift, which configures the CVO via a configuration file
10921092
return optr.enabledFeatureGates.CVOConfiguration() && !optr.hypershift
10931093
}
1094+
1095+
// shouldReconcileAcceptRisks returns whether the CVO should reconcile spec.desiredUpdate.acceptRisks and populate the
1096+
// relevant fields in status.
1097+
//
1098+
// enabledFeatureGates must be initialized before the function is called.
1099+
func (optr *Operator) shouldReconcileAcceptRisks() bool {
1100+
// HyperShift will be supported later if needed
1101+
return optr.enabledFeatureGates.AcceptRisks() && !optr.hypershift
1102+
}

pkg/cvo/metrics.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ func (m *operatorMetrics) Describe(ch chan<- *prometheus.Desc) {
441441
func (m *operatorMetrics) collectConditionalUpdates(ch chan<- prometheus.Metric, updates []configv1.ConditionalUpdate) {
442442
for _, update := range updates {
443443
for _, condition := range update.Conditions {
444-
if condition.Type != ConditionalUpdateConditionTypeRecommended {
444+
if condition.Type != internal.ConditionalUpdateConditionTypeRecommended {
445445
continue
446446
}
447447

pkg/cvo/metrics_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func Test_operatorMetrics_Collect(t *testing.T) {
4747
Release: configv1.Release{Version: "4.5.6", Image: "pullspec/4.5.6"},
4848
Conditions: []metav1.Condition{
4949
{
50-
Type: ConditionalUpdateConditionTypeRecommended,
50+
Type: internal.ConditionalUpdateConditionTypeRecommended,
5151
Status: metav1.ConditionTrue,
5252
LastTransitionTime: metav1.NewTime(time.Unix(3, 0)),
5353
Reason: "RiskDoesNotApply",

pkg/cvo/status.go

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
apierrors "k8s.io/apimachinery/pkg/api/errors"
1818
"k8s.io/apimachinery/pkg/api/meta"
1919
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
20+
"k8s.io/apimachinery/pkg/util/sets"
2021
"k8s.io/apimachinery/pkg/util/validation/field"
2122

2223
configv1 "github.com/openshift/api/config/v1"
@@ -40,7 +41,7 @@ const (
4041
)
4142

4243
func findRecommendedCondition(conditions []metav1.Condition) *metav1.Condition {
43-
return meta.FindStatusCondition(conditions, ConditionalUpdateConditionTypeRecommended)
44+
return meta.FindStatusCondition(conditions, internal.ConditionalUpdateConditionTypeRecommended)
4445
}
4546

4647
func mergeEqualVersions(current *configv1.UpdateHistory, desired configv1.Release) bool {
@@ -180,7 +181,7 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1
180181
original = config.DeepCopy()
181182
}
182183

183-
updateClusterVersionStatus(&config.Status, status, optr.release, optr.getAvailableUpdates, optr.enabledFeatureGates, validationErrs)
184+
updateClusterVersionStatus(&config.Status, status, optr.release, optr.getAvailableUpdates, optr.enabledFeatureGates, validationErrs, optr.shouldReconcileAcceptRisks)
184185

185186
if klog.V(6).Enabled() {
186187
klog.Infof("Apply config: %s", cmp.Diff(original, config))
@@ -191,9 +192,15 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1
191192
}
192193

193194
// updateClusterVersionStatus updates the passed cvStatus with the latest status information
194-
func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status *SyncWorkerStatus,
195-
release configv1.Release, getAvailableUpdates func() *availableUpdates, enabledGates featuregates.CvoGateChecker,
196-
validationErrs field.ErrorList) {
195+
func updateClusterVersionStatus(
196+
cvStatus *configv1.ClusterVersionStatus,
197+
status *SyncWorkerStatus,
198+
release configv1.Release,
199+
getAvailableUpdates func() *availableUpdates,
200+
enabledGates featuregates.CvoGateChecker,
201+
validationErrs field.ErrorList,
202+
shouldReconcileAcceptRisks func() bool,
203+
) {
197204

198205
cvStatus.ObservedGeneration = status.Generation
199206
if len(status.VersionHash) > 0 {
@@ -222,9 +229,22 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
222229
desired.Architecture = configv1.ClusterVersionArchitecture("")
223230
}
224231

232+
var riskNamesForDesiredImage []string
233+
if shouldReconcileAcceptRisks() {
234+
cvStatus.ConditionalUpdates, riskNamesForDesiredImage = conditionalUpdateWithRiskNamesAndRiskConditions(cvStatus.ConditionalUpdates, getAvailableUpdates, desired.Image)
235+
cvStatus.ConditionalUpdateRisks = conditionalUpdateRisks(cvStatus.ConditionalUpdates)
236+
}
237+
225238
risksMsg := ""
226239
if desired.Image == status.loadPayloadStatus.Update.Image {
227240
risksMsg = status.loadPayloadStatus.AcceptedRisks
241+
if shouldReconcileAcceptRisks() && len(riskNamesForDesiredImage) > 0 {
242+
if risksMsg == "" {
243+
risksMsg = fmt.Sprintf("The target release %s is exposed to the risks [%s] and accepted by CVO because either the risk is considered acceptable by the cluster admin or it does not apply to the cluster.", desired.Image, strings.Join(riskNamesForDesiredImage, ","))
244+
} else {
245+
risksMsg = fmt.Sprintf("%s; It is exposed to the risks [%s] and accepted by CVO because either the risk is considered acceptable by the cluster admin or it does not apply to the cluster.", risksMsg, strings.Join(riskNamesForDesiredImage, ","))
246+
}
247+
}
228248
}
229249

230250
mergeOperatorHistory(cvStatus, desired, status.Verified, now, status.Completed > 0, risksMsg, status.loadPayloadStatus.Local)
@@ -402,6 +422,52 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
402422
}
403423
}
404424

425+
func conditionalUpdateWithRiskNamesAndRiskConditions(conditionalUpdates []configv1.ConditionalUpdate, getAvailableUpdates func() *availableUpdates, desiredImage string) ([]configv1.ConditionalUpdate, []string) {
426+
var result []configv1.ConditionalUpdate
427+
var riskNamesForDesiredImage []string
428+
riskConditions := getAvailableUpdates().RiskConditions
429+
for _, conditionalUpdate := range conditionalUpdates {
430+
if desiredImage == conditionalUpdate.Release.Image {
431+
riskNamesForDesiredImage = conditionalUpdate.RiskNames
432+
}
433+
riskNames := sets.New[string]()
434+
for _, risk := range conditionalUpdate.Risks {
435+
riskNames.Insert(risk.Name)
436+
riskTypesToRemove := sets.New[string]()
437+
conditions := riskConditions[risk.Name]
438+
for _, condition := range risk.Conditions {
439+
if found := meta.FindStatusCondition(conditions, condition.Type); found == nil {
440+
riskTypesToRemove.Insert(condition.Type)
441+
}
442+
}
443+
for riskTypeToRemove := range riskTypesToRemove {
444+
meta.RemoveStatusCondition(&risk.Conditions, riskTypeToRemove)
445+
}
446+
for _, condition := range conditions {
447+
meta.SetStatusCondition(&risk.Conditions, condition)
448+
}
449+
}
450+
conditionalUpdate.RiskNames = sets.List[string](riskNames)
451+
result = append(result, conditionalUpdate)
452+
}
453+
return result, riskNamesForDesiredImage
454+
}
455+
456+
func conditionalUpdateRisks(conditionalUpdates []configv1.ConditionalUpdate) []configv1.ConditionalUpdateRisk {
457+
var result []configv1.ConditionalUpdateRisk
458+
riskNames := sets.New[string]()
459+
for _, conditionalUpdate := range conditionalUpdates {
460+
for _, risk := range conditionalUpdate.Risks {
461+
if riskNames.Has(risk.Name) {
462+
continue
463+
}
464+
riskNames.Insert(risk.Name)
465+
result = append(result, risk)
466+
}
467+
}
468+
return result
469+
}
470+
405471
// getReasonMessageFromError returns the reason and the message from an error.
406472
// If the reason or message is not available, an empty string is returned.
407473
func getReasonMessageFromError(err error) (reason, message string) {

0 commit comments

Comments
 (0)