Skip to content

Commit f25054a

Browse files
Merge pull request #1284 from hongkailiu/accept-cvo
OTA-1546: Reconcile cv.spec.desiredUpdate.acceptRisks
2 parents 76dfe13 + 14dffd7 commit f25054a

11 files changed

Lines changed: 605 additions & 58 deletions

File tree

pkg/cvo/availableupdates.go

Lines changed: 106 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,21 @@ import (
1212
"time"
1313

1414
"github.com/blang/semver/v4"
15+
"github.com/google/go-cmp/cmp"
16+
"github.com/google/go-cmp/cmp/cmpopts"
1517
"github.com/google/uuid"
1618
"k8s.io/apimachinery/pkg/api/equality"
1719
"k8s.io/apimachinery/pkg/api/meta"
1820
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
utilerrors "k8s.io/apimachinery/pkg/util/errors"
22+
"k8s.io/apimachinery/pkg/util/sets"
1923
"k8s.io/klog/v2"
2024

2125
configv1 "github.com/openshift/api/config/v1"
2226
"github.com/openshift/cluster-version-operator/lib/resourcemerge"
2327
"github.com/openshift/cluster-version-operator/pkg/cincinnati"
2428
"github.com/openshift/cluster-version-operator/pkg/clusterconditions"
29+
"github.com/openshift/cluster-version-operator/pkg/internal"
2530
)
2631

2732
const noArchitecture string = "NoArchitecture"
@@ -50,6 +55,12 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
5055
channel := config.Spec.Channel
5156
desiredArch := optr.getDesiredArchitecture(config.Spec.DesiredUpdate)
5257
currentArch := optr.getCurrentArchitecture()
58+
acceptRisks := sets.New[string]()
59+
if config.Spec.DesiredUpdate != nil {
60+
for _, risk := range config.Spec.DesiredUpdate.AcceptRisks {
61+
acceptRisks.Insert(risk.Name)
62+
}
63+
}
5364

5465
// updates are only checked at most once per minimumUpdateCheckInterval or if the generation changes
5566
optrAvailableUpdates := optr.getAvailableUpdates()
@@ -129,6 +140,9 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
129140
optrAvailableUpdates.UpdateService = updateService
130141
optrAvailableUpdates.Channel = channel
131142
optrAvailableUpdates.Architecture = desiredArch
143+
optrAvailableUpdates.ShouldReconcileAcceptRisks = optr.shouldReconcileAcceptRisks
144+
optrAvailableUpdates.AcceptRisks = acceptRisks
145+
optrAvailableUpdates.RiskConditions = map[string][]metav1.Condition{}
132146
optrAvailableUpdates.ConditionRegistry = optr.conditionRegistry
133147
optrAvailableUpdates.Condition = condition
134148

@@ -167,9 +181,11 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
167181
}
168182

169183
type availableUpdates struct {
170-
UpdateService string
171-
Channel string
172-
Architecture string
184+
UpdateService string
185+
Channel string
186+
Architecture string
187+
ShouldReconcileAcceptRisks func() bool
188+
AcceptRisks sets.Set[string]
173189

174190
// LastAttempt records the time of the most recent attempt at update
175191
// retrieval, regardless of whether it was successful.
@@ -192,6 +208,9 @@ type availableUpdates struct {
192208
ConditionRegistry clusterconditions.ConditionRegistry
193209

194210
Condition configv1.ClusterOperatorStatusCondition
211+
212+
// RiskConditions stores the condition for every risk (name, url, message, matchingRules).
213+
RiskConditions map[string][]metav1.Condition
195214
}
196215

197216
func (u *availableUpdates) RecentlyAttempted(interval time.Duration) bool {
@@ -291,14 +310,17 @@ func (optr *Operator) getAvailableUpdates() *availableUpdates {
291310
}
292311

293312
u := &availableUpdates{
294-
UpdateService: optr.availableUpdates.UpdateService,
295-
Channel: optr.availableUpdates.Channel,
296-
Architecture: optr.availableUpdates.Architecture,
297-
LastAttempt: optr.availableUpdates.LastAttempt,
298-
LastSyncOrConfigChange: optr.availableUpdates.LastSyncOrConfigChange,
299-
Current: *optr.availableUpdates.Current.DeepCopy(),
300-
ConditionRegistry: optr.availableUpdates.ConditionRegistry, // intentionally not a copy, to preserve cache state
301-
Condition: optr.availableUpdates.Condition,
313+
UpdateService: optr.availableUpdates.UpdateService,
314+
Channel: optr.availableUpdates.Channel,
315+
Architecture: optr.availableUpdates.Architecture,
316+
ShouldReconcileAcceptRisks: optr.shouldReconcileAcceptRisks,
317+
AcceptRisks: optr.availableUpdates.AcceptRisks,
318+
RiskConditions: optr.availableUpdates.RiskConditions,
319+
LastAttempt: optr.availableUpdates.LastAttempt,
320+
LastSyncOrConfigChange: optr.availableUpdates.LastSyncOrConfigChange,
321+
Current: *optr.availableUpdates.Current.DeepCopy(),
322+
ConditionRegistry: optr.availableUpdates.ConditionRegistry, // intentionally not a copy, to preserve cache state
323+
Condition: optr.availableUpdates.Condition,
302324
}
303325

304326
if optr.availableUpdates.Updates != nil {
@@ -426,8 +448,11 @@ func (u *availableUpdates) evaluateConditionalUpdates(ctx context.Context) {
426448
vj := semver.MustParse(u.ConditionalUpdates[j].Release.Version)
427449
return vi.GTE(vj)
428450
})
451+
if err := sanityCheck(u.ConditionalUpdates); err != nil {
452+
klog.Errorf("Sanity check failed which might impact risk evaluation: %v", err)
453+
}
429454
for i, conditionalUpdate := range u.ConditionalUpdates {
430-
condition := evaluateConditionalUpdate(ctx, conditionalUpdate.Risks, u.ConditionRegistry)
455+
condition := evaluateConditionalUpdate(ctx, conditionalUpdate.Risks, u.ConditionRegistry, u.AcceptRisks, u.ShouldReconcileAcceptRisks, u.RiskConditions)
431456

432457
if condition.Status == metav1.ConditionTrue {
433458
u.addUpdate(conditionalUpdate.Release)
@@ -441,6 +466,25 @@ func (u *availableUpdates) evaluateConditionalUpdates(ctx context.Context) {
441466
}
442467
}
443468

469+
func sanityCheck(updates []configv1.ConditionalUpdate) error {
470+
risks := map[string]configv1.ConditionalUpdateRisk{}
471+
var errs []error
472+
for _, update := range updates {
473+
for _, risk := range update.Risks {
474+
if v, ok := risks[risk.Name]; ok {
475+
if diff := cmp.Diff(v, risk, cmpopts.IgnoreFields(configv1.ConditionalUpdateRisk{}, "Conditions")); diff != "" {
476+
errs = append(errs, fmt.Errorf("found collision on risk %s: %v and %v", risk.Name, v, risk))
477+
}
478+
} else if trimmed := strings.TrimSpace(risk.Name); trimmed == "" {
479+
errs = append(errs, fmt.Errorf("found invalid name on risk %v", risk))
480+
} else {
481+
risks[risk.Name] = risk
482+
}
483+
}
484+
}
485+
return utilerrors.NewAggregate(errs)
486+
}
487+
444488
func (u *availableUpdates) addUpdate(release configv1.Release) {
445489
for _, update := range u.Updates {
446490
if update.Image == release.Image {
@@ -478,33 +522,47 @@ func newRecommendedStatus(now, want metav1.ConditionStatus) metav1.ConditionStat
478522
}
479523

480524
const (
481-
recommendedReasonRisksNotExposed = "NotExposedToRisks"
482-
recommendedReasonEvaluationFailed = "EvaluationFailed"
483-
recommendedReasonMultiple = "MultipleReasons"
525+
recommendedReasonRisksNotExposed = "NotExposedToRisks"
526+
recommendedReasonAllExposedRisksAccepted = "AllExposedRisksAccepted"
527+
recommendedReasonEvaluationFailed = "EvaluationFailed"
528+
recommendedReasonMultiple = "MultipleReasons"
484529

485530
// recommendedReasonExposed is used instead of the original name if it does
486531
// not match the pattern for a valid k8s condition reason.
487532
recommendedReasonExposed = "ExposedToRisks"
533+
534+
riskConditionReasonEvaluationFailed = "EvaluationFailed"
535+
riskConditionReasonMatch = "Match"
536+
riskConditionReasonNotMatch = "NotMatch"
488537
)
489538

490539
// Reasons follow same pattern as k8s Condition Reasons
491540
// https://github.com/openshift/api/blob/59fa376de7cb668ddb95a7ee4e9879d7f6ca2767/vendor/k8s.io/apimachinery/pkg/apis/meta/v1/types.go#L1535-L1536
492541
var reasonPattern = regexp.MustCompile(`^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$`)
493542

494543
func newRecommendedReason(now, want string) string {
495-
switch now {
496-
case recommendedReasonRisksNotExposed:
544+
switch {
545+
case now == recommendedReasonRisksNotExposed ||
546+
now == recommendedReasonAllExposedRisksAccepted ||
547+
now == want:
497548
return want
498-
case want:
549+
case want == recommendedReasonRisksNotExposed:
499550
return now
500551
default:
501552
return recommendedReasonMultiple
502553
}
503554
}
504555

505-
func evaluateConditionalUpdate(ctx context.Context, risks []configv1.ConditionalUpdateRisk, conditionRegistry clusterconditions.ConditionRegistry) metav1.Condition {
556+
func evaluateConditionalUpdate(
557+
ctx context.Context,
558+
risks []configv1.ConditionalUpdateRisk,
559+
conditionRegistry clusterconditions.ConditionRegistry,
560+
acceptRisks sets.Set[string],
561+
shouldReconcileAcceptRisks func() bool,
562+
riskConditions map[string][]metav1.Condition,
563+
) metav1.Condition {
506564
recommended := metav1.Condition{
507-
Type: ConditionalUpdateConditionTypeRecommended,
565+
Type: internal.ConditionalUpdateConditionTypeRecommended,
508566
Status: metav1.ConditionTrue,
509567
// FIXME: ObservedGeneration? That would capture upstream/channel, but not necessarily the currently-reconciling version.
510568
Reason: recommendedReasonRisksNotExposed,
@@ -513,18 +571,39 @@ func evaluateConditionalUpdate(ctx context.Context, risks []configv1.Conditional
513571

514572
var errorMessages []string
515573
for _, risk := range risks {
574+
riskCondition := metav1.Condition{
575+
Type: internal.ConditionalUpdateRiskConditionTypeApplies,
576+
Status: metav1.ConditionFalse,
577+
Reason: riskConditionReasonNotMatch,
578+
}
516579
if match, err := conditionRegistry.Match(ctx, risk.MatchingRules); err != nil {
580+
msg := unknownExposureMessage(risk, err)
517581
recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionUnknown)
518582
recommended.Reason = newRecommendedReason(recommended.Reason, recommendedReasonEvaluationFailed)
519-
errorMessages = append(errorMessages, unknownExposureMessage(risk, err))
583+
errorMessages = append(errorMessages, msg)
584+
riskCondition.Status = metav1.ConditionUnknown
585+
riskCondition.Reason = riskConditionReasonEvaluationFailed
586+
riskCondition.Message = msg
520587
} else if match {
521-
recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionFalse)
522-
wantReason := recommendedReasonExposed
523-
if reasonPattern.MatchString(risk.Name) {
524-
wantReason = risk.Name
588+
riskCondition.Status = metav1.ConditionTrue
589+
riskCondition.Reason = riskConditionReasonMatch
590+
if shouldReconcileAcceptRisks() && acceptRisks.Has(risk.Name) {
591+
recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionTrue)
592+
recommended.Reason = newRecommendedReason(recommended.Reason, recommendedReasonAllExposedRisksAccepted)
593+
recommended.Message = "The update is recommended, because either risk does not apply to this cluster or it is accepted by cluster admins."
594+
klog.V(2).Infof("Risk with name %q is accepted by the cluster admin and thus not in the evaluation of conditional update", risk.Name)
595+
} else {
596+
recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionFalse)
597+
wantReason := recommendedReasonExposed
598+
if reasonPattern.MatchString(risk.Name) {
599+
wantReason = risk.Name
600+
}
601+
recommended.Reason = newRecommendedReason(recommended.Reason, wantReason)
602+
errorMessages = append(errorMessages, fmt.Sprintf("%s %s", risk.Message, risk.URL))
525603
}
526-
recommended.Reason = newRecommendedReason(recommended.Reason, wantReason)
527-
errorMessages = append(errorMessages, fmt.Sprintf("%s %s", risk.Message, risk.URL))
604+
}
605+
if _, ok := riskConditions[risk.Name]; !ok {
606+
riskConditions[risk.Name] = []metav1.Condition{riskCondition}
528607
}
529608
}
530609
if len(errorMessages) > 0 {

0 commit comments

Comments
 (0)