Skip to content

Commit 8be9908

Browse files
harche and claude committed
pkg/readiness: Add readiness checks and wire into proposal controller
Add pkg/readiness package with 9 cluster readiness checks that gather pre-upgrade health data: cluster conditions, operator health, API deprecations, node capacity, PDB drain blockers, etcd health, network config, CRD compatibility, and OLM operator lifecycle.

Wire readiness.RunAll() into the proposal controller, replacing the hardcoded readinessJSON placeholder with real per-target readiness data that gets embedded in each proposal's request body.

Plumb dynamic.Interface from pkg/start through cvo.New() to the proposal controller to support the readiness checks' cluster queries.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 52cc8e3 commit 8be9908

23 files changed

Lines changed: 3808 additions & 16 deletions

.openshift-tests-extension/openshift_payload_cluster-version-operator.json

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,5 +110,95 @@
110110
"source": "openshift:payload:cluster-version-operator",
111111
"lifecycle": "informing",
112112
"environmentSelector": {}
113+
},
114+
{
115+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should run all checks without errors",
116+
"labels": {},
117+
"resources": {
118+
"isolation": {}
119+
},
120+
"source": "openshift:payload:cluster-version-operator",
121+
"lifecycle": "blocking",
122+
"environmentSelector": {}
123+
},
124+
{
125+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should produce valid JSON that round-trips",
126+
"labels": {},
127+
"resources": {
128+
"isolation": {}
129+
},
130+
"source": "openshift:payload:cluster-version-operator",
131+
"lifecycle": "blocking",
132+
"environmentSelector": {}
133+
},
134+
{
135+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report node count matching the actual cluster",
136+
"labels": {},
137+
"resources": {
138+
"isolation": {}
139+
},
140+
"source": "openshift:payload:cluster-version-operator",
141+
"lifecycle": "blocking",
142+
"environmentSelector": {}
143+
},
144+
{
145+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report operator count matching actual ClusterOperators",
146+
"labels": {},
147+
"resources": {
148+
"isolation": {}
149+
},
150+
"source": "openshift:payload:cluster-version-operator",
151+
"lifecycle": "blocking",
152+
"environmentSelector": {}
153+
},
154+
{
155+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report etcd member count matching actual etcd pods",
156+
"labels": {},
157+
"resources": {
158+
"isolation": {}
159+
},
160+
"source": "openshift:payload:cluster-version-operator",
161+
"lifecycle": "blocking",
162+
"environmentSelector": {}
163+
},
164+
{
165+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report network type matching actual Network config",
166+
"labels": {},
167+
"resources": {
168+
"isolation": {}
169+
},
170+
"source": "openshift:payload:cluster-version-operator",
171+
"lifecycle": "blocking",
172+
"environmentSelector": {}
173+
},
174+
{
175+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report PDB count matching actual PodDisruptionBudgets",
176+
"labels": {},
177+
"resources": {
178+
"isolation": {}
179+
},
180+
"source": "openshift:payload:cluster-version-operator",
181+
"lifecycle": "blocking",
182+
"environmentSelector": {}
183+
},
184+
{
185+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report cluster conditions matching ClusterVersion status",
186+
"labels": {},
187+
"resources": {
188+
"isolation": {}
189+
},
190+
"source": "openshift:payload:cluster-version-operator",
191+
"lifecycle": "blocking",
192+
"environmentSelector": {}
193+
},
194+
{
195+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should complete all checks within 60 seconds",
196+
"labels": {},
197+
"resources": {
198+
"isolation": {}
199+
},
200+
"source": "openshift:payload:cluster-version-operator",
201+
"lifecycle": "blocking",
202+
"environmentSelector": {}
113203
}
114204
]

pkg/cvo/availableupdates_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ func newOperator(url string, cluster release, promqlMock clusterconditions.Condi
208208
func() ([]configv1.Release, []configv1.ConditionalUpdate, error) {
209209
return nil, nil, nil
210210
},
211-
fake.NewClientBuilder().Build(), func(_ string) (*configv1.ClusterVersion, error) {
211+
fake.NewClientBuilder().Build(), nil, func(_ string) (*configv1.ClusterVersion, error) {
212212
return &configv1.ClusterVersion{}, nil
213213
},
214214
func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) {

pkg/cvo/cvo.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
1818
"k8s.io/apimachinery/pkg/util/sets"
1919
"k8s.io/apimachinery/pkg/util/wait"
20+
"k8s.io/client-go/dynamic"
2021
informerscorev1 "k8s.io/client-go/informers/core/v1"
2122
"k8s.io/client-go/kubernetes"
2223
"k8s.io/client-go/kubernetes/scheme"
@@ -109,6 +110,7 @@ type Operator struct {
109110

110111
client clientset.Interface
111112
kubeClient kubernetes.Interface
113+
dynamicClient dynamic.Interface
112114
operatorClient operatorclientset.Interface
113115
eventRecorder record.EventRecorder
114116

@@ -235,6 +237,7 @@ func New(
235237
featureGateInformer configinformersv1.FeatureGateInformer,
236238
client clientset.Interface,
237239
kubeClient kubernetes.Interface,
240+
dynamicClient dynamic.Interface,
238241
operatorClient operatorclientset.Interface,
239242
exclude string,
240243
clusterProfile string,
@@ -267,6 +270,7 @@ func New(
267270

268271
client: client,
269272
kubeClient: kubeClient,
273+
dynamicClient: dynamicClient,
270274
operatorClient: operatorClient,
271275
eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: namespace}),
272276
queue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.DefaultTypedControllerRateLimiter[any](), workqueue.TypedRateLimitingQueueConfig[any]{Name: "clusterversion"}),
@@ -354,6 +358,7 @@ func New(
354358
return availableUpdates.Updates, availableUpdates.ConditionalUpdates, nil
355359
},
356360
rtClient,
361+
dynamicClient,
357362
cvInformer.Lister().Get,
358363
func(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.ConfigMap, error) {
359364
return kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, name, opts)

pkg/cvo/cvo_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2756,7 +2756,7 @@ func TestOperator_availableUpdatesSync(t *testing.T) {
27562756
ctx := context.Background()
27572757
optr.proposalController = proposal.NewController(func() ([]configv1.Release, []configv1.ConditionalUpdate, error) {
27582758
return nil, nil, nil
2759-
}, ctrlruntimefake.NewClientBuilder().Build(), func(_ string) (*configv1.ClusterVersion, error) {
2759+
}, ctrlruntimefake.NewClientBuilder().Build(), nil, func(_ string) (*configv1.ClusterVersion, error) {
27602760
return &configv1.ClusterVersion{}, nil
27612761
}, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) {
27622762
return &corev1.ConfigMap{}, nil

pkg/proposal/controller.go

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package proposal
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
67
"os"
78
"regexp"
@@ -17,20 +18,23 @@ import (
1718
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1819
kutilerrors "k8s.io/apimachinery/pkg/util/errors"
1920
"k8s.io/apimachinery/pkg/util/sets"
21+
"k8s.io/client-go/dynamic"
2022
"k8s.io/client-go/util/workqueue"
2123
"k8s.io/klog/v2"
2224

2325
configv1 "github.com/openshift/api/config/v1"
2426
proposalv1alpha1 "github.com/openshift/lightspeed-agentic-operator/api/v1alpha1"
2527

2628
i "github.com/openshift/cluster-version-operator/pkg/internal"
29+
"github.com/openshift/cluster-version-operator/pkg/readiness"
2730
)
2831

2932
type Controller struct {
3033
queueKey string
3134
queue workqueue.TypedRateLimitingInterface[any]
3235
updatesGetterFunc updatesGetterFunc
3336
client ctrlruntimeclient.Client
37+
dynamicClient dynamic.Interface
3438
cvGetterFunc cvGetterFunc
3539
configMapGetterFunc configMapGetterFunc
3640
getCurrentVersionFunc getCurrentVersionFunc
@@ -57,6 +61,7 @@ type configMapGetterFunc func(ctx context.Context, namespace, name string, opts
5761
func NewController(
5862
updatesGetterFunc updatesGetterFunc,
5963
client ctrlruntimeclient.Client,
64+
dynamicClient dynamic.Interface,
6065
cvGetterFunc cvGetterFunc,
6166
configMapGetterFunc configMapGetterFunc,
6267
getCurrentVersionFunc getCurrentVersionFunc,
@@ -68,6 +73,7 @@ func NewController(
6873
workqueue.TypedRateLimitingQueueConfig[any]{Name: controllerName}),
6974
updatesGetterFunc: updatesGetterFunc,
7075
client: client,
76+
dynamicClient: dynamicClient,
7177
cvGetterFunc: cvGetterFunc,
7278
configMapGetterFunc: configMapGetterFunc,
7379
getCurrentVersionFunc: getCurrentVersionFunc,
@@ -152,9 +158,7 @@ func (c *Controller) Sync(ctx context.Context, key string) error {
152158
return kutilerrors.NewAggregate(errs)
153159
}
154160

155-
// TODO: Implement it
156-
readinessJSON := "{}"
157-
proposals, err := getProposals(updates, conditionalUpdates, c.config.Namespace, currentVersion, cv.Spec.Channel, prompt, readinessJSON)
161+
proposals, err := getProposals(ctx, c.dynamicClient, updates, conditionalUpdates, c.config.Namespace, currentVersion, cv.Spec.Channel, prompt)
158162
if err != nil {
159163
klog.V(i.Normal).Infof("Getting proposals hit an error: %v", err)
160164
return kutilerrors.NewAggregate(append(errs, err))
@@ -277,17 +281,23 @@ func deleteProposal(ctx context.Context, client ctrlruntimeclient.Client, propos
277281
}
278282

279283
func getProposals(
284+
ctx context.Context,
285+
dynamicClient dynamic.Interface,
280286
availableUpdates []configv1.Release,
281287
conditionalUpdates []configv1.ConditionalUpdate,
282288
namespace string,
283289
currentVersion, channel,
284290
systemPrompt string,
285-
readinessJSON string,
286291
) ([]*proposalv1alpha1.Proposal, error) {
292+
// TODO: Only 2 of 9 readiness checks (api_deprecations, olm_lifecycle) use the target version.
293+
// The other 7 query cluster-wide state identical across targets. For clusters with many available
294+
// updates, split into target-independent checks (run once) and target-dependent checks (run per
295+
// target) to reduce redundant API calls.
287296
var errs []error
288297
var proposals []*proposalv1alpha1.Proposal
289298
for _, au := range availableUpdates {
290299
targetVersion := au.Version
300+
readinessJSON := runReadinessJSON(ctx, dynamicClient, currentVersion, targetVersion)
291301
if proposal, err := getProposal(namespace, currentVersion, targetVersion, channel, updateKindRecommended, systemPrompt, readinessJSON, availableUpdates); err != nil {
292302
errs = append(errs, err)
293303
continue
@@ -298,6 +308,7 @@ func getProposals(
298308

299309
for _, cu := range conditionalUpdates {
300310
targetVersion := cu.Release.Version
311+
readinessJSON := runReadinessJSON(ctx, dynamicClient, currentVersion, targetVersion)
301312
if proposal, err := getProposal(namespace, currentVersion, targetVersion, channel, updateKindConditional, systemPrompt, readinessJSON, availableUpdates); err != nil {
302313
errs = append(errs, err)
303314
continue
@@ -437,6 +448,20 @@ func classifyUpdate(current, target string) string {
437448
return i.UpdateType(cv, tv)
438449
}
439450

451+
// runReadinessJSON runs the readiness checks for an update from currentVersion
// to targetVersion and returns their output serialized as a JSON string.
//
// It never returns an error. When dynamicClient is nil, or when the value
// returned by readiness.RunAll cannot be marshaled by encoding/json, the
// condition is logged and the empty JSON object "{}" is returned instead, so
// the caller always receives a string it can embed as JSON.
//
// NOTE(review): the nil comparison only detects an untyped nil interface; a
// typed nil pointer stored in dynamic.Interface would pass it and be handed to
// readiness.RunAll — confirm callers never pass one.
func runReadinessJSON(ctx context.Context, dynamicClient dynamic.Interface, currentVersion, targetVersion string) string {
452+
if dynamicClient == nil {
453+
klog.V(i.Normal).Infof("Dynamic client is nil; skipping readiness checks for %s -> %s", currentVersion, targetVersion)
454+
return "{}"
455+
}
456+
output := readiness.RunAll(ctx, dynamicClient, currentVersion, targetVersion)
457+
data, err := json.Marshal(output)
458+
if err != nil {
459+
klog.V(i.Normal).Infof("Failed to marshal readiness output for %s -> %s: %v", currentVersion, targetVersion, err)
460+
return "{}"
461+
}
462+
return string(data)
463+
}
464+
440465
// buildRequest constructs the proposal request with system prompt, metadata, and readiness data.
441466
func buildRequest(systemPrompt, current, target, channel, updateType, targetType string,
442467
updates []configv1.Release, readinessJSON string) string {

0 commit comments

Comments
 (0)