Skip to content

Commit f85aff1

Browse files
harche and claude committed
pkg/readiness: Add readiness checks and wire into proposal controller
Add pkg/readiness package with 9 cluster readiness checks that gather pre-upgrade health data: cluster conditions, operator health, API deprecations, node capacity, PDB drain blockers, etcd health, network config, CRD compatibility, and OLM operator lifecycle.

Wire readiness.RunAll() into the proposal controller, replacing the hardcoded readinessJSON placeholder with real per-target readiness data that gets embedded in each proposal's request body.

Plumb dynamic.Interface from pkg/start through cvo.New() to the proposal controller to support the readiness checks' cluster queries.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 52cc8e3 commit f85aff1

26 files changed

Lines changed: 4020 additions & 160 deletions

.openshift-tests-extension/openshift_payload_cluster-version-operator.json

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,5 +110,95 @@
110110
"source": "openshift:payload:cluster-version-operator",
111111
"lifecycle": "informing",
112112
"environmentSelector": {}
113+
},
114+
{
115+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should run all checks without errors",
116+
"labels": {},
117+
"resources": {
118+
"isolation": {}
119+
},
120+
"source": "openshift:payload:cluster-version-operator",
121+
"lifecycle": "blocking",
122+
"environmentSelector": {}
123+
},
124+
{
125+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should produce valid JSON that round-trips",
126+
"labels": {},
127+
"resources": {
128+
"isolation": {}
129+
},
130+
"source": "openshift:payload:cluster-version-operator",
131+
"lifecycle": "blocking",
132+
"environmentSelector": {}
133+
},
134+
{
135+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report node count matching the actual cluster",
136+
"labels": {},
137+
"resources": {
138+
"isolation": {}
139+
},
140+
"source": "openshift:payload:cluster-version-operator",
141+
"lifecycle": "blocking",
142+
"environmentSelector": {}
143+
},
144+
{
145+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report operator count matching actual ClusterOperators",
146+
"labels": {},
147+
"resources": {
148+
"isolation": {}
149+
},
150+
"source": "openshift:payload:cluster-version-operator",
151+
"lifecycle": "blocking",
152+
"environmentSelector": {}
153+
},
154+
{
155+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report etcd member count matching actual etcd pods",
156+
"labels": {},
157+
"resources": {
158+
"isolation": {}
159+
},
160+
"source": "openshift:payload:cluster-version-operator",
161+
"lifecycle": "blocking",
162+
"environmentSelector": {}
163+
},
164+
{
165+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report network type matching actual Network config",
166+
"labels": {},
167+
"resources": {
168+
"isolation": {}
169+
},
170+
"source": "openshift:payload:cluster-version-operator",
171+
"lifecycle": "blocking",
172+
"environmentSelector": {}
173+
},
174+
{
175+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report PDB count matching actual PodDisruptionBudgets",
176+
"labels": {},
177+
"resources": {
178+
"isolation": {}
179+
},
180+
"source": "openshift:payload:cluster-version-operator",
181+
"lifecycle": "blocking",
182+
"environmentSelector": {}
183+
},
184+
{
185+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should report cluster conditions matching ClusterVersion status",
186+
"labels": {},
187+
"resources": {
188+
"isolation": {}
189+
},
190+
"source": "openshift:payload:cluster-version-operator",
191+
"lifecycle": "blocking",
192+
"environmentSelector": {}
193+
},
194+
{
195+
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator readiness checks should complete all checks within 60 seconds",
196+
"labels": {},
197+
"resources": {
198+
"isolation": {}
199+
},
200+
"source": "openshift:payload:cluster-version-operator",
201+
"lifecycle": "blocking",
202+
"environmentSelector": {}
113203
}
114204
]

install/0000_00_cluster-version-operator_30_deployment.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ spec:
7777
fieldPath: spec.nodeName
7878
- name: CLUSTER_PROFILE
7979
value: '{{ .ClusterProfile }}'
80+
- name: LIGHTSPEED_SKILLS_IMAGE
81+
value: "quay.io/openshift/ci:ocp_5.0_agentic-skills"
8082
# this pod is hostNetwork and uses the internal LB DNS name when possible, which the kubelet also uses.
8183
# this dnsPolicy allows us to use the same dnsConfig as the kubelet, without access to read it ourselves.
8284
dnsPolicy: Default

pkg/cvo/availableupdates_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ func newOperator(url string, cluster release, promqlMock clusterconditions.Condi
208208
func() ([]configv1.Release, []configv1.ConditionalUpdate, error) {
209209
return nil, nil, nil
210210
},
211-
fake.NewClientBuilder().Build(), func(_ string) (*configv1.ClusterVersion, error) {
211+
fake.NewClientBuilder().Build(), nil, func(_ string) (*configv1.ClusterVersion, error) {
212212
return &configv1.ClusterVersion{}, nil
213213
},
214214
func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) {

pkg/cvo/cvo.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
1818
"k8s.io/apimachinery/pkg/util/sets"
1919
"k8s.io/apimachinery/pkg/util/wait"
20+
"k8s.io/client-go/dynamic"
2021
informerscorev1 "k8s.io/client-go/informers/core/v1"
2122
"k8s.io/client-go/kubernetes"
2223
"k8s.io/client-go/kubernetes/scheme"
@@ -109,6 +110,7 @@ type Operator struct {
109110

110111
client clientset.Interface
111112
kubeClient kubernetes.Interface
113+
dynamicClient dynamic.Interface
112114
operatorClient operatorclientset.Interface
113115
eventRecorder record.EventRecorder
114116

@@ -235,6 +237,7 @@ func New(
235237
featureGateInformer configinformersv1.FeatureGateInformer,
236238
client clientset.Interface,
237239
kubeClient kubernetes.Interface,
240+
dynamicClient dynamic.Interface,
238241
operatorClient operatorclientset.Interface,
239242
exclude string,
240243
clusterProfile string,
@@ -267,6 +270,7 @@ func New(
267270

268271
client: client,
269272
kubeClient: kubeClient,
273+
dynamicClient: dynamicClient,
270274
operatorClient: operatorClient,
271275
eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: namespace}),
272276
queue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.DefaultTypedControllerRateLimiter[any](), workqueue.TypedRateLimitingQueueConfig[any]{Name: "clusterversion"}),
@@ -354,6 +358,7 @@ func New(
354358
return availableUpdates.Updates, availableUpdates.ConditionalUpdates, nil
355359
},
356360
rtClient,
361+
dynamicClient,
357362
cvInformer.Lister().Get,
358363
func(ctx context.Context, namespace, name string, opts metav1.GetOptions) (*corev1.ConfigMap, error) {
359364
return kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, name, opts)

pkg/cvo/cvo_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2756,7 +2756,7 @@ func TestOperator_availableUpdatesSync(t *testing.T) {
27562756
ctx := context.Background()
27572757
optr.proposalController = proposal.NewController(func() ([]configv1.Release, []configv1.ConditionalUpdate, error) {
27582758
return nil, nil, nil
2759-
}, ctrlruntimefake.NewClientBuilder().Build(), func(_ string) (*configv1.ClusterVersion, error) {
2759+
}, ctrlruntimefake.NewClientBuilder().Build(), nil, func(_ string) (*configv1.ClusterVersion, error) {
27602760
return &configv1.ClusterVersion{}, nil
27612761
}, func(_ context.Context, namespace, name string, _ metav1.GetOptions) (*corev1.ConfigMap, error) {
27622762
return &corev1.ConfigMap{}, nil

pkg/payload/testdata/TestRenderManifest_expected_cvo_deployment.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ spec:
7777
fieldPath: spec.nodeName
7878
- name: CLUSTER_PROFILE
7979
value: 'some-profile'
80+
- name: LIGHTSPEED_SKILLS_IMAGE
81+
value: "quay.io/openshift/ci:ocp_5.0_agentic-skills"
8082
# this pod is hostNetwork and uses the internal LB DNS name when possible, which the kubelet also uses.
8183
# this dnsPolicy allows us to use the same dnsConfig as the kubelet, without access to read it ourselves.
8284
dnsPolicy: Default

pkg/proposal/analysis_schema.json

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
{
2+
"type": "object",
3+
"required": ["analysisData"],
4+
"properties": {
5+
"analysisData": {
6+
"type": "array",
7+
"description": "Typed components describing upgrade readiness. Must include exactly one ota_readiness_summary. Include one ota_finding per blocker or warning. Include one ota_olm_operator_status if OLM operators are present.",
8+
"minItems": 1,
9+
"items": {
10+
"oneOf": [
11+
{
12+
"type": "object",
13+
"description": "Overall upgrade readiness summary with per-check results.",
14+
"properties": {
15+
"type": { "type": "string", "const": "ota_readiness_summary" },
16+
"decision": {
17+
"type": "string",
18+
"enum": ["recommend", "caution", "block", "escalate"],
19+
"description": "recommend=all clear, caution=warnings only, block=blockers found, escalate=insufficient data"
20+
},
21+
"checks": {
22+
"type": "array",
23+
"description": "One entry per readiness check from the input JSON.",
24+
"items": {
25+
"type": "object",
26+
"properties": {
27+
"name": { "type": "string", "description": "Check name, e.g. Cluster Conditions, Operator Health" },
28+
"status": { "type": "string", "enum": ["pass", "warn", "fail", "error"] },
29+
"detail": { "type": "string", "description": "One-line summary" }
30+
},
31+
"required": ["name", "status"]
32+
}
33+
}
34+
},
35+
"required": ["type", "decision", "checks"]
36+
},
37+
{
38+
"type": "object",
39+
"description": "A specific blocker, warning, or informational finding.",
40+
"properties": {
41+
"type": { "type": "string", "const": "ota_finding" },
42+
"severity": { "type": "string", "enum": ["blocker", "warning", "info"] },
43+
"check": { "type": "string", "description": "Which readiness check surfaced this" },
44+
"detail": { "type": "string", "description": "Description for a cluster administrator" },
45+
"affectedResources": { "type": "array", "items": { "type": "string" } },
46+
"prerequisite": { "type": "string", "description": "Action to resolve before upgrading" },
47+
"verifyCommand": { "type": "string", "description": "Command to verify the finding is resolved" }
48+
},
49+
"required": ["type", "severity", "check", "detail"]
50+
},
51+
{
52+
"type": "object",
53+
"description": "Per-operator OLM lifecycle status.",
54+
"properties": {
55+
"type": { "type": "string", "const": "ota_olm_operator_status" },
56+
"operators": {
57+
"type": "array",
58+
"items": {
59+
"type": "object",
60+
"properties": {
61+
"name": { "type": "string" },
62+
"namespace": { "type": "string" },
63+
"displayName": { "type": "string" },
64+
"installedVersion": { "type": "string" },
65+
"channel": { "type": "string" },
66+
"source": { "type": "string" },
67+
"installPlanApproval": { "type": "string", "enum": ["Automatic", "Manual"] },
68+
"pendingUpgrade": { "type": "boolean" },
69+
"pendingVersion": { "type": "string" },
70+
"compatibleWithTarget": { "type": "boolean" },
71+
"availableChannels": { "type": "array", "items": { "type": "string" } },
72+
"ocpCompat": {
73+
"type": "object",
74+
"properties": { "min": { "type": "string" }, "max": { "type": "string" } }
75+
},
76+
"lifecycle": {
77+
"type": "object",
78+
"properties": {
79+
"productName": { "type": "string" },
80+
"supportPhase": { "type": "string", "enum": ["Full Support", "Maintenance Support", "End of life"] },
81+
"ocpVersions": { "type": "string" },
82+
"maintenanceEnds": { "type": "string" }
83+
}
84+
}
85+
},
86+
"required": ["name", "namespace"]
87+
}
88+
},
89+
"summary": {
90+
"type": "object",
91+
"properties": {
92+
"totalOperators": { "type": "integer" },
93+
"pendingUpgrades": { "type": "integer" },
94+
"manualApproval": { "type": "integer" },
95+
"incompatibleWithTarget": { "type": "integer" }
96+
}
97+
}
98+
},
99+
"required": ["type", "operators", "summary"]
100+
}
101+
]
102+
}
103+
}
104+
}
105+
}

0 commit comments

Comments
 (0)