Skip to content

Commit bbc4a07

Browse files
committed
Align custom healthcheck with clustergroup
Tested with:
```sh
oc get argocd -n vp-gitops vp-gitops -o jsonpath='{.spec.resourceHealthChecks}' | jq -r '.[].kind'
Subscription
PersistentVolumeClaim
```
1 parent 4d970c3 commit bbc4a07

3 files changed

Lines changed: 123 additions & 88 deletions

File tree

templates/_helpers.tpl

Lines changed: 95 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -135,92 +135,112 @@ predicates:
135135
{{- end -}}
136136
{{- end -}} {{- /*acm.app.clusterSelector */}}
137137

138-
{{/*
139-
Subscription health check Lua script for ArgoCD resource health checks
140-
*/}}
141-
{{- define "acm.subscription.healthcheck.lua" -}}
142-
local health_status = {}
143-
if obj.status ~= nil then
144-
if obj.status.conditions ~= nil then
145-
local numDegraded = 0
146-
local numPending = 0
147-
local msg = ""
138+
{{/* Please make sure that these healthchecks are the same in the operator code */}}
139+
{{- define "acm.default.healthchecks" -}}
140+
- group: operators.coreos.com
141+
kind: Subscription
142+
check: |
143+
local health_status = {}
144+
if obj.status ~= nil then
145+
if obj.status.conditions ~= nil then
146+
local numDegraded = 0
147+
local numPending = 0
148+
local msg = ""
148149

149-
-- Check if this is a manual approval scenario where InstallPlanPending is expected
150-
-- and the operator is already installed (upgrade pending, not initial install)
151-
local isManualApprovalPending = false
152-
if obj.spec ~= nil and obj.spec.installPlanApproval == "Manual" then
153-
for _, condition in pairs(obj.status.conditions) do
154-
if condition.type == "InstallPlanPending" and condition.status == "True" and condition.reason == "RequiresApproval" then
155-
-- Only treat as expected healthy state if the operator is already installed
156-
-- (installedCSV is present), meaning this is an upgrade pending approval
157-
if obj.status.installedCSV ~= nil then
158-
isManualApprovalPending = true
150+
-- Check if this is a manual approval scenario where InstallPlanPending is expected
151+
-- and the operator is already installed (upgrade pending, not initial install)
152+
local isManualApprovalPending = false
153+
if obj.spec ~= nil and obj.spec.installPlanApproval == "Manual" then
154+
for _, condition in pairs(obj.status.conditions) do
155+
if condition.type == "InstallPlanPending" and condition.status == "True" and condition.reason == "RequiresApproval" then
156+
-- Only treat as expected healthy state if the operator is already installed
157+
-- (installedCSV is present), meaning this is an upgrade pending approval
158+
if obj.status.installedCSV ~= nil then
159+
isManualApprovalPending = true
160+
end
161+
break
162+
end
159163
end
160-
break
161164
end
162-
end
163-
end
164165

165-
for i, condition in pairs(obj.status.conditions) do
166-
-- Skip InstallPlanPending condition when manual approval is pending (expected behavior)
167-
if isManualApprovalPending and condition.type == "InstallPlanPending" then
168-
-- Do not include in message or count as pending
169-
else
170-
msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n"
171-
if condition.type == "InstallPlanPending" and condition.status == "True" then
166+
for i, condition in pairs(obj.status.conditions) do
167+
-- Skip InstallPlanPending condition when manual approval is pending (expected behavior)
168+
if isManualApprovalPending and condition.type == "InstallPlanPending" then
169+
-- Do not include in message or count as pending
170+
else
171+
msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n"
172+
if condition.type == "InstallPlanPending" and condition.status == "True" then
173+
numPending = numPending + 1
174+
elseif (condition.type == "InstallPlanMissing" and condition.reason ~= "ReferencedInstallPlanNotFound") then
175+
numDegraded = numDegraded + 1
176+
elseif (condition.type == "CatalogSourcesUnhealthy" or condition.type == "InstallPlanFailed" or condition.type == "ResolutionFailed") and condition.status == "True" then
177+
numDegraded = numDegraded + 1
178+
end
179+
end
180+
end
181+
182+
-- Available states: undef/nil, UpgradeAvailable, UpgradePending, UpgradeFailed, AtLatestKnown
183+
-- Source: https://github.com/openshift/operator-framework-olm/blob/5e2c73b7663d0122c9dc3e59ea39e515a31e2719/staging/api/pkg/operators/v1alpha1/subscription_types.go#L17-L23
184+
if obj.status.state == nil then
172185
numPending = numPending + 1
173-
elseif (condition.type == "InstallPlanMissing" and condition.reason ~= "ReferencedInstallPlanNotFound") then
174-
numDegraded = numDegraded + 1
175-
elseif (condition.type == "CatalogSourcesUnhealthy" or condition.type == "InstallPlanFailed" or condition.type == "ResolutionFailed") and condition.status == "True" then
186+
msg = msg .. ".status.state not yet known\n"
187+
elseif obj.status.state == "" or obj.status.state == "UpgradeAvailable" then
188+
numPending = numPending + 1
189+
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
190+
elseif obj.status.state == "UpgradePending" then
191+
-- UpgradePending with manual approval is expected behavior, treat as healthy
192+
if isManualApprovalPending then
193+
msg = msg .. ".status.state is 'AtLatestKnown'\n"
194+
else
195+
numPending = numPending + 1
196+
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
197+
end
198+
elseif obj.status.state == "UpgradeFailed" then
176199
numDegraded = numDegraded + 1
200+
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
201+
else
202+
-- Last possibility of .status.state: AtLatestKnown
203+
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
177204
end
178-
end
179-
end
180205

181-
-- Available states: undef/nil, UpgradeAvailable, UpgradePending, UpgradeFailed, AtLatestKnown
182-
-- Source: https://github.com/openshift/operator-framework-olm/blob/5e2c73b7663d0122c9dc3e59ea39e515a31e2719/staging/api/pkg/operators/v1alpha1/subscription_types.go#L17-L23
183-
if obj.status.state == nil then
184-
numPending = numPending + 1
185-
msg = msg .. ".status.state not yet known\n"
186-
elseif obj.status.state == "" or obj.status.state == "UpgradeAvailable" then
187-
numPending = numPending + 1
188-
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
189-
elseif obj.status.state == "UpgradePending" then
190-
-- UpgradePending with manual approval is expected behavior, treat as healthy
191-
if isManualApprovalPending then
192-
msg = msg .. ".status.state is 'AtLatestKnown'\n"
193-
else
194-
numPending = numPending + 1
195-
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
206+
if numDegraded == 0 and numPending == 0 then
207+
health_status.status = "Healthy"
208+
health_status.message = msg
209+
return health_status
210+
elseif numPending > 0 and numDegraded == 0 then
211+
health_status.status = "Progressing"
212+
health_status.message = msg
213+
return health_status
214+
else
215+
health_status.status = "Degraded"
216+
health_status.message = msg
217+
return health_status
218+
end
196219
end
197-
elseif obj.status.state == "UpgradeFailed" then
198-
numDegraded = numDegraded + 1
199-
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
200-
else
201-
-- Last possibility of .status.state: AtLatestKnown
202-
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
203220
end
204-
205-
if numDegraded == 0 and numPending == 0 then
206-
health_status.status = "Healthy"
207-
health_status.message = msg
208-
return health_status
209-
elseif numPending > 0 and numDegraded == 0 then
210-
health_status.status = "Progressing"
211-
health_status.message = msg
212-
return health_status
213-
else
214-
health_status.status = "Degraded"
215-
health_status.message = msg
216-
return health_status
221+
health_status.status = "Progressing"
222+
health_status.message = "An install plan for a subscription is pending installation"
223+
return health_status
224+
- kind: PersistentVolumeClaim
225+
check: |
226+
hs = {}
227+
if obj.status ~= nil then
228+
if obj.status.phase ~= nil then
229+
if obj.status.phase == "Pending" then
230+
hs.status = "Healthy"
231+
hs.message = obj.status.phase
232+
return hs
233+
elseif obj.status.phase == "Bound" then
234+
hs.status = "Healthy"
235+
hs.message = obj.status.phase
236+
return hs
237+
end
238+
end
217239
end
218-
end
219-
end
220-
health_status.status = "Progressing"
221-
health_status.message = "An install plan for a subscription is pending installation"
222-
return health_status
223-
{{- end }} {{- /*acm.subscription.healthcheck.lua */}}
240+
hs.status = "Progressing"
241+
hs.message = "Waiting for PVC"
242+
return hs
243+
{{- end }} {{- /*acm.default.healthchecks */}}
224244

225245
{{/*
226246
Determines if the current cluster is a hub cluster.

templates/policies/ocp-gitops-policy.yaml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,7 @@ spec:
267267
- PipelineRun
268268
# Default healthchecks (Subscription, including manual approval scenarios, and PersistentVolumeClaim)
269269
resourceHealthChecks:
270-
- group: operators.coreos.com
271-
kind: Subscription
272-
check: |
273-
{{- include "acm.subscription.healthcheck.lua" . | nindent 24 }}
270+
{{- include "acm.default.healthchecks" . | nindent 20 }}
274271
{{- range $.Values.acm.extraResourceHealthChecks }}
275272
- group: {{ .group }}
276273
kind: {{ .kind }}

tests/ocp_gitops_policy_test.yaml

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ tests:
9696
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.channel
9797
value: gitops-priority
9898

99-
- it: Should have only the default Subscription healthcheck when no extra healthchecks are configured
99+
- it: Should have the default Subscription and PVC healthchecks when no extra healthchecks are configured
100100
values:
101101
- ./clusterselector_values.yaml
102102
asserts:
@@ -105,7 +105,7 @@ tests:
105105
value: group-one-gitops-policy-argocd
106106
lengthEqual:
107107
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks
108-
count: 1
108+
count: 2
109109
- documentSelector:
110110
path: metadata.name
111111
value: group-one-gitops-policy-argocd
@@ -118,6 +118,12 @@ tests:
118118
equal:
119119
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[0].kind
120120
value: Subscription
121+
- documentSelector:
122+
path: metadata.name
123+
value: group-one-gitops-policy-argocd
124+
equal:
125+
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind
126+
value: PersistentVolumeClaim
121127

122128
- it: Should append a single extra resource healthcheck
123129
values:
@@ -137,7 +143,7 @@ tests:
137143
value: group-one-gitops-policy-argocd
138144
lengthEqual:
139145
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks
140-
count: 2
146+
count: 3
141147
- documentSelector:
142148
path: metadata.name
143149
value: group-one-gitops-policy-argocd
@@ -148,13 +154,19 @@ tests:
148154
path: metadata.name
149155
value: group-one-gitops-policy-argocd
150156
equal:
151-
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].group
157+
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind
158+
value: PersistentVolumeClaim
159+
- documentSelector:
160+
path: metadata.name
161+
value: group-one-gitops-policy-argocd
162+
equal:
163+
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].group
152164
value: argoproj.io
153165
- documentSelector:
154166
path: metadata.name
155167
value: group-one-gitops-policy-argocd
156168
equal:
157-
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind
169+
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].kind
158170
value: Application
159171

160172
- it: Should append multiple extra resource healthchecks
@@ -183,22 +195,28 @@ tests:
183195
value: group-one-gitops-policy-argocd
184196
lengthEqual:
185197
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks
186-
count: 3
198+
count: 4
187199
- documentSelector:
188200
path: metadata.name
189201
value: group-one-gitops-policy-argocd
190202
equal:
191-
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].group
192-
value: argoproj.io
203+
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[1].kind
204+
value: PersistentVolumeClaim
193205
- documentSelector:
194206
path: metadata.name
195207
value: group-one-gitops-policy-argocd
196208
equal:
197209
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].group
210+
value: argoproj.io
211+
- documentSelector:
212+
path: metadata.name
213+
value: group-one-gitops-policy-argocd
214+
equal:
215+
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[3].group
198216
value: batch
199217
- documentSelector:
200218
path: metadata.name
201219
value: group-one-gitops-policy-argocd
202220
equal:
203-
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[2].kind
221+
path: spec.policy-templates[0].objectDefinition.spec.object-templates[0].objectDefinition.spec.resourceHealthChecks[3].kind
204222
value: Job

0 commit comments

Comments
 (0)