Skip to content

Commit d413b03

Browse files
committed
Add Application's health check
1 parent 2aa99fc commit d413b03

3 files changed

Lines changed: 150 additions & 98 deletions

File tree

internal/controller/argo.go

Lines changed: 122 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,127 @@ func newArgoCD(name, namespace string, patternsOperatorConfig PatternsOperatorCo
7171
argoPolicy := strings.Join(argoPolicies, "\n")
7272
defaultPolicy := "role:readonly"
7373
argoScopes := "[groups,email]"
74+
75+
resourceHealthChecks := []argooperator.ResourceHealthCheck{
76+
{
77+
// We can drop this custom Subscription healthcheck once https://www.github.com/argoproj/argo-cd/issues/25921 is fixed
78+
Group: "operators.coreos.com",
79+
Kind: "Subscription",
80+
Check: `local health_status = {}
81+
if obj.status ~= nil then
82+
if obj.status.conditions ~= nil then
83+
local numDegraded = 0
84+
local numPending = 0
85+
local msg = ""
86+
87+
-- Check if this is a manual approval scenario where InstallPlanPending is expected
88+
-- and the operator is already installed (upgrade pending, not initial install)
89+
local isManualApprovalPending = false
90+
if obj.spec ~= nil and obj.spec.installPlanApproval == "Manual" then
91+
for _, condition in pairs(obj.status.conditions) do
92+
if condition.type == "InstallPlanPending" and condition.status == "True" and condition.reason == "RequiresApproval" then
93+
-- Only treat as expected healthy state if the operator is already installed
94+
-- (installedCSV is present), meaning this is an upgrade pending approval
95+
if obj.status.installedCSV ~= nil then
96+
isManualApprovalPending = true
97+
end
98+
break
99+
end
100+
end
101+
end
102+
103+
for i, condition in pairs(obj.status.conditions) do
104+
-- Skip InstallPlanPending condition when manual approval is pending (expected behavior)
105+
if isManualApprovalPending and condition.type == "InstallPlanPending" then
106+
-- Do not include in message or count as pending
107+
else
108+
msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n"
109+
if condition.type == "InstallPlanPending" and condition.status == "True" then
110+
numPending = numPending + 1
111+
elseif (condition.type == "InstallPlanMissing" and condition.reason ~= "ReferencedInstallPlanNotFound") then
112+
numDegraded = numDegraded + 1
113+
elseif (condition.type == "CatalogSourcesUnhealthy" or condition.type == "InstallPlanFailed" or condition.type == "ResolutionFailed") and condition.status == "True" then
114+
numDegraded = numDegraded + 1
115+
end
116+
end
117+
end
118+
119+
-- Available states: undef/nil, UpgradeAvailable, UpgradePending, UpgradeFailed, AtLatestKnown
120+
-- Source: https://github.com/openshift/operator-framework-olm/blob/5e2c73b7663d0122c9dc3e59ea39e515a31e2719/staging/api/pkg/operators/v1alpha1/subscription_types.go#L17-L23
121+
if obj.status.state == nil then
122+
numPending = numPending + 1
123+
msg = msg .. ".status.state not yet known\n"
124+
elseif obj.status.state == "" or obj.status.state == "UpgradeAvailable" then
125+
numPending = numPending + 1
126+
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
127+
elseif obj.status.state == "UpgradePending" then
128+
-- UpgradePending with manual approval is expected behavior, treat as healthy
129+
if isManualApprovalPending then
130+
msg = msg .. ".status.state is 'AtLatestKnown'\n"
131+
else
132+
numPending = numPending + 1
133+
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
134+
end
135+
elseif obj.status.state == "UpgradeFailed" then
136+
numDegraded = numDegraded + 1
137+
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
138+
else
139+
-- Last possibility of .status.state: AtLatestKnown
140+
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
141+
end
142+
143+
if numDegraded == 0 and numPending == 0 then
144+
health_status.status = "Healthy"
145+
health_status.message = msg
146+
return health_status
147+
elseif numPending > 0 and numDegraded == 0 then
148+
health_status.status = "Progressing"
149+
health_status.message = msg
150+
return health_status
151+
else
152+
health_status.status = "Degraded"
153+
health_status.message = msg
154+
return health_status
155+
end
156+
end
157+
end
158+
health_status.status = "Progressing"
159+
health_status.message = "An install plan for a subscription is pending installation"
160+
return health_status`,
161+
},
162+
}
163+
if strings.EqualFold(patternsOperatorConfig.getValueWithDefault("gitops.applicationHealthCheckEnabled"), "true") {
164+
// As of ArgoCD 1.8 the Application health check was dropped (see https://github.com/argoproj/argo-cd/issues/3781),
165+
// but in the app-of-apps pattern it is still needed so that dependencies between child apps can be implemented via sync-waves
166+
resourceHealthChecks = append(resourceHealthChecks, argooperator.ResourceHealthCheck{
167+
Group: "argoproj.io",
168+
Kind: "Application",
169+
Check: `local health_status = {}
170+
health_status.status = "Progressing"
171+
health_status.message = ""
172+
if obj.status ~= nil then
173+
if obj.status.health ~= nil then
174+
-- we consider the Application Healthy only when the health status is Healthy AND it's synced
175+
if obj.status.health.status == "Healthy" and (obj.status.sync and obj.status.sync.status or nil) == "Synced" then
176+
health_status.status = "Healthy"
177+
health_status.message = (obj.status.health.message or "Application is healthy and synced")
178+
return health_status
179+
end
180+
-- We consider the Application Degraded only when the retry count has reached 'retry.limit' and the sync has not succeeded
181+
if obj.status.operationState ~= nil then
182+
local retryLimit = (obj.status.operationState.operation and obj.status.operationState.operation.retry and obj.status.operationState.operation.retry.limit or nil)
183+
local retryCount = (obj.status.operationState.retryCount or nil)
184+
if retryLimit == retryCount and obj.status.operationState.phase ~= "Succeeded" then
185+
health_status.status = "Degraded"
186+
health_status.message = "Retry limit reached and sync didn't succeed"
187+
end
188+
end
189+
end
190+
end
191+
return health_status`,
192+
})
193+
}
194+
74195
trueBool := true
75196
initVolumes := []v1.Volume{
76197
{
@@ -264,93 +385,7 @@ func newArgoCD(name, namespace string, patternsOperatorConfig PatternsOperatorCo
264385
- TaskRun
265386
- PipelineRun`,
266387
// We can drop this custom Subscription healthcheck once https://www.github.com/argoproj/argo-cd/issues/25921 is fixed
267-
ResourceHealthChecks: []argooperator.ResourceHealthCheck{
268-
{
269-
Group: "operators.coreos.com",
270-
Kind: "Subscription",
271-
Check: `local health_status = {}
272-
if obj.status ~= nil then
273-
if obj.status.conditions ~= nil then
274-
local numDegraded = 0
275-
local numPending = 0
276-
local msg = ""
277-
278-
-- Check if this is a manual approval scenario where InstallPlanPending is expected
279-
-- and the operator is already installed (upgrade pending, not initial install)
280-
local isManualApprovalPending = false
281-
if obj.spec ~= nil and obj.spec.installPlanApproval == "Manual" then
282-
for _, condition in pairs(obj.status.conditions) do
283-
if condition.type == "InstallPlanPending" and condition.status == "True" and condition.reason == "RequiresApproval" then
284-
-- Only treat as expected healthy state if the operator is already installed
285-
-- (installedCSV is present), meaning this is an upgrade pending approval
286-
if obj.status.installedCSV ~= nil then
287-
isManualApprovalPending = true
288-
end
289-
break
290-
end
291-
end
292-
end
293-
294-
for i, condition in pairs(obj.status.conditions) do
295-
-- Skip InstallPlanPending condition when manual approval is pending (expected behavior)
296-
if isManualApprovalPending and condition.type == "InstallPlanPending" then
297-
-- Do not include in message or count as pending
298-
else
299-
msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n"
300-
if condition.type == "InstallPlanPending" and condition.status == "True" then
301-
numPending = numPending + 1
302-
elseif (condition.type == "InstallPlanMissing" and condition.reason ~= "ReferencedInstallPlanNotFound") then
303-
numDegraded = numDegraded + 1
304-
elseif (condition.type == "CatalogSourcesUnhealthy" or condition.type == "InstallPlanFailed" or condition.type == "ResolutionFailed") and condition.status == "True" then
305-
numDegraded = numDegraded + 1
306-
end
307-
end
308-
end
309-
310-
-- Available states: undef/nil, UpgradeAvailable, UpgradePending, UpgradeFailed, AtLatestKnown
311-
-- Source: https://github.com/openshift/operator-framework-olm/blob/5e2c73b7663d0122c9dc3e59ea39e515a31e2719/staging/api/pkg/operators/v1alpha1/subscription_types.go#L17-L23
312-
if obj.status.state == nil then
313-
numPending = numPending + 1
314-
msg = msg .. ".status.state not yet known\n"
315-
elseif obj.status.state == "" or obj.status.state == "UpgradeAvailable" then
316-
numPending = numPending + 1
317-
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
318-
elseif obj.status.state == "UpgradePending" then
319-
-- UpgradePending with manual approval is expected behavior, treat as healthy
320-
if isManualApprovalPending then
321-
msg = msg .. ".status.state is 'AtLatestKnown'\n"
322-
else
323-
numPending = numPending + 1
324-
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
325-
end
326-
elseif obj.status.state == "UpgradeFailed" then
327-
numDegraded = numDegraded + 1
328-
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
329-
else
330-
-- Last possibility of .status.state: AtLatestKnown
331-
msg = msg .. ".status.state is '" .. obj.status.state .. "'\n"
332-
end
333-
334-
if numDegraded == 0 and numPending == 0 then
335-
health_status.status = "Healthy"
336-
health_status.message = msg
337-
return health_status
338-
elseif numPending > 0 and numDegraded == 0 then
339-
health_status.status = "Progressing"
340-
health_status.message = msg
341-
return health_status
342-
else
343-
health_status.status = "Degraded"
344-
health_status.message = msg
345-
return health_status
346-
end
347-
end
348-
end
349-
health_status.status = "Progressing"
350-
health_status.message = "An install plan for a subscription is pending installation"
351-
return health_status`,
352-
},
353-
},
388+
ResourceHealthChecks: resourceHealthChecks,
354389
ResourceTrackingMethod: "annotation",
355390
Server: argooperator.ArgoCDServerSpec{
356391
Autoscale: argooperator.ArgoCDServerAutoscaleSpec{

internal/controller/argo_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,6 +2178,22 @@ var _ = Describe("newArgoCD", func() {
21782178
Expect(*argo.Spec.RBAC.Policy).To(ContainSubstring("test-admins"))
21792179
})
21802180

2181+
It("should have only Subscription ResourceHealthChecks", func() {
2182+
argo = newArgoCD("test-argo", "test-ns", DefaultPatternsOperatorConfig)
2183+
Expect(argo.Spec.ResourceHealthChecks).ToNot(BeNil())
2184+
Expect(argo.Spec.ResourceHealthChecks).To(HaveLen(1))
2185+
Expect(argo.Spec.ResourceHealthChecks[0].Group).To(Equal("operators.coreos.com"))
2186+
Expect(argo.Spec.ResourceHealthChecks[0].Kind).To(Equal("Subscription"))
2187+
})
2188+
2189+
It("should have also Application ResourceHealthChecks when gitops.applicationHealthCheckEnabled is set to true", func() {
2190+
argo = newArgoCD("test-argo", "test-ns", PatternsOperatorConfig{"gitops.applicationHealthCheckEnabled": "true"})
2191+
Expect(argo.Spec.ResourceHealthChecks).ToNot(BeNil())
2192+
Expect(argo.Spec.ResourceHealthChecks).To(HaveLen(2))
2193+
Expect(argo.Spec.ResourceHealthChecks[1].Group).To(Equal("argoproj.io"))
2194+
Expect(argo.Spec.ResourceHealthChecks[1].Kind).To(Equal("Application"))
2195+
})
2196+
21812197
})
21822198

21832199
var _ = Describe("commonSyncPolicy", func() {

internal/controller/defaults.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,18 @@ const (
8787
// Currently none
8888

8989
var DefaultPatternsOperatorConfig = map[string]string{
90-
"gitops.catalogSource": GitOpsDefaultCatalogSource,
91-
"gitops.channel": GitOpsDefaultChannel,
92-
"gitops.sourceNamespace": GitOpsDefaultCatalogSourceNamespace,
93-
"gitops.installApprovalPlan": GitOpsDefaultApprovalPlan,
94-
"gitops.csv": GitOpsDefaultCSV,
95-
"gitops.additionalArgoAdmins": "",
96-
"gitea.chartName": GiteaChartName,
97-
"gitea.helmRepoUrl": GiteaHelmRepoUrl,
98-
"gitea.chartVersion": GiteaDefaultChartVersion,
99-
"analytics.enabled": "true",
100-
"catalog.image": "",
90+
"gitops.catalogSource": GitOpsDefaultCatalogSource,
91+
"gitops.channel": GitOpsDefaultChannel,
92+
"gitops.sourceNamespace": GitOpsDefaultCatalogSourceNamespace,
93+
"gitops.installApprovalPlan": GitOpsDefaultApprovalPlan,
94+
"gitops.csv": GitOpsDefaultCSV,
95+
"gitops.additionalArgoAdmins": "",
96+
"gitops.applicationHealthCheckEnabled": "false",
97+
"gitea.chartName": GiteaChartName,
98+
"gitea.helmRepoUrl": GiteaHelmRepoUrl,
99+
"gitea.chartVersion": GiteaDefaultChartVersion,
100+
"analytics.enabled": "true",
101+
"catalog.image": "",
101102
}
102103

103104
type PatternsOperatorConfig map[string]string

0 commit comments

Comments
 (0)