Skip to content

Commit 2f6373e

Browse files
mclasmeierMoritz Clasmeier
andauthored
Simplify teardown, hopefully making it more robust against CNO races (#164)
Co-authored-by: Moritz Clasmeier <mclasmeier@redhat.com>
1 parent a72a41a commit 2f6373e

1 file changed

Lines changed: 64 additions & 186 deletions

File tree

internal/deployer/deployer.go

Lines changed: 64 additions & 186 deletions
Original file line numberDiff line numberDiff line change
@@ -38,70 +38,6 @@ var (
3838

3939
// AdminUsername is the default admin username for StackRox Central
4040
AdminUsername = "admin"
41-
42-
// TODO(#91): at some point this will get out of date. If we filter by the app.../part-of
43-
// label anyway, then maybe we should just delete all resource kinds present on cluster?
44-
// also we should use the fully-qualified types
45-
allInstallableCentralResourceKinds = []string{
46-
"applications",
47-
"clusterroles",
48-
"configmaps",
49-
"deployments",
50-
"destinationrules",
51-
"endpoints",
52-
"endpointslices",
53-
"horizontalpodautoscalers",
54-
"networkpolicys",
55-
"leases",
56-
"persistentvolumes",
57-
"persistentvolumeclaims",
58-
"pods",
59-
"podsecuritypolicys",
60-
"prometheusrules",
61-
"roles",
62-
"rolebindings",
63-
"replicasets",
64-
"routes",
65-
"secrets",
66-
"services",
67-
"serviceaccounts",
68-
"servicemonitors",
69-
"storageclasses",
70-
}
71-
72-
allInstallableSecuredClusterResourceKinds = []string{
73-
"clusterroles",
74-
"clusterrolebindings",
75-
"configmaps",
76-
"consoleplugins",
77-
"controllerrevisions",
78-
"daemonsets",
79-
"deployments",
80-
"endpoints",
81-
"endpointslices",
82-
"destinationrules",
83-
"horizontalpodautoscalers",
84-
"networkpolicys",
85-
"leases",
86-
"persistentvolumes",
87-
"persistentvolumeclaims",
88-
"pods",
89-
"podsecuritypolicys",
90-
"prometheusrules",
91-
"replicasets",
92-
"roles",
93-
"rolebindings",
94-
"secrets",
95-
"services",
96-
"serviceaccounts",
97-
"servicemonitors",
98-
"storageclasses",
99-
"validatingwebhookconfigurations",
100-
}
101-
102-
injectedCABundleConfigMapPrefix = "injected-cabundle-"
103-
injectedCABundleConfigMapCentral = injectedCABundleConfigMapPrefix + centralCrName
104-
injectedCABundleConfigMapSecuredCluster = injectedCABundleConfigMapPrefix + securedClusterCrName
10541
)
10642

10743
// Deployer is the base deployer for ACS
@@ -146,16 +82,6 @@ type ResourceToDelete struct {
14682
OwnerName string
14783
}
14884

149-
func (d *Deployer) filterResourceKinds(resourceKinds []string) []string {
150-
filteredResourceKinds := make([]string, 0, len(resourceKinds))
151-
for _, resourceKind := range resourceKinds {
152-
if _, ok := d.clusterResourceKinds[resourceKind]; ok {
153-
filteredResourceKinds = append(filteredResourceKinds, resourceKind)
154-
}
155-
}
156-
return filteredResourceKinds
157-
}
158-
15985
func (d *Deployer) deleteResource(ctx context.Context, namespace, resourceType, resourceName string, args ...string) error {
16086
return d.deleteResources(ctx, namespace, []string{resourceType}, append([]string{resourceName}, args...)...)
16187
}
@@ -175,59 +101,43 @@ func (d *Deployer) deleteResources(ctx context.Context, namespace string, resour
175101
return err
176102
}
177103

178-
func (d *Deployer) deleteFinalizers(ctx context.Context, namespace, resourceType, resourceName string) error {
179-
_, err := d.runKubectl(ctx, k8s.KubectlOptions{
180-
Args: []string{
181-
"-n", namespace, "patch", resourceType, resourceName,
182-
"-p", `{"metadata":{"finalizers":null}}`,
183-
"--type=merge",
184-
},
185-
})
186-
return err
187-
}
188-
189104
// Expects that reconciliation for the RHACS operator is paused.
190-
func (d *Deployer) deleteCentralResources(ctx context.Context, wait bool) error {
105+
func (d *Deployer) deleteCentralResources(ctx context.Context) error {
191106
d.logger.Info("Deleting Central resources")
192-
var crExists bool
107+
crExists := true
193108

194-
if d.doesResourceExist(ctx, "central", "stackrox-central-services", d.centralNamespace) {
195-
crExists = true
109+
if _, err := k8s.RetrieveResourceFromCluster(ctx, d.logger, d.centralNamespace, "central", "stackrox-central-services"); err != nil {
110+
if !k8s.IsResourceNotFound(err) {
111+
return fmt.Errorf("retrieving Central CR: %w", err)
112+
}
113+
crExists = false
114+
}
196115

197-
// Trigger async deletion of the Central CR.
198-
err := d.deleteResource(ctx, d.centralNamespace, "central", "stackrox-central-services", "--wait=false")
199-
if err != nil {
200-
return fmt.Errorf("failed to asynchronously delete Central CR: %w", err)
116+
if crExists {
117+
d.logger.Info("Removing any pause-reconcile annotation from Central")
118+
if err := d.removePauseReconcileAnnotation(ctx, "central", "stackrox-central-services", d.centralNamespace); err != nil {
119+
return err
120+
}
121+
if d.verbose {
122+
d.logger.Dim("Removed any pause-reconcile annotation from Central")
201123
}
202124

203-
err = d.deleteFinalizers(ctx, d.centralNamespace, "central", "stackrox-central-services")
125+
err := d.deleteResource(ctx, d.centralNamespace, "central", "stackrox-central-services", "--wait")
204126
if err != nil {
205-
return fmt.Errorf("failed to delete finalizers on Central CR: %w", err)
127+
return err
206128
}
207-
}
208-
209-
// Pause reconciliation for other controllers, not just our RHACS operator.
210-
// This is needed to ensure that there is no race causing the Cluster Network Operator
211-
// to re-create the injected-ca-bundle ConfigMap during resource deletion.
212-
if err := d.preventOtherControllersFromReconciling(ctx, component.Central); err != nil {
213-
return fmt.Errorf("failed to prevent other controllers from reconciling Central resources: %w", err)
214-
}
215-
216-
// Delete other resources by brute force.
217-
resourceKinds := d.filterResourceKinds(allInstallableCentralResourceKinds)
218-
err := d.deleteResources(ctx, d.centralNamespace, resourceKinds, "-l=app.kubernetes.io/part-of=stackrox-central-services")
219-
if err != nil {
220-
return err
129+
if d.verbose {
130+
d.logger.Dim("Deleted Central CR")
131+
}
132+
} else {
133+
d.logger.Info("Deletion of Central resources requested, but Central CR is not present anymore")
221134
}
222135

223136
for _, resource := range []ResourceToDelete{
224-
{Name: "central-db", Kind: "pvc", OwnerName: centralCrName},
225-
{Name: "central-db-backup", Kind: "pvc", OwnerName: centralCrName},
137+
{Name: "central-db", Kind: "pvc"},
138+
{Name: "central-db-backup", Kind: "pvc"},
226139
{Name: "admin-password", Kind: "secret"},
227140
{Name: "scanner-db-password", Kind: "secret", OwnerName: centralCrName},
228-
// In case the Cluster Network Operator has succeeded in re-creating the injected-cabundle configmap
229-
// after our operator has already deleted it.
230-
{Name: injectedCABundleConfigMapCentral, Kind: "configmap"},
231141
} {
232142
d.logger.Dimf("Attempting to delete %s/%s", resource.Kind, resource.Name)
233143
if resource.OwnerName != "" {
@@ -251,86 +161,46 @@ func (d *Deployer) deleteCentralResources(ctx context.Context, wait bool) error
251161
}
252162
}
253163

254-
if crExists {
255-
// Now delete the Central CR synchronously.
256-
err := d.deleteResource(ctx, d.centralNamespace, "central", "stackrox-central-services")
257-
if err != nil {
258-
return fmt.Errorf("failed to delete Central CR: %w", err)
259-
}
260-
}
261-
262164
return nil
263165
}
264166

265-
func (d *Deployer) preventOtherControllersFromReconciling(ctx context.Context, comp component.Component) error {
266-
switch comp {
267-
case component.Central:
268-
return d.preventCABundleInjection(ctx, injectedCABundleConfigMapCentral, d.centralNamespace)
269-
case component.SecuredCluster:
270-
return d.preventCABundleInjection(ctx, injectedCABundleConfigMapSecuredCluster, d.sensorNamespace)
271-
default:
272-
return nil
273-
}
274-
}
275-
276-
func (d *Deployer) preventCABundleInjection(ctx context.Context, configMapName, namespace string) error {
277-
d.logger.Info("Removing CNO label from injected-cabundle ConfigMap to prevent CNO from injecting the CA bundle during cleanup")
278-
_, err := d.runKubectl(ctx, k8s.KubectlOptions{
279-
Args: []string{
280-
"label", "configmap", configMapName, "-n", namespace,
281-
"config.openshift.io/inject-trusted-cabundle-",
282-
},
283-
IgnoreErrors: true,
284-
})
285-
286-
if err != nil {
287-
d.logger.Warningf("Failed to remove CNO label from %s: %v", configMapName, err)
288-
}
289-
290-
return nil
291-
}
292-
293-
func (d *Deployer) deleteSecuredClusterResources(ctx context.Context, wait bool) error {
167+
func (d *Deployer) deleteSecuredClusterResources(ctx context.Context) error {
294168
d.logger.Info("Deleting SecuredCluster resources")
295-
var crExists bool
169+
crExists := true
296170

297-
if d.doesResourceExist(ctx, "securedcluster", "stackrox-secured-cluster-services", d.sensorNamespace) {
298-
crExists = true
171+
if _, err := k8s.RetrieveResourceFromCluster(ctx, d.logger, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services"); err != nil {
172+
if !k8s.IsResourceNotFound(err) {
173+
return fmt.Errorf("retrieving SecuredCluster CR: %w", err)
174+
}
175+
crExists = false
176+
}
299177

300-
// Trigger async deletion of the SecuredCluster CR.
301-
err := d.deleteResource(ctx, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services", "--wait=false")
302-
if err != nil {
178+
if crExists {
179+
d.logger.Info("Removing any pause-reconcile annotation from SecuredCluster")
180+
if err := d.removePauseReconcileAnnotation(ctx, "securedcluster", "stackrox-secured-cluster-services", d.sensorNamespace); err != nil {
303181
return err
304182
}
183+
if d.verbose {
184+
d.logger.Dim("Removed any pause-reconcile annotation from SecuredCluster")
185+
}
305186

306-
err = d.deleteFinalizers(ctx, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services")
187+
err := d.deleteResource(ctx, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services", "--wait")
307188
if err != nil {
308-
return fmt.Errorf("failed to delete finalizers on SecuredCluster CR: %w", err)
189+
return err
309190
}
191+
if d.verbose {
192+
d.logger.Dim("Deleted SecuredCluster CR")
193+
}
194+
} else {
195+
d.logger.Info("Deletion of SecuredCluster resources requested, but SecuredCluster CR is not present anymore")
310196
}
311197

312-
// Pause reconciliation for other controllers, not just our RHACS operator.
313-
// This is needed to ensure that there is no race causing the Cluster Network Operator
314-
// to re-create the injected-ca-bundle ConfigMap during resource deletion.
315-
if err := d.preventOtherControllersFromReconciling(ctx, component.SecuredCluster); err != nil {
316-
return fmt.Errorf("failed to prevent other controllers from reconciling SecuredCluster resources: %w", err)
317-
}
318-
319-
// In the meantime, delete other resources by brute force.
320-
resourceKinds := d.filterResourceKinds(allInstallableSecuredClusterResourceKinds)
321-
err := d.deleteResources(ctx, d.sensorNamespace, resourceKinds, "-l=app.kubernetes.io/part-of=stackrox-secured-cluster-services")
322-
if err != nil {
323-
return err
324-
}
325-
198+
// Delete resources, which are treated special.
326199
for _, resource := range []ResourceToDelete{
327200
{Name: "cluster-registration-secret", Kind: "secret"},
328201
// We need to make sure that don't accidentally delete a scanner-db-password belonging to the central CR,
329202
// when both are deployed into the same namespace.
330203
{Name: "scanner-db-password", Kind: "secret", OwnerName: securedClusterCrName},
331-
// In case the Cluster Network Operator has succeeded in re-creating the injected-cabundle configmap
332-
// after our operator has already deleted it.
333-
{Name: injectedCABundleConfigMapSecuredCluster, Kind: "configmap"},
334204
} {
335205
d.logger.Dimf("Attempting to delete %s/%s", resource.Kind, resource.Name)
336206
if resource.OwnerName != "" {
@@ -353,14 +223,6 @@ func (d *Deployer) deleteSecuredClusterResources(ctx context.Context, wait bool)
353223
}
354224
}
355225

356-
if crExists {
357-
// Now delete the SecuredCluster CR synchronously.
358-
err := d.deleteResource(ctx, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services")
359-
if err != nil {
360-
return fmt.Errorf("failed to delete SecuredCluster CR: %w", err)
361-
}
362-
}
363-
364226
return nil
365227
}
366228

@@ -780,7 +642,7 @@ func (d *Deployer) teardownCentral(ctx context.Context) error {
780642
}
781643

782644
d.logger.Info("⏳ Waiting for Central resources to be fully deleted...")
783-
err := d.deleteCentralResources(ctx, true)
645+
err := d.deleteCentralResources(ctx)
784646
if err != nil {
785647
return fmt.Errorf("failed to delete Central resources: %w", err)
786648
}
@@ -805,7 +667,7 @@ func (d *Deployer) teardownSecuredCluster(ctx context.Context) error {
805667
}
806668

807669
d.logger.Info("⏳ Waiting for SecuredCluster resources to be fully deleted...")
808-
err := d.deleteSecuredClusterResources(ctx, true)
670+
err := d.deleteSecuredClusterResources(ctx)
809671
if err != nil {
810672
return fmt.Errorf("failed to delete SecuredCluster resources: %w", err)
811673
}
@@ -1022,6 +884,22 @@ func (d *Deployer) addPauseReconcileAnnotation(ctx context.Context, resourceType
1022884
return nil
1023885
}
1024886

887+
func (d *Deployer) removePauseReconcileAnnotation(ctx context.Context, resourceType, resourceName, namespace string) error {
888+
_, err := d.runKubectl(ctx, k8s.KubectlOptions{
889+
Args: []string{
890+
"annotate", resourceType, resourceName,
891+
"-n", namespace,
892+
fmt.Sprintf("%s-", pauseReconcileAnnotationKey),
893+
},
894+
IgnoreErrors: true,
895+
})
896+
if err != nil {
897+
return fmt.Errorf("failed to remove pause-reconcile annotation: %w", err)
898+
}
899+
900+
return nil
901+
}
902+
1025903
func (d *Deployer) SetDeployOperator(deployOperator bool) {
1026904
d.shouldDeployOperator = deployOperator
1027905
}

0 commit comments

Comments
 (0)