Skip to content

Commit 1f7091a

Browse files
author
Moritz Clasmeier
committed
Simply teardown, hopefully making it more robust against CNO races
1 parent 8a74d78 commit 1f7091a

1 file changed

Lines changed: 40 additions & 187 deletions

File tree

internal/deployer/deployer.go

Lines changed: 40 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -38,70 +38,6 @@ var (
3838

3939
// AdminUsername is the default admin username for StackRox Central
4040
AdminUsername = "admin"
41-
42-
// TODO(#91): at some point this will get out of date. If we filter by the app.../part-of
43-
// label anyway, then maybe we should just delete all resource kinds present on cluster?
44-
// also we should use the fully-qualified types
45-
allInstallableCentralResourceKinds = []string{
46-
"applications",
47-
"clusterroles",
48-
"configmaps",
49-
"deployments",
50-
"destinationrules",
51-
"endpoints",
52-
"endpointslices",
53-
"horizontalpodautoscalers",
54-
"networkpolicys",
55-
"leases",
56-
"persistentvolumes",
57-
"persistentvolumeclaims",
58-
"pods",
59-
"podsecuritypolicys",
60-
"prometheusrules",
61-
"roles",
62-
"rolebindings",
63-
"replicasets",
64-
"routes",
65-
"secrets",
66-
"services",
67-
"serviceaccounts",
68-
"servicemonitors",
69-
"storageclasses",
70-
}
71-
72-
allInstallableSecuredClusterResourceKinds = []string{
73-
"clusterroles",
74-
"clusterrolebindings",
75-
"configmaps",
76-
"consoleplugins",
77-
"controllerrevisions",
78-
"daemonsets",
79-
"deployments",
80-
"endpoints",
81-
"endpointslices",
82-
"destinationrules",
83-
"horizontalpodautoscalers",
84-
"networkpolicys",
85-
"leases",
86-
"persistentvolumes",
87-
"persistentvolumeclaims",
88-
"pods",
89-
"podsecuritypolicys",
90-
"prometheusrules",
91-
"replicasets",
92-
"roles",
93-
"rolebindings",
94-
"secrets",
95-
"services",
96-
"serviceaccounts",
97-
"servicemonitors",
98-
"storageclasses",
99-
"validatingwebhookconfigurations",
100-
}
101-
102-
injectedCABundleConfigMapPrefix = "injected-cabundle-"
103-
injectedCABundleConfigMapCentral = injectedCABundleConfigMapPrefix + centralCrName
104-
injectedCABundleConfigMapSecuredCluster = injectedCABundleConfigMapPrefix + securedClusterCrName
10541
)
10642

10743
// Deployer is the base deployer for ACS
@@ -146,16 +82,6 @@ type ResourceToDelete struct {
14682
OwnerName string
14783
}
14884

149-
func (d *Deployer) filterResourceKinds(resourceKinds []string) []string {
150-
filteredResourceKinds := make([]string, 0, len(resourceKinds))
151-
for _, resourceKind := range resourceKinds {
152-
if _, ok := d.clusterResourceKinds[resourceKind]; ok {
153-
filteredResourceKinds = append(filteredResourceKinds, resourceKind)
154-
}
155-
}
156-
return filteredResourceKinds
157-
}
158-
15985
func (d *Deployer) deleteResource(ctx context.Context, namespace, resourceType, resourceName string, args ...string) error {
16086
return d.deleteResources(ctx, namespace, []string{resourceType}, append([]string{resourceName}, args...)...)
16187
}
@@ -175,59 +101,31 @@ func (d *Deployer) deleteResources(ctx context.Context, namespace string, resour
175101
return err
176102
}
177103

178-
func (d *Deployer) deleteFinalizers(ctx context.Context, namespace, resourceType, resourceName string) error {
179-
_, err := d.runKubectl(ctx, k8s.KubectlOptions{
180-
Args: []string{
181-
"-n", namespace, "patch", resourceType, resourceName,
182-
"-p", `{"metadata":{"finalizers":null}}`,
183-
"--type=merge",
184-
},
185-
})
186-
return err
187-
}
188-
189104
// Expects that reconciliation for the RHACS operator is paused.
190-
func (d *Deployer) deleteCentralResources(ctx context.Context, wait bool) error {
105+
func (d *Deployer) deleteCentralResources(ctx context.Context) error {
191106
d.logger.Info("Deleting Central resources")
192-
var crExists bool
193-
194-
if d.doesResourceExist(ctx, "central", "stackrox-central-services", d.centralNamespace) {
195-
crExists = true
196-
197-
// Trigger async deletion of the Central CR.
198-
err := d.deleteResource(ctx, d.centralNamespace, "central", "stackrox-central-services", "--wait=false")
199-
if err != nil {
200-
return fmt.Errorf("failed to asynchronously delete Central CR: %w", err)
201-
}
202107

203-
err = d.deleteFinalizers(ctx, d.centralNamespace, "central", "stackrox-central-services")
204-
if err != nil {
205-
return fmt.Errorf("failed to delete finalizers on Central CR: %w", err)
206-
}
108+
d.logger.Info("Removing any pause-reconcile annotation from Central")
109+
if err := d.removePauseReconcileAnnotation(ctx, "central", "stackrox-central-services", d.centralNamespace); err != nil {
110+
return err
207111
}
208-
209-
// Pause reconciliation for other controllers, not just our RHACS operator.
210-
// This is needed to ensure that there is no race causing the Cluster Network Operator
211-
// to re-create the injected-ca-bundle ConfigMap during resource deletion.
212-
if err := d.preventOtherControllersFromReconciling(ctx, component.Central); err != nil {
213-
return fmt.Errorf("failed to prevent other controllers from reconciling Central resources: %w", err)
112+
if d.verbose {
113+
d.logger.Dim("Removed any pause-reconcile annotation from Central")
214114
}
215115

216-
// Delete other resources by brute force.
217-
resourceKinds := d.filterResourceKinds(allInstallableCentralResourceKinds)
218-
err := d.deleteResources(ctx, d.centralNamespace, resourceKinds, "-l=app.kubernetes.io/part-of=stackrox-central-services")
116+
err := d.deleteResource(ctx, d.centralNamespace, "central", "stackrox-central-services", "--wait")
219117
if err != nil {
220118
return err
221119
}
120+
if d.verbose {
121+
d.logger.Dim("Deleted Central CR")
122+
}
222123

223124
for _, resource := range []ResourceToDelete{
224-
{Name: "central-db", Kind: "pvc", OwnerName: centralCrName},
225-
{Name: "central-db-backup", Kind: "pvc", OwnerName: centralCrName},
125+
{Name: "central-db", Kind: "pvc"},
126+
{Name: "central-db-backup", Kind: "pvc"},
226127
{Name: "admin-password", Kind: "secret"},
227128
{Name: "scanner-db-password", Kind: "secret", OwnerName: centralCrName},
228-
// In case the Cluster Network Operator has succeeded in re-creating the injected-cabundle configmap
229-
// after our operator has already deleted it.
230-
{Name: injectedCABundleConfigMapCentral, Kind: "configmap"},
231129
} {
232130
d.logger.Dimf("Attempting to delete %s/%s", resource.Kind, resource.Name)
233131
if resource.OwnerName != "" {
@@ -251,86 +149,34 @@ func (d *Deployer) deleteCentralResources(ctx context.Context, wait bool) error
251149
}
252150
}
253151

254-
if crExists {
255-
// Now delete the Central CR synchronously.
256-
err := d.deleteResource(ctx, d.centralNamespace, "central", "stackrox-central-services")
257-
if err != nil {
258-
return fmt.Errorf("failed to delete Central CR: %w", err)
259-
}
260-
}
261-
262-
return nil
263-
}
264-
265-
func (d *Deployer) preventOtherControllersFromReconciling(ctx context.Context, comp component.Component) error {
266-
switch comp {
267-
case component.Central:
268-
return d.preventCABundleInjection(ctx, injectedCABundleConfigMapCentral, d.centralNamespace)
269-
case component.SecuredCluster:
270-
return d.preventCABundleInjection(ctx, injectedCABundleConfigMapSecuredCluster, d.sensorNamespace)
271-
default:
272-
return nil
273-
}
274-
}
275-
276-
func (d *Deployer) preventCABundleInjection(ctx context.Context, configMapName, namespace string) error {
277-
d.logger.Info("Removing CNO label from injected-cabundle ConfigMap to prevent CNO from injecting the CA bundle during cleanup")
278-
_, err := d.runKubectl(ctx, k8s.KubectlOptions{
279-
Args: []string{
280-
"label", "configmap", configMapName, "-n", namespace,
281-
"config.openshift.io/inject-trusted-cabundle-",
282-
},
283-
IgnoreErrors: true,
284-
})
285-
286-
if err != nil {
287-
d.logger.Warningf("Failed to remove CNO label from %s: %v", configMapName, err)
288-
}
289-
290152
return nil
291153
}
292154

293-
func (d *Deployer) deleteSecuredClusterResources(ctx context.Context, wait bool) error {
155+
func (d *Deployer) deleteSecuredClusterResources(ctx context.Context) error {
294156
d.logger.Info("Deleting SecuredCluster resources")
295-
var crExists bool
296-
297-
if d.doesResourceExist(ctx, "securedcluster", "stackrox-secured-cluster-services", d.sensorNamespace) {
298-
crExists = true
299-
300-
// Trigger async deletion of the SecuredCluster CR.
301-
err := d.deleteResource(ctx, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services", "--wait=false")
302-
if err != nil {
303-
return err
304-
}
305157

306-
err = d.deleteFinalizers(ctx, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services")
307-
if err != nil {
308-
return fmt.Errorf("failed to delete finalizers on SecuredCluster CR: %w", err)
309-
}
158+
d.logger.Info("Removing any pause-reconcile annotation from SecuredCluster")
159+
if err := d.removePauseReconcileAnnotation(ctx, "securedcluster", "stackrox-secured-cluster-services", d.sensorNamespace); err != nil {
160+
return err
310161
}
311-
312-
// Pause reconciliation for other controllers, not just our RHACS operator.
313-
// This is needed to ensure that there is no race causing the Cluster Network Operator
314-
// to re-create the injected-ca-bundle ConfigMap during resource deletion.
315-
if err := d.preventOtherControllersFromReconciling(ctx, component.SecuredCluster); err != nil {
316-
return fmt.Errorf("failed to prevent other controllers from reconciling SecuredCluster resources: %w", err)
162+
if d.verbose {
163+
d.logger.Dim("Removed any pause-reconcile annotation from SecuredCluster")
317164
}
318165

319-
// In the meantime, delete other resources by brute force.
320-
resourceKinds := d.filterResourceKinds(allInstallableSecuredClusterResourceKinds)
321-
err := d.deleteResources(ctx, d.sensorNamespace, resourceKinds, "-l=app.kubernetes.io/part-of=stackrox-secured-cluster-services")
166+
err := d.deleteResource(ctx, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services", "--wait")
322167
if err != nil {
323168
return err
324169
}
170+
if d.verbose {
171+
d.logger.Dim("Deleted SecuredCluster CR")
172+
}
325173

174+
// Delete resources, which are treated special.
326175
for _, resource := range []ResourceToDelete{
327176
{Name: "cluster-registration-secret", Kind: "secret"},
328177
// We need to make sure that don't accidentally delete a scanner-db-password belonging to the central CR,
329178
// when both are deployed into the same namespace.
330179
{Name: "scanner-db-password", Kind: "secret", OwnerName: securedClusterCrName},
331-
// In case the Cluster Network Operator has succeeded in re-creating the injected-cabundle configmap
332-
// after our operator has already deleted it.
333-
{Name: injectedCABundleConfigMapSecuredCluster, Kind: "configmap"},
334180
} {
335181
d.logger.Dimf("Attempting to delete %s/%s", resource.Kind, resource.Name)
336182
if resource.OwnerName != "" {
@@ -353,14 +199,6 @@ func (d *Deployer) deleteSecuredClusterResources(ctx context.Context, wait bool)
353199
}
354200
}
355201

356-
if crExists {
357-
// Now delete the SecuredCluster CR synchronously.
358-
err := d.deleteResource(ctx, d.sensorNamespace, "securedcluster", "stackrox-secured-cluster-services")
359-
if err != nil {
360-
return fmt.Errorf("failed to delete SecuredCluster CR: %w", err)
361-
}
362-
}
363-
364202
return nil
365203
}
366204

@@ -780,7 +618,7 @@ func (d *Deployer) teardownCentral(ctx context.Context) error {
780618
}
781619

782620
d.logger.Info("⏳ Waiting for Central resources to be fully deleted...")
783-
err := d.deleteCentralResources(ctx, true)
621+
err := d.deleteCentralResources(ctx)
784622
if err != nil {
785623
return fmt.Errorf("failed to delete Central resources: %w", err)
786624
}
@@ -805,7 +643,7 @@ func (d *Deployer) teardownSecuredCluster(ctx context.Context) error {
805643
}
806644

807645
d.logger.Info("⏳ Waiting for SecuredCluster resources to be fully deleted...")
808-
err := d.deleteSecuredClusterResources(ctx, true)
646+
err := d.deleteSecuredClusterResources(ctx)
809647
if err != nil {
810648
return fmt.Errorf("failed to delete SecuredCluster resources: %w", err)
811649
}
@@ -1022,6 +860,21 @@ func (d *Deployer) addPauseReconcileAnnotation(ctx context.Context, resourceType
1022860
return nil
1023861
}
1024862

863+
func (d *Deployer) removePauseReconcileAnnotation(ctx context.Context, resourceType, resourceName, namespace string) error {
864+
_, err := d.runKubectl(ctx, k8s.KubectlOptions{
865+
Args: []string{
866+
"annotate", resourceType, resourceName,
867+
"-n", namespace,
868+
fmt.Sprintf("%s-", pauseReconcileAnnotationKey),
869+
},
870+
})
871+
if err != nil {
872+
return fmt.Errorf("failed to remove pause-reconcile annotation: %w", err)
873+
}
874+
875+
return nil
876+
}
877+
1025878
func (d *Deployer) SetDeployOperator(deployOperator bool) {
1026879
d.shouldDeployOperator = deployOperator
1027880
}

0 commit comments

Comments
 (0)