Skip to content

Commit 7ce7dc1

Browse files
committed
(bug) deployment error
When Sveltos deploys Kubernetes resources using _PolicyRefs_ or _KustomizationRefs_, it should only remove stale resources **after** the new resources have been successfully deployed. If deployment fails for any reason, stale resources should not be removed. For example, if Sveltos previously deployed resources A, B, and C, and a new update (with invalid data) attempts to update resource B but fails, Sveltos currently treats B and C as stale. This is because it successfully redeploys A, fails on B, and assumes only A is needed—leading to incorrect cleanup.
1 parent 2cc46a5 commit 7ce7dc1

4 files changed

Lines changed: 233 additions & 11 deletions

File tree

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controllers/handlers_kustomize.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,12 @@ func deployKustomizeRefs(ctx context.Context, c client.Client,
146146
return err
147147
}
148148

149+
// If a deployment error happened, do not try to clean stale resources. Because of the error potentially
150+
// all resources might be considered stale at this time.
151+
if deployError != nil {
152+
return deployError
153+
}
154+
149155
var undeployed []configv1beta1.ResourceReport
150156
_, undeployed, err = cleanStaleKustomizeResources(ctx, remoteRestConfig, remoteClient, clusterSummary,
151157
localResourceReports, remoteResourceReports, logger)
@@ -174,10 +180,6 @@ func deployKustomizeRefs(ctx context.Context, c client.Client,
174180
return &configv1beta1.DryRunReconciliationError{}
175181
}
176182

177-
if deployError != nil {
178-
return deployError
179-
}
180-
181183
return validateHealthPolicies(ctx, remoteRestConfig, clusterSummary, configv1beta1.FeatureKustomize, logger)
182184
}
183185

controllers/handlers_resources.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ func deployResources(ctx context.Context, c client.Client,
101101
return err
102102
}
103103

104+
// If a deployment error happened, do not try to clean stale resources. Because of the error potentially
105+
// all resources might be considered stale at this time.
106+
if deployError != nil {
107+
return deployError
108+
}
109+
104110
var undeployed []configv1beta1.ResourceReport
105111
_, undeployed, err = cleanStaleResources(ctx, remoteRestConfig, remoteClient, clusterSummary,
106112
localResourceReports, remoteResourceReports, logger)
@@ -125,10 +131,6 @@ func deployResources(ctx context.Context, c client.Client,
125131
return err
126132
}
127133

128-
if deployError != nil {
129-
return deployError
130-
}
131-
132134
return validateHealthPolicies(ctx, remoteRestConfig, clusterSummary, configv1beta1.FeatureResources, logger)
133135
}
134136

test/fv/stale_resource_test.go

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
/*
2+
Copyright 2025. projectsveltos.io. All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package fv_test
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"time"
23+
24+
. "github.com/onsi/ginkgo/v2"
25+
. "github.com/onsi/gomega"
26+
27+
corev1 "k8s.io/api/core/v1"
28+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29+
"k8s.io/apimachinery/pkg/types"
30+
31+
configv1beta1 "github.com/projectsveltos/addon-controller/api/v1beta1"
32+
"github.com/projectsveltos/addon-controller/controllers"
33+
libsveltosv1beta1 "github.com/projectsveltos/libsveltos/api/v1beta1"
34+
)
35+
36+
const (
37+
serviceTemplate = ` apiVersion: v1
38+
kind: Service
39+
metadata:
40+
namespace: %s
41+
name: %s
42+
labels:
43+
app: my-app
44+
environment: production
45+
spec:
46+
selector:
47+
app: my-app
48+
ports:
49+
- port: 80
50+
targetPort: 8080
51+
protocol: TCP
52+
name: http
53+
- port: 443
54+
targetPort: 8443
55+
protocol: TCP
56+
name: https
57+
type: ClusterIP
58+
sessionAffinity: None`
59+
60+
incorrectService = ` apiVersion: v1
61+
kind: Service
62+
metadata:
63+
namespace: %s
64+
name: %s
65+
labels:
66+
app: my-app
67+
environment: production
68+
spec:
69+
# Missing required 'selector' field
70+
ports:
71+
- port: 80
72+
targetPort: 8080
73+
protocol: TCP
74+
name: http
75+
- port: 443
76+
targetPort: 8443
77+
protocol: TCP
78+
name: https
79+
type: ClusterIP
80+
sessionAffinity: None`
81+
)
82+
83+
// This test runs in Serial because it requires the addon controller to only onboard
84+
// capi cluster with onboard annotations
85+
var _ = Describe("Stale Resources", Serial, func() {
86+
const (
87+
namePrefix = "stale-resource"
88+
)
89+
90+
It("Stale resources are removed only after successful deployment", Label("NEW-FV", "EXTENDED"), func() {
91+
Byf("Create a ClusterProfile matching Cluster %s/%s", kindWorkloadCluster.Namespace, kindWorkloadCluster.Name)
92+
clusterProfile := getClusterProfile(namePrefix, map[string]string{key: value})
93+
clusterProfile.Spec.SyncMode = configv1beta1.SyncModeContinuous
94+
Expect(k8sClient.Create(context.TODO(), clusterProfile)).To(Succeed())
95+
96+
verifyClusterProfileMatches(clusterProfile)
97+
98+
verifyClusterSummary(controllers.ClusterProfileLabelName,
99+
clusterProfile.Name, &clusterProfile.Spec,
100+
kindWorkloadCluster.Namespace, kindWorkloadCluster.Name)
101+
102+
configMapNs := randomString()
103+
Byf("Create configMap's namespace %s", configMapNs)
104+
ns := &corev1.Namespace{
105+
ObjectMeta: metav1.ObjectMeta{
106+
Name: configMapNs,
107+
},
108+
}
109+
Expect(k8sClient.Create(context.TODO(), ns)).To(Succeed())
110+
111+
service1 := randomString()
112+
service2 := randomString()
113+
service3 := randomString()
114+
115+
Byf("Create a configMap with Service")
116+
configMap := createConfigMapWithPolicy(configMapNs, namePrefix+randomString(),
117+
fmt.Sprintf(serviceTemplate, configMapNs, service1),
118+
fmt.Sprintf(serviceTemplate, configMapNs, service2),
119+
fmt.Sprintf(serviceTemplate, configMapNs, service3),
120+
)
121+
Expect(k8sClient.Create(context.TODO(), configMap)).To(Succeed())
122+
currentConfigMap := &corev1.ConfigMap{}
123+
Expect(k8sClient.Get(context.TODO(),
124+
types.NamespacedName{Namespace: configMap.Namespace, Name: configMap.Name}, currentConfigMap)).To(Succeed())
125+
126+
Byf("Update ClusterProfile %s to reference ConfigMap %s/%s", clusterProfile.Name, configMap.Namespace, configMap.Name)
127+
currentClusterProfile := &configv1beta1.ClusterProfile{}
128+
Expect(k8sClient.Get(context.TODO(), types.NamespacedName{Name: clusterProfile.Name}, currentClusterProfile)).To(Succeed())
129+
currentClusterProfile.Spec.PolicyRefs = []configv1beta1.PolicyRef{
130+
{
131+
Kind: string(libsveltosv1beta1.ConfigMapReferencedResourceKind),
132+
Namespace: configMap.Namespace,
133+
Name: configMap.Name,
134+
},
135+
}
136+
Expect(k8sClient.Update(context.TODO(), currentClusterProfile)).To(Succeed())
137+
138+
clusterSummary := verifyClusterSummary(controllers.ClusterProfileLabelName,
139+
currentClusterProfile.Name, &currentClusterProfile.Spec,
140+
kindWorkloadCluster.Namespace, kindWorkloadCluster.Name)
141+
142+
Byf("Getting client to access the workload cluster")
143+
workloadClient, err := getKindWorkloadClusterKubeconfig()
144+
Expect(err).To(BeNil())
145+
Expect(workloadClient).ToNot(BeNil())
146+
147+
for _, serviceName := range []string{service1, service2, service3} {
148+
Byf("Verifying Service %s is created in the workload cluster", serviceName)
149+
Eventually(func() error {
150+
currentService := &corev1.Service{}
151+
return workloadClient.Get(context.TODO(),
152+
types.NamespacedName{Namespace: configMapNs, Name: serviceName}, currentService)
153+
}, timeout, pollingInterval).Should(BeNil())
154+
}
155+
156+
Byf("Verifying ClusterSummary %s status is set to Deployed for Resources feature", clusterSummary.Name)
157+
verifyFeatureStatusIsProvisioned(kindWorkloadCluster.Namespace, clusterSummary.Name, configv1beta1.FeatureResources)
158+
159+
By("Updating ConfigMap to reference incorrect Service")
160+
Expect(k8sClient.Get(context.TODO(),
161+
types.NamespacedName{Namespace: configMap.Namespace, Name: configMap.Name}, currentConfigMap)).To(Succeed())
162+
allClusterRoleName := randomString()
163+
currentConfigMap = updateConfigMapWithPolicy(currentConfigMap, fmt.Sprintf(allClusterRole, allClusterRoleName))
164+
Expect(k8sClient.Update(context.TODO(), currentConfigMap)).To(Succeed())
165+
166+
By("Updating ConfigMap to reference also an incorrect Service")
167+
incorrectServiceName := randomString()
168+
Expect(k8sClient.Get(context.TODO(),
169+
types.NamespacedName{Namespace: configMap.Namespace, Name: configMap.Name}, currentConfigMap)).To(Succeed())
170+
currentConfigMap = updateConfigMapWithPolicy(currentConfigMap,
171+
fmt.Sprintf(serviceTemplate, configMapNs, service1),
172+
fmt.Sprintf(incorrectService, configMapNs, incorrectServiceName),
173+
fmt.Sprintf(serviceTemplate, configMapNs, service2),
174+
fmt.Sprintf(serviceTemplate, configMapNs, service3))
175+
Expect(k8sClient.Update(context.TODO(), currentConfigMap)).To(Succeed())
176+
177+
for _, serviceName := range []string{service1, service2, service3} {
178+
Byf("Verifying Service %s is created in the workload cluster", serviceName)
179+
Consistently(func() error {
180+
currentService := &corev1.Service{}
181+
return workloadClient.Get(context.TODO(),
182+
types.NamespacedName{Namespace: configMapNs, Name: serviceName}, currentService)
183+
}, time.Minute, pollingInterval).Should(BeNil())
184+
}
185+
186+
Byf("Verifying Service %s is created in not the workload cluster", incorrectServiceName)
187+
Consistently(func() error {
188+
currentService := &corev1.Service{}
189+
return workloadClient.Get(context.TODO(),
190+
types.NamespacedName{Namespace: configMapNs, Name: incorrectServiceName}, currentService)
191+
}, time.Minute, pollingInterval).ShouldNot(BeNil())
192+
193+
By("Updating ConfigMap to reference also a fourth Service")
194+
service4 := randomString()
195+
Expect(k8sClient.Get(context.TODO(),
196+
types.NamespacedName{Namespace: configMap.Namespace, Name: configMap.Name}, currentConfigMap)).To(Succeed())
197+
currentConfigMap = updateConfigMapWithPolicy(currentConfigMap,
198+
fmt.Sprintf(serviceTemplate, configMapNs, service1),
199+
fmt.Sprintf(serviceTemplate, configMapNs, service4),
200+
fmt.Sprintf(serviceTemplate, configMapNs, service2),
201+
fmt.Sprintf(serviceTemplate, configMapNs, service3))
202+
Expect(k8sClient.Update(context.TODO(), currentConfigMap)).To(Succeed())
203+
204+
for _, serviceName := range []string{service1, service2, service3, service4} {
205+
Byf("Verifying Service %s is created in the workload cluster", serviceName)
206+
Eventually(func() error {
207+
currentService := &corev1.Service{}
208+
return workloadClient.Get(context.TODO(),
209+
types.NamespacedName{Namespace: configMapNs, Name: serviceName}, currentService)
210+
}, timeout, pollingInterval).Should(BeNil())
211+
}
212+
213+
deleteClusterProfile(clusterProfile)
214+
215+
currentNs := &corev1.Namespace{}
216+
Expect(k8sClient.Get(context.TODO(), types.NamespacedName{Name: configMapNs}, currentNs)).To(Succeed())
217+
Expect(k8sClient.Delete(context.TODO(), currentNs)).To(Succeed())
218+
})
219+
})

0 commit comments

Comments
 (0)