Skip to content

Commit 234f0fc

Browse files
authored
Remove SubnetPort when Pod is in terminating state (#1441)
Previously, the logic ignored Pods in a terminating state to allow for graceful shutdown. However, if a Namespace (NS) or StatefulSet (STS) is deleted and immediately recreated with the same name, the lingering terminating Pods can interfere with garbage collection (GC) or cause resource conflicts. This change ensures that the SubnetPort is promptly removed when a Pod enters the terminating state, preventing GC failures during rapid delete-and-recreate scenarios. Check the pod sts ID to avoid pod belonging to the new sts
1 parent 7244967 commit 234f0fc

4 files changed

Lines changed: 184 additions & 6 deletions

File tree

pkg/controllers/common/utils.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,5 +614,6 @@ func PodIsDeleted(pod *v1.Pod) bool {
614614
return false
615615
}
616616
return pod.Status.Phase == v1.PodSucceeded ||
617-
pod.Status.Phase == v1.PodFailed
617+
pod.Status.Phase == v1.PodFailed ||
618+
!pod.ObjectMeta.DeletionTimestamp.IsZero()
618619
}

pkg/controllers/common/utils_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1164,7 +1164,7 @@ func TestPodIsDeleted(t *testing.T) {
11641164
{"terminating_running", &v1.Pod{
11651165
ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now, Finalizers: []string{"x"}},
11661166
Status: v1.PodStatus{Phase: v1.PodRunning},
1167-
}, false},
1167+
}, true},
11681168
{"succeeded", &v1.Pod{Status: v1.PodStatus{Phase: v1.PodSucceeded}}, true},
11691169
{"failed", &v1.Pod{Status: v1.PodStatus{Phase: v1.PodFailed}}, true},
11701170
}

pkg/controllers/statefulset/statefulset_controller.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,10 @@ func (r *StatefulSetReconciler) CollectGarbage(ctx context.Context) error {
322322
if podName != "" && namespace != "" {
323323
pod := &corev1.Pod{}
324324
if err := r.Client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: podName}, pod); err == nil && !common.PodIsDeleted(pod) {
325-
log.Debug("GC: pod still exists, skipping orphaned port deletion", "pod", podName)
326-
continue
325+
if isPodBelongToStatefulSet(pod, types.UID(stsID)) {
326+
log.Debug("GC: pod still exists and belongs to StatefulSet, skipping orphaned port deletion", "pod", podName)
327+
continue
328+
}
327329
}
328330
}
329331
log.Debug("Found orphaned subnet port for deleted StatefulSet", "port", *port.Id, "stsUID", stsID)
@@ -339,6 +341,15 @@ func (r *StatefulSetReconciler) CollectGarbage(ctx context.Context) error {
339341
return nil
340342
}
341343

344+
func isPodBelongToStatefulSet(pod *corev1.Pod, stsID types.UID) bool {
345+
for _, owner := range pod.OwnerReferences {
346+
if owner.Kind == "StatefulSet" && owner.UID == stsID {
347+
return true
348+
}
349+
}
350+
return false
351+
}
352+
342353
func (r *StatefulSetReconciler) GetOrdinalRange(sts *appsv1.StatefulSet) (int, int) {
343354
start := 0
344355
if sts.Spec.Ordinals != nil {

pkg/controllers/statefulset/statefulset_controller_test.go

Lines changed: 168 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,8 +1235,8 @@ func TestReleaseSubnetPortsForStatefulSet_PodTerminatingSkipsDeleteWithMatchingU
12351235

12361236
pending, err := r.releaseSubnetPortsForStatefulSet(context.Background(), "default", "test-sts")
12371237
assert.NoError(t, err)
1238-
assert.True(t, pending, "terminating-but-not-terminal pod should defer delete and surface pending for requeue")
1239-
assert.Equal(t, 0, deleteCalls, "PodIsDeleted is terminal phase only: Running+DeletionTimestamp must not release port when UID still matches")
1238+
assert.False(t, pending, "terminating-but-not-terminal pod should defer delete and surface pending for requeue")
1239+
assert.Equal(t, 1, deleteCalls, "PodIsDeleted is terminal phase only: Running+DeletionTimestamp must not release port when UID still matches")
12401240
}
12411241

12421242
func TestReleaseSubnetPortsForStatefulSet_PodUIDMismatchDeletesPort(t *testing.T) {
@@ -2077,3 +2077,169 @@ func TestCollectGarbage_GetPodErrorDoesNotSkipDelete(t *testing.T) {
20772077
require.NoError(t, r.CollectGarbage(context.Background()))
20782078
assert.GreaterOrEqual(t, deleteCalls, 1)
20792079
}
2080+
2081+
func TestCollectGarbage_OrphanedPortPodBelongsToSts_SkipsDelete(t *testing.T) {
2082+
// Setup: orphaned port (stsID not in statefulSetUIDs), pod existed and ownerReference matches stsID
2083+
fakeClient := fake.NewClientBuilder().WithObjects(&corev1.Pod{
2084+
ObjectMeta: metav1.ObjectMeta{
2085+
Name: "test-pod-0",
2086+
Namespace: "default",
2087+
OwnerReferences: []metav1.OwnerReference{{
2088+
Kind: "StatefulSet",
2089+
UID: "deleted-sts-uid",
2090+
}},
2091+
},
2092+
}).Build()
2093+
subnetPortService := &subnetportservice.SubnetPortService{
2094+
Service: servicecommon.Service{
2095+
NSXClient: &nsx.Client{},
2096+
NSXConfig: testNSXConfigWithStatefulSetPodEnhance(),
2097+
},
2098+
SubnetPortStore: &subnetportservice.SubnetPortStore{},
2099+
}
2100+
2101+
r := &StatefulSetReconciler{
2102+
Client: fakeClient,
2103+
SubnetPortService: subnetPortService,
2104+
}
2105+
defer patchNSXClientStatefulSetPodVersion(t, true)()
2106+
2107+
// Patch GetByIndex: only orphaned port
2108+
patches := gomonkey.ApplyFunc(
2109+
(*subnetportservice.SubnetPortStore).GetByIndex,
2110+
func(s *subnetportservice.SubnetPortStore, indexKey string, indexValue string) []*model.VpcSubnetPort {
2111+
if indexKey == servicecommon.IndexKeyAllStsPorts {
2112+
stsUIDScope := "nsx-op/sts_uid"
2113+
nsScope := "nsx-op/namespace"
2114+
podNameScope := "nsx-op/pod_name"
2115+
return []*model.VpcSubnetPort{
2116+
{Id: servicecommon.String("port-orphan"),
2117+
Tags: []model.Tag{
2118+
{Scope: &stsUIDScope, Tag: servicecommon.String("deleted-sts-uid")},
2119+
{Scope: &nsScope, Tag: servicecommon.String("default")},
2120+
{Scope: &podNameScope, Tag: servicecommon.String("test-pod-0")},
2121+
}},
2122+
}
2123+
}
2124+
if indexKey == servicecommon.TagScopeStatefulSetUID {
2125+
return []*model.VpcSubnetPort{}
2126+
}
2127+
return []*model.VpcSubnetPort{}
2128+
})
2129+
var deleteCalls int
2130+
patches.ApplyFunc(
2131+
(*subnetportservice.SubnetPortService).DeleteSubnetPort,
2132+
func(s *subnetportservice.SubnetPortService, port *model.VpcSubnetPort) error {
2133+
deleteCalls++
2134+
return nil
2135+
})
2136+
defer patches.Reset()
2137+
2138+
err := r.CollectGarbage(context.Background())
2139+
assert.NoError(t, err)
2140+
assert.Equal(t, 0, deleteCalls, "orphaned port should not be deleted when pod belongs to the deleted sts")
2141+
}
2142+
2143+
func Test_isPodBelongToStatefulSet(t *testing.T) {
2144+
targetStsUID := types.UID("sts-uid-111")
2145+
wrongStsUID := types.UID("sts-uid-222")
2146+
2147+
tests := []struct {
2148+
name string
2149+
pod *corev1.Pod
2150+
stsUID types.UID
2151+
expected bool
2152+
}{
2153+
{
2154+
name: "pod owner matches correct StatefulSet",
2155+
pod: &corev1.Pod{
2156+
ObjectMeta: metav1.ObjectMeta{
2157+
OwnerReferences: []metav1.OwnerReference{
2158+
{
2159+
Kind: "StatefulSet",
2160+
Name: "my-sts",
2161+
APIVersion: "apps/v1",
2162+
UID: targetStsUID,
2163+
},
2164+
},
2165+
},
2166+
},
2167+
stsUID: targetStsUID,
2168+
expected: true,
2169+
},
2170+
{
2171+
name: "pod owner is StatefulSet but UID mismatches",
2172+
pod: &corev1.Pod{
2173+
ObjectMeta: metav1.ObjectMeta{
2174+
OwnerReferences: []metav1.OwnerReference{
2175+
{
2176+
Kind: "StatefulSet",
2177+
Name: "my-sts",
2178+
APIVersion: "apps/v1",
2179+
UID: wrongStsUID,
2180+
},
2181+
},
2182+
},
2183+
},
2184+
stsUID: targetStsUID,
2185+
expected: false,
2186+
},
2187+
{
2188+
name: "pod owner UID matches but Kind is not StatefulSet",
2189+
pod: &corev1.Pod{
2190+
ObjectMeta: metav1.ObjectMeta{
2191+
OwnerReferences: []metav1.OwnerReference{
2192+
{
2193+
Kind: "Deployment",
2194+
Name: "my-deploy",
2195+
APIVersion: "apps/v1",
2196+
UID: targetStsUID,
2197+
},
2198+
},
2199+
},
2200+
},
2201+
stsUID: targetStsUID,
2202+
expected: false,
2203+
},
2204+
{
2205+
name: "pod has multiple owners and one matches",
2206+
pod: &corev1.Pod{
2207+
ObjectMeta: metav1.ObjectMeta{
2208+
OwnerReferences: []metav1.OwnerReference{
2209+
{
2210+
Kind: "ReplicaSet",
2211+
Name: "my-rs",
2212+
APIVersion: "apps/v1",
2213+
UID: "rs-uid",
2214+
},
2215+
{
2216+
Kind: "StatefulSet",
2217+
Name: "my-sts",
2218+
APIVersion: "apps/v1",
2219+
UID: targetStsUID,
2220+
},
2221+
},
2222+
},
2223+
},
2224+
stsUID: targetStsUID,
2225+
expected: true,
2226+
},
2227+
{
2228+
name: "pod has no owner references",
2229+
pod: &corev1.Pod{
2230+
ObjectMeta: metav1.ObjectMeta{
2231+
OwnerReferences: nil,
2232+
},
2233+
},
2234+
stsUID: targetStsUID,
2235+
expected: false,
2236+
},
2237+
}
2238+
2239+
for _, tt := range tests {
2240+
t.Run(tt.name, func(t *testing.T) {
2241+
actual := isPodBelongToStatefulSet(tt.pod, tt.stsUID)
2242+
assert.Equal(t, tt.expected, actual)
2243+
})
2244+
}
2245+
}

0 commit comments

Comments
 (0)