@@ -60,7 +60,8 @@ func (impl *InformerImpl) getStopChannel(informerFactory kubeinformers.SharedInf
6060func (impl * InformerImpl ) checkIfPodDeletedAndUpdateMessage (podName , namespace string ,
6161 nodeStatus v1alpha1.NodeStatus , config * rest.Config ) (v1alpha1.NodeStatus , bool ) {
6262
63- if (nodeStatus .Phase == v1alpha1 .NodeFailed || nodeStatus .Phase == v1alpha1 .NodeError ) && (nodeStatus .Message == bean .ExitCode143Error || nodeStatus .Message == bean .NodeNoLongerExists ) {
63+ if (nodeStatus .Phase == v1alpha1 .NodeFailed || nodeStatus .Phase == v1alpha1 .NodeError ) && (nodeStatus .Message == bean .ExitCode143Error || nodeStatus .Message == bean .NodeNoLongerExists ||
64+ nodeStatus .Message == bean .NodeForceDeleted ) {
6465 clusterClient , k8sErr := impl .k8sUtil .GetK8sClientForConfig (config )
6566 if k8sErr != nil {
6667 return nodeStatus , false
@@ -84,20 +85,40 @@ func (impl *InformerImpl) checkIfPodDeletedAndUpdateMessage(podName, namespace s
8485
8586func (impl * InformerImpl ) assessNodeStatus (eventType string , pod * coreV1.Pod ) v1alpha1.NodeStatus {
8687 nodeStatus := v1alpha1.NodeStatus {}
87- switch pod .Status .Phase {
88- case coreV1 .PodPending :
89- nodeStatus .Phase = v1alpha1 .NodePending
90- nodeStatus .Message = getPendingReason (pod )
91- case coreV1 .PodSucceeded :
92- nodeStatus .Phase = v1alpha1 .NodeSucceeded
93- case coreV1 .PodFailed :
94- nodeStatus .Phase , nodeStatus .Message = impl .inferFailedReason (eventType , pod )
95- impl .logger .Infof ("Pod %s failed: %s" , pod .Name , nodeStatus .Message )
96- case coreV1 .PodRunning :
97- nodeStatus .Phase = v1alpha1 .NodeRunning
98- default :
99- nodeStatus .Phase = v1alpha1 .NodeError
100- nodeStatus .Message = fmt .Sprintf ("Unexpected pod phase for %s: %s" , pod .ObjectMeta .Name , pod .Status .Phase )
88+
89+ /*
90+ Special handling for delete events in force-delete scenarios: Kubernetes does NOT guarantee that the pod status
91+ will be updated to "Failed" during force delete. Sometimes the pod phase can be "Running" even after force delete.
92+ Force deletion immediately removes the Pod object from the Kubernetes API server without waiting for
93+ the kubelet on the node to confirm termination.
94+
95+ If the application within the pod on the node is still running and hasn't received a termination signal or processed
96+ it yet, the container processes might continue to exist on the node even after the Pod object is gone from the API
97+ server. This can lead to a state where the pod effectively exists on the node, but Kubernetes no longer tracks it,
98+ and it might appear as Running if you were to inspect the node's process list.
99+ */
100+ if eventType == bean .DeleteEvent && isPodForceDeletedWhileRunning (pod ) {
101+ // Force delete detected - treat as failed regardless of current phase
102+ impl .logger .Infow ("Force delete detected for pod" , "podName" , pod .Name , "currentPhase" , pod .Status .Phase , "deletionGracePeriod" , * pod .DeletionGracePeriodSeconds )
103+ nodeStatus .Phase = v1alpha1 .NodeFailed
104+ nodeStatus .Message = bean .NodeForceDeleted
105+ return nodeStatus
106+ } else {
107+ switch pod .Status .Phase {
108+ case coreV1 .PodPending :
109+ nodeStatus .Phase = v1alpha1 .NodePending
110+ nodeStatus .Message = getPendingReason (pod )
111+ case coreV1 .PodSucceeded :
112+ nodeStatus .Phase = v1alpha1 .NodeSucceeded
113+ case coreV1 .PodFailed :
114+ nodeStatus .Phase , nodeStatus .Message = impl .inferFailedReason (eventType , pod )
115+ impl .logger .Infof ("Pod %s failed: %s" , pod .Name , nodeStatus .Message )
116+ case coreV1 .PodRunning :
117+ nodeStatus .Phase = v1alpha1 .NodeRunning
118+ default :
119+ nodeStatus .Phase = v1alpha1 .NodeError
120+ nodeStatus .Message = fmt .Sprintf ("Unexpected pod phase for %s: %s" , pod .ObjectMeta .Name , pod .Status .Phase )
121+ }
101122 }
102123
103124 // only update Pod IP for daemoned nodes to reduce number of updates
0 commit comments