Mon Jan 12 21:12:06 CST 2026

Zeusro · Zeusro · commit 9255f3d3a69d · 2026-01-12T21:12:07.000+08:00
diff --git a/cmd/killer/node_killer.go b/cmd/killer/node_killer.go
@@ -3,11 +3,14 @@ package killer
 import (
 	"context"
 	"encoding/json"
+	"fmt"
+	"time"
 
 	"github.com/rs/zerolog/log"
 
 	"github.com/p-program/kube-killer/core"
 	v1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/strategicpatch"
@@ -54,78 +57,211 @@ func (k *NodeKiller) Kill() error {
 	if err != nil {
 		panic(err.Error())
 	}
+	ctx := context.TODO()
+
 	if !k.mafia {
-		//kubectl cordon $node
+		// Step 1: kubectl cordon $node - Mark node as unschedulable
 		log.Info().Msgf("kubectl cordon %s", k.nodeName)
-		getOption := metav1.GetOptions{}
-		node, err := clientset.CoreV1().Nodes().Get(context.TODO(), k.nodeName, getOption)
-		if err != nil {
-			return err
-		}
-		oldData, err := json.Marshal(node)
-		if err != nil {
-			return err
-		}
-		node.Spec.Unschedulable = true
-		newData, err := json.Marshal(node)
+		err = k.cordonNode(clientset, ctx)
 		if err != nil {
-			return err
+			return fmt.Errorf("failed to cordon node %s: %w", k.nodeName, err)
 		}
-		patchBytes, patchErr := strategicpatch.CreateTwoWayMergePatch(oldData, newData, node)
-		if patchErr != nil {
-			return patchErr
-		}
-		patchOptions := metav1.PatchOptions{}
-		if k.dryRun {
-			patchOptions.DryRun = []string{metav1.DryRunAll}
-		}
-		_, err = clientset.CoreV1().Nodes().Patch(context.TODO(), k.nodeName, types.StrategicMergePatchType, patchBytes, patchOptions)
-		if err != nil {
-			return err
-		}
-		// Drain all pods from the node except DaemonSet pods
-		// Similar to: kubectl drain $node --ignore-daemonsets
-		err = k.drainNodePods(clientset)
+
+		// Step 2: kubectl drain $node --ignore-daemonsets - Evict all pods except DaemonSet pods
+		log.Info().Msgf("kubectl drain %s --ignore-daemonsets", k.nodeName)
+		err = k.drainNodePods(clientset, ctx)
 		if err != nil {
-			return err
+			return fmt.Errorf("failed to drain node %s: %w", k.nodeName, err)
 		}
 	}
-	//kubectl delete $node
+
+	// Step 3: kubectl delete $node - Delete the node
+	log.Info().Msgf("kubectl delete node %s", k.nodeName)
+	err = k.deleteNode(clientset, ctx)
+	if err != nil {
+		return fmt.Errorf("failed to delete node %s: %w", k.nodeName, err)
+	}
+
+	log.Info().Msgf("Successfully completed node deletion process for %s", k.nodeName)
 	return nil
 }
 
+// cordonNode marks the node as unschedulable, like `kubectl cordon`. It is a
+// no-op when the node is already cordoned; in dry-run mode it only logs.
+func (k *NodeKiller) cordonNode(clientset *kubernetes.Clientset, ctx context.Context) error {
+	node, err := clientset.CoreV1().Nodes().Get(ctx, k.nodeName, metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+
+	// Nothing to patch if the node is already unschedulable.
+	if node.Spec.Unschedulable {
+		log.Info().Msgf("Node %s is already cordoned", k.nodeName)
+		return nil
+	}
+
+	// Dry run: report the intent and stop before building or sending the patch.
+	if k.dryRun {
+		log.Info().Msgf("[DRY RUN] Would cordon node %s", k.nodeName)
+		return nil
+	}
+
+	// Serialize the node before and after flipping the flag; the patch is
+	// computed as the difference between the two documents.
+	oldData, err := json.Marshal(node)
+	if err != nil {
+		return err
+	}
+	node.Spec.Unschedulable = true
+	newData, err := json.Marshal(node)
+	if err != nil {
+		return err
+	}
+	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, node)
+	if err != nil {
+		return err
+	}
+
+	_, err = clientset.CoreV1().Nodes().Patch(ctx, k.nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
+	return err
+}
+
 // drainNodePods evicts all pods from the node except DaemonSet pods
 // Similar to: kubectl drain $node --ignore-daemonsets
-func (k *NodeKiller) drainNodePods(clientset *kubernetes.Clientset) error {
+func (k *NodeKiller) drainNodePods(clientset *kubernetes.Clientset, ctx context.Context) error {
 	log.Info().Msgf("Draining pods from node %s (ignoring DaemonSet pods)", k.nodeName)
 
 	// Get all pods on this node
 	fieldSelector := "spec.nodeName=" + k.nodeName
-	pods, err := clientset.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{
+	pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{
 		FieldSelector: fieldSelector,
 	})
 	if err != nil {
 		return err
 	}
 
-	evictedCount := 0
+	if len(pods.Items) == 0 {
+		log.Info().Msgf("No pods found on node %s", k.nodeName)
+		return nil
+	}
+
+	// First pass: request eviction of every non-DaemonSet pod
+	evictedPods := make([]v1.Pod, 0)
 	for _, pod := range pods.Items {
 		// Check if pod belongs to a DaemonSet
 		if k.isDaemonSetPod(pod) {
 			log.Info().Msgf("Skipping DaemonSet pod %s/%s", pod.Namespace, pod.Name)
 			continue
 		}
 
+		// Already-terminating pods need no eviction request but are still waited on below
+		if pod.DeletionTimestamp != nil {
+			log.Info().Msgf("Pod %s/%s is already terminating, skipping", pod.Namespace, pod.Name)
+			evictedPods = append(evictedPods, pod)
+			continue
+		}
+
 		log.Info().Msgf("Evicting pod %s/%s from node %s", pod.Namespace, pod.Name, k.nodeName)
-		err = clientset.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, k.deleteOption)
+		err = k.evictPod(clientset, ctx, pod)
 		if err != nil {
 			log.Error().Err(err).Msgf("Failed to evict pod %s/%s", pod.Namespace, pod.Name)
+			// Best effort: a failed eviction is logged and the remaining pods are still processed
 			continue
 		}
-		evictedCount++
+		evictedPods = append(evictedPods, pod)
+	}
+
+	// Second pass: wait for termination; skipped in dry-run, where nothing was actually evicted
+	if len(evictedPods) > 0 && !k.dryRun {
+		log.Info().Msgf("Waiting for %d pods to terminate on node %s", len(evictedPods), k.nodeName)
+		err = k.waitForPodsToTerminate(clientset, ctx, evictedPods)
+		if err != nil {
+			log.Warn().Err(err).Msgf("Some pods may not have terminated gracefully")
+		}
+	}
+
+	log.Info().Msgf("Successfully evicted %d pods from node %s", len(evictedPods), k.nodeName)
+	return nil
+}
+
+// evictPod requests eviction of a single pod through the policy/v1 Eviction
+// API. In dry-run mode it only logs the pod that would have been evicted.
+func (k *NodeKiller) evictPod(clientset *kubernetes.Clientset, ctx context.Context, pod v1.Pod) error {
+	// Dry run never reaches the API server.
+	if k.dryRun {
+		log.Info().Msgf("[DRY RUN] Would evict pod %s/%s", pod.Namespace, pod.Name)
+		return nil
+	}
+
+	// The Eviction object only has to identify the target pod.
+	target := metav1.ObjectMeta{
+		Name:      pod.Name,
+		Namespace: pod.Namespace,
+	}
+	request := &policyv1.Eviction{ObjectMeta: target}
+
+	// Unlike a plain delete, an eviction is checked against PodDisruptionBudgets.
+	if err := clientset.PolicyV1().Evictions(pod.Namespace).Evict(ctx, request); err != nil {
+		return fmt.Errorf("failed to evict pod %s/%s: %w", pod.Namespace, pod.Name, err)
+	}
+	return nil
+}
+
+// waitForPodsToTerminate polls every 5s, for up to 5 minutes, until none of
+// the given pods remains; it returns an error on timeout or ctx cancellation.
+func (k *NodeKiller) waitForPodsToTerminate(clientset *kubernetes.Clientset, ctx context.Context, pods []v1.Pod) error {
+	interval := 5 * time.Second
+	deadline := time.Now().Add(5 * time.Minute)
+	for time.Now().Before(deadline) {
+		remainingPods := 0
+		for _, pod := range pods {
+			currentPod, err := clientset.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
+			if err != nil {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				// Best effort: any other lookup error is taken to mean the pod is gone.
+				continue
+			}
+			// A different UID means a new pod reuses the name; the evicted pod itself is gone.
+			if currentPod.UID != pod.UID {
+				continue
+			}
+			// DeletionTimestamp alone means "terminating", not "terminated": keep
+			// waiting until the pod disappears or reaches a final phase.
+			if currentPod.Status.Phase != v1.PodSucceeded && currentPod.Status.Phase != v1.PodFailed {
+				remainingPods++
+			}
+		}
+		if remainingPods == 0 {
+			log.Info().Msgf("All pods have been terminated on node %s", k.nodeName)
+			return nil
+		}
+		log.Info().Msgf("Waiting for %d pods to terminate on node %s...", remainingPods, k.nodeName)
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-time.After(interval):
+		}
+	}
+	return fmt.Errorf("timeout waiting for pods to terminate on node %s", k.nodeName)
+}
+
+// deleteNode removes the Node object from the cluster (`kubectl delete node`).
+func (k *NodeKiller) deleteNode(clientset *kubernetes.Clientset, ctx context.Context) error {
+	// Dry run: only report the intent; no delete request is sent.
+	if k.dryRun {
+		log.Info().Msgf("[DRY RUN] Would delete node %s", k.nodeName)
+		return nil
+	}
+
+	// Return the raw error: Kill already wraps it with "failed to delete node".
+	err := clientset.CoreV1().Nodes().Delete(ctx, k.nodeName, metav1.DeleteOptions{})
+	if err != nil {
+		return err
 	}
 
-	log.Info().Msgf("Evicted %d pods from node %s", evictedCount, k.nodeName)
+	log.Info().Msgf("Node %s deletion initiated", k.nodeName)
 	return nil
 }
 
diff --git a/docs/change.md b/docs/change.md
@@ -1,5 +1,91 @@
 # change logs
 
+## 优化 NodeKiller 实现：使用 Evict API 并完善节点删除流程
+
+2026-01-12
+
+参考 `kubectl drain` 和 `kubectl delete node` 的标准流程，对 `cmd/killer/node_killer.go` 进行了全面优化，使用 Evict API 替代 Delete API，并实现了完整的节点删除流程。
+
+### 主要改进
+
+1. **使用 Evict API 替代 Delete API**
+   - 使用 `PolicyV1().Evictions().Evict()` 替代 `Pods().Delete()`
+   - 符合 `kubectl drain` 的标准行为
+   - 自动尊重 PodDisruptionBudgets（PDB）
+   - 支持 pod 的优雅终止（graceful termination）
+
+2. **实现完整的三步节点删除流程**
+   - **Step 1: `cordonNode()`** - 标记节点为不可调度（`kubectl cordon`）
+     - 设置 `node.Spec.Unschedulable = true`
+     - 防止新的 pod 调度到该节点
+     - 检查节点是否已处于 cordon 状态，避免重复操作
+   - **Step 2: `drainNodePods()`** - 驱逐除 DaemonSet 外的所有 pod（`kubectl drain --ignore-daemonsets`）
+     - 使用 Evict API 驱逐所有非 DaemonSet pod
+     - 自动跳过 DaemonSet pod
+     - 跳过已处于终止状态的 pod
+     - 等待所有 pod 优雅终止（最多 5 分钟）
+   - **Step 3: `deleteNode()`** - 删除节点（`kubectl delete node`）
+     - 删除节点资源
+     - 支持 dry-run 模式
+
+3. **添加等待和超时机制**
+   - `waitForPodsToTerminate()` 方法等待所有 pod 终止
+   - 超时时间：5 分钟
+   - 检查间隔：5 秒
+   - 智能检测 pod 状态（已删除、已终止、运行中等）
+
+4. **代码结构优化**
+   - 将功能拆分为独立的方法：`cordonNode()`、`drainNodePods()`、`evictPod()`、`waitForPodsToTerminate()`、`deleteNode()`
+   - 改进错误处理和日志记录
+   - 使用 `fmt.Errorf` 包装错误，提供更详细的上下文信息
+   - 单个 pod 驱逐失败不会中断整个流程
+
+5. **增强的错误处理**
+   - 每个步骤都有独立的错误处理
+   - 超时后给出警告而非直接失败
+   - 完善的日志输出，便于调试和追踪
+
+### 技术实现
+
+- 使用 `k8s.io/api/policy/v1` 包的 `Eviction` API
+- 使用 `clientset.PolicyV1().Evictions(namespace).Evict()` 方法
+- 通过 `fieldSelector` 查询节点上的所有 pod
+- 检查 pod 的 `OwnerReferences` 判断是否为 DaemonSet pod
+- 使用 `DeletionTimestamp` 和 `Status.Phase` 判断 pod 状态
+
+### 使用示例
+
+```bash
+# 正常删除节点（cordon -> drain -> delete）
+kube-killer kill node my-node-name
+
+# 强制删除节点（跳过 cordon 和 drain，直接 delete）
+kube-killer kill node my-node-name --mafia
+
+# 预览模式
+kube-killer kill node my-node-name --dryrun
+```
+
+### 流程对比
+
+**改进前：**
+- 使用 `Delete()` API 直接删除 pod（不够优雅）
+- 缺少等待 pod 终止的逻辑
+- 缺少删除节点的实现（只有注释）
+
+**改进后：**
+- 使用 `Evict()` API 优雅驱逐 pod
+- 完整的等待和超时机制
+- 实现完整的三步流程：cordon → drain → delete
+- 与 `kubectl drain --ignore-daemonsets` 的基本流程一致（驱逐失败时仅记录日志、不自动重试，等待超时后仅给出警告）
+
+### 参考
+
+- [kubectl drain](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#drain)
+- [kubectl cordon](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#cordon)
+- [Kubernetes API Eviction](https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/)
+- [PodDisruptionBudgets](https://kubernetes.io/docs/tasks/run-application/configure-pdb/)
+
 ## Operator 模式增强：支持特定命名空间删除和特定时间点执行
 
 2026-01-12