Skip to content

Commit d413aa4

Browse files
committed
chore: Restrict fix/service-account-deletion to SA race condition only
Move backend.go and backend_test.go resource cleanup changes to review/service-account-deletion (PR #1423) per reviewer request. PR #1421 should only contain the ServiceAccount deletion race condition fix. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Assisted-by: Claude Code:claude [Claude Code]
1 parent 7c134e7 commit d413aa4

2 files changed

Lines changed: 37 additions & 21 deletions

File tree

compute/kubernetes/backend.go

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,29 @@ func (b *Backend) cleanResources(ctx context.Context, taskId string) error {
296296
b.log.Error("deleting Job", "error", err)
297297
}
298298

299+
// Delete PVC
300+
err = resources.DeletePVC(ctx, taskId, b.conf.Kubernetes.JobsNamespace, b.client, b.log)
301+
if err != nil {
302+
errs = multierror.Append(errs, err)
303+
b.log.Error("deleting Worker PVC", "error", err)
304+
}
305+
306+
// Delete per-task ConfigMap only if ConfigMapTemplate was configured
307+
if b.conf.Kubernetes.ConfigMapTemplate != "" {
308+
err = resources.DeleteConfigMap(ctx, taskId, b.conf.Kubernetes.JobsNamespace, b.client, b.log)
309+
if err != nil {
310+
errs = multierror.Append(errs, err)
311+
b.log.Error("deleting Worker ConfigMap", "error", err)
312+
}
313+
}
314+
315+
// Delete RoleBinding
316+
err = resources.DeleteRoleBinding(ctx, taskId, b.conf.Kubernetes.JobsNamespace, b.client, b.log)
317+
if err != nil {
318+
errs = multierror.Append(errs, err)
319+
b.log.Error("deleting Job", "error", err)
320+
}
321+
299322
// Determine the ServiceAccount for this task.
300323
// Default to the conventional task-scoped name; override if the task
301324
// specifies an externally-managed SA via the _WORKER_SA tag.
@@ -314,6 +337,13 @@ func (b *Backend) cleanResources(ctx context.Context, taskId string) error {
314337
b.log.Error("deleting Worker ServiceAccount", "taskID", taskId, "error", err)
315338
}
316339

340+
// Delete Role
341+
err = resources.DeleteRole(ctx, taskId, b.conf.Kubernetes.JobsNamespace, b.client, b.log)
342+
if err != nil {
343+
errs = multierror.Append(errs, err)
344+
b.log.Error("deleting Worker Role", "error", err)
345+
}
346+
317347
// Delete PV
318348
err = resources.DeletePV(ctx, taskId, b.conf.Kubernetes.JobsNamespace, b.client, b.log)
319349
if err != nil {
@@ -671,8 +701,6 @@ func (b *Backend) CleanOrphanedResources(ctx context.Context) {
671701
}
672702
}
673703

674-
// TODO: Add Executor Jobs here beacause orphaned tasks can result in orphaned jobs
675-
676704
for taskID := range taskIDs {
677705
clean, err := b.isResourceCleanupNeeded(ctx, taskID)
678706
if err != nil {

compute/kubernetes/backend_test.go

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,6 @@ func TestTaskSubmission(t *testing.T) {
8686
// Create a fake Kubernetes client
8787
fakeClient := fake.NewSimpleClientset()
8888

89-
// Inject a deterministic UID on every Job create so ownerRef propagation can be verified.
90-
const testJobUID = "test-job-uid-1234"
91-
fakeClient.PrependReactor("create", "jobs", func(action k8stesting.Action) (bool, runtime.Object, error) {
92-
obj := action.(k8stesting.CreateAction).GetObject().(*batchv1.Job)
93-
obj.UID = testJobUID
94-
return false, obj, nil
95-
})
96-
9789
// Create a mock configuration
9890
conf := config.DefaultConfig()
9991
conf.Kubernetes.Namespace = "test-namespace"
@@ -163,18 +155,12 @@ spec:
163155
t.Errorf("expected Job name '%s', got '%s'", task.Id, job.Name)
164156
}
165157

166-
// Verify that the ConfigMap was created with the Job's UID in its ownerRef.
158+
// Verify that the ConfigMap was created
167159
configMapName := "funnel-worker-config-" + task.Id
168-
cm, err := fakeClient.CoreV1().ConfigMaps(conf.Kubernetes.JobsNamespace).Get(context.Background(), configMapName, metav1.GetOptions{})
160+
_, err = fakeClient.CoreV1().ConfigMaps(conf.Kubernetes.JobsNamespace).Get(context.Background(), configMapName, metav1.GetOptions{})
169161
if err != nil {
170162
t.Fatalf("failed to get ConfigMap: %v", err)
171163
}
172-
if len(cm.OwnerReferences) == 0 {
173-
t.Fatal("expected ConfigMap to have an ownerReference, but got none")
174-
}
175-
if got := cm.OwnerReferences[0].UID; got != testJobUID {
176-
t.Errorf("expected ConfigMap ownerRef UID %q, got %q", testJobUID, got)
177-
}
178164

179165
// Clean up resources
180166
err = backend.cleanResources(context.Background(), task.Id)
@@ -188,9 +174,11 @@ spec:
188174
t.Error("expected Job to be deleted, but it still exists")
189175
}
190176

191-
// ConfigMap deletion is handled by Kubernetes garbage collection via ownerReferences,
192-
// not explicitly by cleanResources. The fake clientset does not simulate cascading GC,
193-
// so we only verify the ownerRef is set correctly (asserted above).
177+
// Verify that the ConfigMap was deleted
178+
_, err = fakeClient.CoreV1().ConfigMaps(conf.Kubernetes.JobsNamespace).Get(context.Background(), configMapName, metav1.GetOptions{})
179+
if err == nil {
180+
t.Error("expected ConfigMap to be deleted, but it still exists")
181+
}
194182

195183
}
196184

0 commit comments

Comments
 (0)