@@ -17,6 +17,8 @@ package controller
1717import (
1818 "context"
1919 "errors"
20+ "fmt"
21+ "time"
2022
2123 . "github.com/onsi/ginkgo/v2"
2224 . "github.com/onsi/gomega"
@@ -47,7 +49,7 @@ func (f *fakeInstaller) InstallOrUpgrade(_ context.Context, _ []byte, _ map[stri
4749 return f .installErr
4850}
4951
50- func (f * fakeInstaller ) Uninstall (_ context.Context ) error {
52+ func (f * fakeInstaller ) Uninstall (_ context.Context , _ time. Duration ) error {
5153 f .uninstallCalled = true
5254 return f .uninstallErr
5355}
@@ -464,22 +466,22 @@ var _ = Describe("GpuReconciler", func() {
464466 })
465467
466468 Describe ("deletion" , func () {
467- It ( "calls Helm uninstall and removes the finalizer" , func () {
469+ BeforeEach ( func () {
468470 newGpu (gpuName )
469471 _ , _ = reconciler .Reconcile (ctx , req ) // adds finalizer
470472 newGpuNode ("gpu-node-del" , "g4dn.xlarge" , "Garden Linux 1312.3" )
471473 DeferCleanup (deleteNode , "gpu-node-del" )
472474 _ , err := reconciler .Reconcile (ctx , req )
473475 Expect (err ).NotTo (HaveOccurred ())
474476 Expect (installer .installCalls ).To (Equal (1 ))
477+ })
475478
476- By ( "deleting the CR" )
479+ It ( "calls Helm uninstall and removes the finalizer on success" , func () {
477480 gpu := & gpuv1beta1.Gpu {}
478481 Expect (k8sClient .Get (ctx , types.NamespacedName {Name : gpuName }, gpu )).To (Succeed ())
479482 Expect (k8sClient .Delete (ctx , gpu )).To (Succeed ())
480483
481- By ("reconciling the deletion" )
482- _ , err = reconciler .Reconcile (ctx , req )
484+ _ , err := reconciler .Reconcile (ctx , req )
483485 Expect (err ).NotTo (HaveOccurred ())
484486 Expect (installer .uninstallCalled ).To (BeTrue ())
485487
@@ -489,6 +491,88 @@ var _ = Describe("GpuReconciler", func() {
489491 Expect (gpu .Finalizers ).NotTo (ContainElement (finalizer ))
490492 }
491493 })
494+
495+ It ("returns an error and keeps the finalizer on non-timeout Helm failure" , func () {
496+ installer .uninstallErr = errors .New ("simulated uninstall failure" )
497+
498+ gpu := & gpuv1beta1.Gpu {}
499+ Expect (k8sClient .Get (ctx , types.NamespacedName {Name : gpuName }, gpu )).To (Succeed ())
500+ Expect (k8sClient .Delete (ctx , gpu )).To (Succeed ())
501+
502+ _ , err := reconciler .Reconcile (ctx , req )
503+ Expect (err ).To (HaveOccurred ())
504+ Expect (err .Error ()).To (ContainSubstring ("helm uninstall" ))
505+
506+ // Finalizer must still be present so the CR is not lost.
507+ gpu = & gpuv1beta1.Gpu {}
508+ Expect (k8sClient .Get (ctx , types.NamespacedName {Name : gpuName }, gpu )).To (Succeed ())
509+ Expect (gpu .Finalizers ).To (ContainElement (finalizer ))
510+ })
511+
512+ It ("force-removes the finalizer when Helm uninstall times out" , func () {
513+ installer .uninstallErr = fmt .Errorf ("uninstalling gpu-operator: %w" , context .DeadlineExceeded )
514+
515+ ns := & corev1.Namespace {ObjectMeta : metav1.ObjectMeta {Name : gpuOperatorNamespace }}
516+ err := k8sClient .Create (ctx , ns )
517+ if err != nil {
518+ Expect (err .Error ()).To (ContainSubstring ("already exists" ))
519+ }
520+
521+ gpu := & gpuv1beta1.Gpu {}
522+ Expect (k8sClient .Get (ctx , types.NamespacedName {Name : gpuName }, gpu )).To (Succeed ())
523+ Expect (k8sClient .Delete (ctx , gpu )).To (Succeed ())
524+
525+ _ , err = reconciler .Reconcile (ctx , req )
526+ Expect (err ).NotTo (HaveOccurred (), "timeout must force-remove finalizer, not block the CR forever" )
527+
528+ // Namespace cleanup must be attempted even on timeout.
529+ liveNs := & corev1.Namespace {}
530+ err = k8sClient .Get (ctx , types.NamespacedName {Name : gpuOperatorNamespace }, liveNs )
531+ if err == nil {
532+ Expect (liveNs .DeletionTimestamp ).NotTo (BeNil (), "namespace should be terminating even after timeout" )
533+ }
534+
535+ gpu = & gpuv1beta1.Gpu {}
536+ err = k8sClient .Get (ctx , types.NamespacedName {Name : gpuName }, gpu )
537+ if err == nil {
538+ Expect (gpu .Finalizers ).NotTo (ContainElement (finalizer ))
539+ }
540+ })
541+
542+ It ("deletes the gpu-operator namespace after successful Helm uninstall" , func () {
543+ ns := & corev1.Namespace {ObjectMeta : metav1.ObjectMeta {Name : gpuOperatorNamespace }}
544+ err := k8sClient .Create (ctx , ns )
545+ if err != nil {
546+ // Namespace may already exist (e.g. terminating from a prior test) - that's fine,
547+ // we just need it to be present so deleteNamespace can act on it.
548+ Expect (err .Error ()).To (ContainSubstring ("already exists" ))
549+ }
550+
551+ gpu := & gpuv1beta1.Gpu {}
552+ Expect (k8sClient .Get (ctx , types.NamespacedName {Name : gpuName }, gpu )).To (Succeed ())
553+ Expect (k8sClient .Delete (ctx , gpu )).To (Succeed ())
554+
555+ _ , err = reconciler .Reconcile (ctx , req )
556+ Expect (err ).NotTo (HaveOccurred ())
557+ Expect (installer .uninstallCalled ).To (BeTrue (), "Uninstall must be called before namespace cleanup" )
558+
559+ liveNs := & corev1.Namespace {}
560+ err = k8sClient .Get (ctx , types.NamespacedName {Name : gpuOperatorNamespace }, liveNs )
561+ // Either deleted (NotFound) or marked for deletion (DeletionTimestamp set).
562+ if err == nil {
563+ Expect (liveNs .DeletionTimestamp ).NotTo (BeNil (), "namespace should be terminating" )
564+ }
565+ })
566+
567+ It ("ignores NotFound when deleting the gpu-operator namespace" , func () {
568+ // Namespace does not exist - deleteNamespace must not return an error.
569+ gpu := & gpuv1beta1.Gpu {}
570+ Expect (k8sClient .Get (ctx , types.NamespacedName {Name : gpuName }, gpu )).To (Succeed ())
571+ Expect (k8sClient .Delete (ctx , gpu )).To (Succeed ())
572+
573+ _ , err := reconciler .Reconcile (ctx , req )
574+ Expect (err ).NotTo (HaveOccurred ())
575+ })
492576 })
493577})
494578
0 commit comments