@@ -23,6 +23,7 @@ import (
2323 "k8s.io/client-go/kubernetes"
2424 "k8s.io/client-go/tools/clientcmd"
2525
26+ "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops"
2627 "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport"
2728 "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/utils"
2829 "github.com/hetznercloud/hcloud-go/v2/hcloud"
@@ -112,10 +113,21 @@ func (tc *TestCluster) Stop() error {
112113 errs := make ([]error , 0 , tc .loadBalancers .Size ()+ tc .certificates .Size ())
113114 ctx := context .Background ()
114115
115- for _ , item := range tc .loadBalancers .All () {
116- fmt .Printf ("deleting load balancer %d\n " , item )
117- if _ , err := tc .hcloud .LoadBalancer .Delete (ctx , & hcloud.LoadBalancer {ID : item }); err != nil {
118- errs = append (errs , fmt .Errorf ("delete load balancer %d failed: %w" , item , err ))
116+ // Leak sweep: any registered Load Balancer still present here means the
117+ // hccm finalizer did not release it during namespace teardown. This can
118+ // cause issues when deleting the Private Network afterward.
119+ for _ , id := range tc .loadBalancers .All () {
120+ lb , _ , err := tc .hcloud .LoadBalancer .GetByID (ctx , id )
121+ if err != nil {
122+ errs = append (errs , fmt .Errorf ("checking load balancer %d for leak: %w" , id , err ))
123+ continue
124+ }
125+ if lb == nil {
126+ continue // released by hccm finalizer, nothing to do
127+ }
128+ fmt .Printf ("force-deleting leaked load balancer %d (%s)\n " , id , lb .Name )
129+ if _ , err := tc .hcloud .LoadBalancer .Delete (ctx , lb ); err != nil {
130+ errs = append (errs , fmt .Errorf ("delete leaked load balancer %d failed: %w" , id , err ))
119131 }
120132 }
121133
@@ -228,7 +240,7 @@ func (l *lbTestHelper) DeployTestPod() (*corev1.Pod, error) {
228240 return nil , fmt .Errorf ("could not create test pod: %w" , err )
229241 }
230242
231- err = wait .PollUntilContextTimeout (ctx , 1 * time .Second , 1 * time .Minute , false , func (ctx context.Context ) (done bool , err error ) {
243+ err = wait .PollUntilContextTimeout (ctx , 1 * time .Second , 2 * time .Minute , false , func (ctx context.Context ) (done bool , err error ) {
232244 p , err := testCluster .k8sClient .CoreV1 ().Pods (l .namespace ).Get (ctx , podName , metav1.GetOptions {})
233245 if err != nil {
234246 return false , err
@@ -242,7 +254,7 @@ func (l *lbTestHelper) DeployTestPod() (*corev1.Pod, error) {
242254 return false , nil
243255 })
244256 if err != nil {
245- return nil , fmt .Errorf ("pod %s did not come up after 1 minute : %w" , podName , err )
257+ return nil , fmt .Errorf ("pod %s did not come up after 2 minutes : %w" , podName , err )
246258 }
247259
248260 return pod , nil
@@ -299,6 +311,9 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er
299311 }
300312
301313 if len (svc .Status .LoadBalancer .Ingress ) > 0 {
314+ if err := testCluster .registerServiceLoadBalancers (ctx , svc ); err != nil {
315+ return nil , err
316+ }
302317 return svc , nil
303318 }
304319
@@ -311,6 +326,24 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er
311326 }
312327}
313328
329+ // registerServiceLoadBalancers looks up the hcloud Load Balancers that hccm
330+ // created for svc (identified by the service-uid label) and tracks their IDs
331+ // so TestCluster.Stop can detect finalizer leaks after the suite runs.
332+ func (tc * TestCluster ) registerServiceLoadBalancers (ctx context.Context , svc * corev1.Service ) error {
333+ lbs , err := tc .hcloud .LoadBalancer .AllWithOpts (ctx , hcloud.LoadBalancerListOpts {
334+ ListOpts : hcloud.ListOpts {
335+ LabelSelector : fmt .Sprintf ("%s=%s" , hcops .LabelServiceUID , svc .UID ),
336+ },
337+ })
338+ if err != nil {
339+ return fmt .Errorf ("listing hcloud load balancers for service %s/%s: %w" , svc .Namespace , svc .Name , err )
340+ }
341+ for _ , lb := range lbs {
342+ tc .loadBalancers .Add (lb .ID )
343+ }
344+ return nil
345+ }
346+
314347// TearDown deletes the created pod and service.
315348func (l * lbTestHelper ) TearDown () {
316349 l .t .Helper ()
@@ -323,7 +356,15 @@ func (l *lbTestHelper) TearDown() {
323356
324357 // Use context.Background() rather than t.Context(): cleanup must run to
325358 // completion even when the test has already been cancelled or failed.
326- err := wait .PollUntilContextTimeout (context .Background (), 1 * time .Second , 3 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
359+ ctx := context .Background ()
360+
361+ // Delete LoadBalancer Services explicitly before the namespace. If the
362+ // hccm finalizer is stuck releasing Hetzner resources, the error surfaces
363+ // here as an attributable Service-delete timeout instead of a generic
364+ // namespace-delete timeout.
365+ l .deleteLoadBalancerServices (ctx )
366+
367+ err := wait .PollUntilContextTimeout (ctx , 1 * time .Second , 3 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
327368 err := testCluster .k8sClient .CoreV1 ().Namespaces ().Delete (ctx , l .namespace , metav1.DeleteOptions {})
328369 if err != nil && ! k8serrors .IsNotFound (err ) {
329370 return false , err
@@ -336,9 +377,41 @@ func (l *lbTestHelper) TearDown() {
336377 }
337378}
338379
380+ func (l * lbTestHelper ) deleteLoadBalancerServices (ctx context.Context ) {
381+ svcClient := testCluster .k8sClient .CoreV1 ().Services (l .namespace )
382+
383+ svcList , err := svcClient .List (ctx , metav1.ListOptions {})
384+ if err != nil {
385+ if ! k8serrors .IsNotFound (err ) {
386+ l .t .Logf ("error listing services in namespace %s: %v" , l .namespace , err )
387+ }
388+ return
389+ }
390+
391+ for _ , svc := range svcList .Items {
392+ if svc .Spec .Type != corev1 .ServiceTypeLoadBalancer {
393+ continue
394+ }
395+ if err := svcClient .Delete (ctx , svc .Name , metav1.DeleteOptions {}); err != nil && ! k8serrors .IsNotFound (err ) {
396+ l .t .Logf ("error deleting service %s/%s: %v" , l .namespace , svc .Name , err )
397+ continue
398+ }
399+ err := wait .PollUntilContextTimeout (ctx , 1 * time .Second , 2 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
400+ _ , err := svcClient .Get (ctx , svc .Name , metav1.GetOptions {})
401+ if k8serrors .IsNotFound (err ) {
402+ return true , nil
403+ }
404+ return false , err
405+ })
406+ if err != nil {
407+ l .t .Logf ("service %s/%s did not delete within 2m (hccm finalizer may be stuck): %v" , l .namespace , svc .Name , err )
408+ }
409+ }
410+ }
411+
339412// WaitForHTTPAvailable tries to connect to the given IP via HTTP or HTTPS
340413// (controlled by useHTTPS). It uses exponential backoff starting at 1s and
341- // capping at 30s, waiting up to 6 minutes for a successful HTTP 200 response.
414+ // capping at 30s, waiting up to 8 minutes for a successful HTTP 200 response.
342415// Each individual request has a 5s timeout.
343416func (l * lbTestHelper ) WaitForHTTPAvailable (ingressIP string , useHTTPS bool ) error {
344417 l .t .Helper ()
@@ -356,7 +429,7 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err
356429 proto = "https"
357430 }
358431
359- ctx , cancel := context .WithTimeout (l .t .Context (), 6 * time .Minute )
432+ ctx , cancel := context .WithTimeout (l .t .Context (), 8 * time .Minute )
360433 defer cancel ()
361434
362435 retries := 0
@@ -378,7 +451,7 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err
378451
379452 select {
380453 case <- ctx .Done ():
381- return fmt .Errorf ("timed out after 6m waiting for %s to be available" , ingressIP )
454+ return fmt .Errorf ("timed out after 8m waiting for %s to be available" , ingressIP )
382455 case <- time .After (pollBackoff (retries )):
383456 retries ++
384457 }
0 commit comments