@@ -11,10 +11,10 @@ import (
1111 "net/http"
1212 "os"
1313 "strconv"
14+ "strings"
1415 "testing"
1516 "time"
1617
17- "github.com/stretchr/testify/require"
1818 hrobot "github.com/syself/hrobot-go"
1919 corev1 "k8s.io/api/core/v1"
2020 k8serrors "k8s.io/apimachinery/pkg/api/errors"
@@ -24,11 +24,37 @@ import (
2424 "k8s.io/client-go/kubernetes"
2525 "k8s.io/client-go/tools/clientcmd"
2626
27+ "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/annotation"
28+ "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops"
2729 "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport"
2830 "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/utils"
2931 "github.com/hetznercloud/hcloud-go/v2/hcloud"
3032)
3133
34+ // Load Balancer creation timeouts. Services that involve certificate
35+ // provisioning take longer to become ready.
36+ const (
37+ lbCreateTimeoutDefault = 8 * time .Minute
38+ lbCreateTimeoutCert = 16 * time .Minute
39+ )
40+
41+ // lbCreateTimeoutFor returns the timeout to use when waiting for the given
42+ // Load Balancer service to become ready.
43+ func lbCreateTimeoutFor (svc * corev1.Service ) time.Duration {
44+ certAnnotations := []annotation.Name {
45+ annotation .LBSvcHTTPCertificates ,
46+ annotation .LBSvcHTTPCertificateType ,
47+ annotation .LBSvcHTTPManagedCertificateName ,
48+ annotation .LBSvcHTTPManagedCertificateDomains ,
49+ }
50+ for _ , a := range certAnnotations {
51+ if _ , ok := svc .Annotations [string (a )]; ok {
52+ return lbCreateTimeoutCert
53+ }
54+ }
55+ return lbCreateTimeoutDefault
56+ }
57+
3258var rng * rand.Rand
3359
3460func init () {
@@ -53,7 +79,7 @@ type TestCluster struct {
5379 certDomain string
5480
5581 certificates * utils.SyncSet [int64 ]
56- loadBalancers * utils.SyncSet [int64 ]
82+ loadBalancers * utils.SyncSet [string ]
5783}
5884
5985func (tc * TestCluster ) Start () error {
@@ -104,7 +130,7 @@ func (tc *TestCluster) Start() error {
104130 tc .certDomain = os .Getenv ("CERT_DOMAIN" )
105131
106132 tc .certificates = utils .NewSyncSet [int64 ]()
107- tc .loadBalancers = utils .NewSyncSet [int64 ]()
133+ tc .loadBalancers = utils .NewSyncSet [string ]()
108134
109135 return nil
110136}
@@ -113,14 +139,26 @@ func (tc *TestCluster) Stop() error {
113139 errs := make ([]error , 0 , tc .loadBalancers .Size ()+ tc .certificates .Size ())
114140 ctx := context .Background ()
115141
116- for _ , item := range tc .loadBalancers .All () {
117- fmt .Printf ("deleting load balancer %d\n " , item )
118- if _ , err := tc .hcloud .LoadBalancer .Delete (ctx , & hcloud.LoadBalancer {ID : item }); err != nil {
119- errs = append (errs , fmt .Errorf ("delete load balancer %d failed: %w" , item , err ))
142+ uids := tc .loadBalancers .All ()
143+ selector := fmt .Sprintf ("%s in (%s)" , hcops .LabelServiceUID , strings .Join (uids , "," ))
144+ lbs , err := tc .hcloud .LoadBalancer .AllWithOpts (ctx , hcloud.LoadBalancerListOpts {
145+ ListOpts : hcloud.ListOpts {
146+ LabelSelector : selector ,
147+ },
148+ })
149+ if err != nil {
150+ return fmt .Errorf ("fetching load balancers via selector %s: %w" , selector , err )
151+ }
152+ for _ , lb := range lbs {
153+ // Stop is called after `m.Run()`, so we can't use t.Log anymore.
154+ fmt .Printf ("force-deleting leaked load balancer %d (%s)\n " , lb .ID , lb .Name )
155+ if _ , err := tc .hcloud .LoadBalancer .Delete (ctx , lb ); err != nil {
156+ errs = append (errs , fmt .Errorf ("delete leaked load balancer %d failed: %w" , lb .ID , err ))
120157 }
121158 }
122159
123160 for _ , item := range tc .certificates .All () {
161+ // Stop is called after `m.Run()`, so we can't use t.Log anymore.
124162 fmt .Printf ("deleting certificate %d\n " , item )
125163 if _ , err := tc .hcloud .Certificate .Delete (ctx , & hcloud.Certificate {ID : item }); err != nil {
126164 errs = append (errs , fmt .Errorf ("delete certificate %d failed: %w" , item , err ))
@@ -229,7 +267,7 @@ func (l *lbTestHelper) DeployTestPod() (*corev1.Pod, error) {
229267 return nil , fmt .Errorf ("could not create test pod: %w" , err )
230268 }
231269
232- err = wait .PollUntilContextTimeout (ctx , 1 * time .Second , 1 * time .Minute , false , func (ctx context.Context ) (done bool , err error ) {
270+ err = wait .PollUntilContextTimeout (ctx , 1 * time .Second , 2 * time .Minute , false , func (ctx context.Context ) (done bool , err error ) {
233271 p , err := testCluster .k8sClient .CoreV1 ().Pods (l .namespace ).Get (ctx , podName , metav1.GetOptions {})
234272 if err != nil {
235273 return false , err
@@ -243,7 +281,7 @@ func (l *lbTestHelper) DeployTestPod() (*corev1.Pod, error) {
243281 return false , nil
244282 })
245283 if err != nil {
246- return nil , fmt .Errorf ("pod %s did not come up after 1 minute : %w" , podName , err )
284+ return nil , fmt .Errorf ("pod %s did not come up after 2 minutes : %w" , podName , err )
247285 }
248286
249287 return pod , nil
@@ -280,16 +318,21 @@ func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string
280318}
281319
282320// CreateService creates a k8s service based on the given service definition
283- // and waits until it is "ready".
321+ // and waits until it is "ready". The wait timeout is derived from the service
322+ // annotations: services that provision certificates get a longer timeout.
284323func (l * lbTestHelper ) CreateService (lbSvc * corev1.Service ) (* corev1.Service , error ) {
285324 l .t .Helper ()
286325
326+ timeout := lbCreateTimeoutFor (lbSvc )
327+
287328 lbSvc , err := testCluster .k8sClient .CoreV1 ().Services (l .namespace ).Create (l .t .Context (), lbSvc , metav1.CreateOptions {})
288329 if err != nil {
289330 return nil , fmt .Errorf ("could not create service: %w" , err )
290331 }
291332
292- ctx , cancel := context .WithTimeout (l .t .Context (), 4 * time .Minute )
333+ testCluster .loadBalancers .Add (string (lbSvc .UID ))
334+
335+ ctx , cancel := context .WithTimeout (l .t .Context (), timeout )
293336 defer cancel ()
294337
295338 retries := 0
@@ -324,19 +367,24 @@ func (l *lbTestHelper) TearDown() {
324367
325368 // Use context.Background() rather than t.Context(): cleanup must run to
326369 // completion even when the test has already been cancelled or failed.
327- err := wait .PollUntilContextTimeout (context .Background (), 1 * time .Second , 3 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
370+ ctx := context .Background ()
371+
372+ err := wait .PollUntilContextTimeout (ctx , 1 * time .Second , 3 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
328373 err := testCluster .k8sClient .CoreV1 ().Namespaces ().Delete (ctx , l .namespace , metav1.DeleteOptions {})
329374 if err != nil && ! k8serrors .IsNotFound (err ) {
330375 return false , err
331376 }
332377 return k8serrors .IsNotFound (err ), nil
333378 })
334- require .NoError (l .t , err )
379+ if err != nil {
380+ // The cluster is deleted afterward, so we can info log this error
381+ l .t .Logf ("error tearing down test namespace: %v" , err )
382+ }
335383}
336384
337385// WaitForHTTPAvailable tries to connect to the given IP via HTTP or HTTPS
338386// (controlled by useHTTPS). It uses exponential backoff starting at 1s and
339- // capping at 30s, waiting up to 6 minutes for a successful HTTP 200 response.
387+ // capping at 30s, waiting up to 8 minutes for a successful HTTP 200 response.
340388// Each individual request has a 5s timeout.
341389func (l * lbTestHelper ) WaitForHTTPAvailable (ingressIP string , useHTTPS bool ) error {
342390 l .t .Helper ()
@@ -354,7 +402,7 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err
354402 proto = "https"
355403 }
356404
357- ctx , cancel := context .WithTimeout (l .t .Context (), 6 * time .Minute )
405+ ctx , cancel := context .WithTimeout (l .t .Context (), 8 * time .Minute )
358406 defer cancel ()
359407
360408 retries := 0
@@ -376,7 +424,7 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err
376424
377425 select {
378426 case <- ctx .Done ():
379- return fmt .Errorf ("timed out after 6m waiting for %s to be available" , ingressIP )
427+ return fmt .Errorf ("timed out after 8m waiting for %s to be available" , ingressIP )
380428 case <- time .After (pollBackoff (retries )):
381429 retries ++
382430 }
0 commit comments