@@ -23,6 +23,19 @@ import (
2323 "encoding/json"
2424 "errors"
2525 "fmt"
26+ "io"
27+ "io/ioutil"
28+ "log"
29+ "os"
30+ "os/exec"
31+ "path"
32+ "path/filepath"
33+ "strconv"
34+ "strings"
35+ "sync"
36+ "syscall"
37+ "time"
38+
2639 "github.com/aws/aws-sdk-go/aws"
2740 "github.com/aws/aws-sdk-go/aws/credentials"
2841 "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
@@ -38,18 +51,6 @@ import (
3851 "github.com/devtron-labs/common-lib/utils/dockerOperations"
3952 "github.com/devtron-labs/common-lib/utils/retryFunc"
4053 "golang.org/x/sync/errgroup"
41- "io"
42- "io/ioutil"
43- "log"
44- "os"
45- "os/exec"
46- "path"
47- "path/filepath"
48- "strconv"
49- "strings"
50- "sync"
51- "syscall"
52- "time"
5354)
5455
5556const (
@@ -74,15 +75,23 @@ type DockerHelper interface {
7475 GetDockerAuthConfigForPrivateRegistries (workflowRequest * CommonWorkflowRequest ) * bean.DockerAuthConfig
7576}
7677
78+ // BuildxK8sClientFactory creates a BuildxK8sInterface from deployment names.
79+ // Abstracted for testability — production code uses newBuildxK8sClient as the default.
80+ type BuildxK8sClientFactory func (deploymentNames []string ) (BuildxK8sInterface , error )
81+
7782type DockerHelperImpl struct {
7883 DockerCommandEnv []string
7984 cmdExecutor CommandExecutor
85+ k8sClientFactory BuildxK8sClientFactory
8086}
8187
8288func NewDockerHelperImpl (cmdExecutor CommandExecutor ) * DockerHelperImpl {
8389 return & DockerHelperImpl {
8490 DockerCommandEnv : os .Environ (),
8591 cmdExecutor : cmdExecutor ,
92+ k8sClientFactory : func (names []string ) (BuildxK8sInterface , error ) {
93+ return newBuildxK8sClient (names )
94+ },
8695 }
8796}
8897
@@ -300,9 +309,27 @@ func (impl *DockerHelperImpl) DockerLogin(ciContext cicxt.CiContext, dockerCrede
300309 return performDockerLogin ()
301310}
302311
312+ // waitForBuilderPods waits until all buildx k8s driver pods reach Running state,
313+ // or returns an error if the deadline is exceeded. Extracted for testability.
314+ func waitForBuilderPods (ctx context.Context , k8sClient BuildxK8sInterface , duration time.Duration ) error {
315+ log .Println (util .DEVTRON , fmt .Sprintf ("waiting for builder pods to be ready (timeout: %v)" , duration ))
316+ initDone := make (chan bool , 1 )
317+ initCtx , initCancel := context .WithTimeout (ctx , duration )
318+ defer initCancel ()
319+ go k8sClient .WaitUntilBuilderPodLive (initCtx , initDone )
320+ select {
321+ case <- initDone :
322+ log .Println (util .DEVTRON , "builder pods are ready for build" )
323+ return nil
324+ case <- initCtx .Done ():
325+ return fmt .Errorf ("builder pods did not reach Running state within %v" , duration )
326+ }
327+ }
328+
303329func (impl * DockerHelperImpl ) executeDockerReBuild (ciContext cicxt.CiContext , k8sClient BuildxK8sInterface ,
304330 useBuildxK8sDriver bool , dockerBuild string , deploymentNames []string ,
305- dockerBuildStageMetadata bean2.DockerBuildStageMetadata , reBuildLogs []any ) error {
331+ dockerBuildStageMetadata bean2.DockerBuildStageMetadata , reBuildLogs []any ,
332+ builderPodWaitDuration time.Duration ) error {
306333 if ! useBuildxK8sDriver {
307334 return nil
308335 }
@@ -311,7 +338,7 @@ func (impl *DockerHelperImpl) executeDockerReBuild(ciContext cicxt.CiContext, k8
311338 log .Println (util .DEVTRON , fmt .Sprintf (" error in RestartBuilders : %s" , k8sErr .Error ()))
312339 return k8sErr
313340 }
314- k8sClient , err := newBuildxK8sClient (deploymentNames )
341+ k8sClient , err := impl . k8sClientFactory (deploymentNames )
315342 if err != nil {
316343 log .Println (util .DEVTRON , " error in creating buildxK8sClient , err : " , err .Error ())
317344 return err
@@ -324,18 +351,15 @@ func (impl *DockerHelperImpl) executeDockerReBuild(ciContext cicxt.CiContext, k8
324351 rebuildImageStage := func () error {
325352 // wait for the builder pod to be up again
326353 startTime := time .Now ()
327- util .LogInfo ("Waiting for builder pod to be ready," , "timeout: 2 minutes" )
328- done := make (chan bool )
329- ctx , cancel := context .WithCancel (ciContext )
354+ util .LogInfo ("Waiting for builder pod to be ready," , fmt . Sprintf ( "timeout: %v" , builderPodWaitDuration ) )
355+ done := make (chan bool , 1 ) // buffered to prevent goroutine leak on timeout
356+ ctx , cancel := context .WithTimeout (ciContext , builderPodWaitDuration )
330357 defer cancel ()
331358 go k8sClient .WaitUntilBuilderPodLive (ctx , done )
332359 select {
333360 case <- done :
334361 // builder pod is up again, continue with the build
335- cancel ()
336- case <- time .After (2 * time .Minute ):
337- // timeout after 2 minutes
338- cancel ()
362+ case <- ctx .Done ():
339363 return BuilderPodDeletedError
340364 }
341365 util .LogInfo ("DONE -->" , time .Since (startTime ).Seconds ())
@@ -403,6 +427,10 @@ func (impl *DockerHelperImpl) BuildArtifact(ciRequest *CommonWorkflowRequest) (s
403427 if err != nil {
404428 log .Println ("Error while parsing environment variables" , err )
405429 }
430+ builderPodWaitDuration := 2 * time .Minute // backward-compat default
431+ if ciRequest .BuildxBuilderPodWaitDurationSecs > 0 {
432+ builderPodWaitDuration = time .Duration (ciRequest .BuildxBuilderPodWaitDurationSecs ) * time .Second
433+ }
406434 if ciRequest .DockerImageTag == "" {
407435 ciRequest .DockerImageTag = "latest"
408436 }
@@ -453,12 +481,12 @@ func (impl *DockerHelperImpl) BuildArtifact(ciRequest *CommonWorkflowRequest) (s
453481 }
454482 useBuildxK8sDriver , eligibleK8sDriverNodes = dockerBuildConfig .CheckForBuildXK8sDriver ()
455483 if useBuildxK8sDriver {
456- deploymentNames , err = impl .createBuildxBuilderWithK8sDriver (ciContext , ciRequest .PropagateLabelsInBuildxPod , ciRequest .DockerConnection , dockerBuildConfig .BuildxDriverImage , eligibleK8sDriverNodes , ciRequest .PipelineId , ciRequest .WorkflowId )
484+ deploymentNames , err = impl .createBuildxBuilderWithK8sDriver (ciContext , ciRequest .PropagateLabelsInBuildxPod , ciRequest .DockerConnection , dockerBuildConfig .BuildxDriverImage , eligibleK8sDriverNodes , ciRequest .PipelineId , ciRequest .WorkflowId , builderPodWaitDuration )
457485 if err != nil {
458486 log .Println (util .DEVTRON , " error in creating buildxDriver , err : " , err .Error ())
459487 return err
460488 }
461- k8sClient , err = newBuildxK8sClient (deploymentNames )
489+ k8sClient , err = impl . k8sClientFactory (deploymentNames )
462490 if err != nil {
463491 log .Println (util .DEVTRON , " error in creating buildxK8sClient , err : " , err .Error ())
464492 return err
@@ -469,6 +497,11 @@ func (impl *DockerHelperImpl) BuildArtifact(ciRequest *CommonWorkflowRequest) (s
469497 log .Println (util .DEVTRON , " error in registering builder pods " , " err: " , err )
470498 return err
471499 }
500+ // Wait for builder pods to reach Running state before starting the build.
501+ // Prevents false-positive BuilderPodDeletedError from pod startup latency.
502+ if err = waitForBuilderPods (ciContext , k8sClient , builderPodWaitDuration ); err != nil {
503+ return err
504+ }
472505 } else {
473506 err = impl .createBuildxBuilderForMultiArchBuild (ciContext , ciRequest .DockerConnection , dockerBuildConfig .BuildxDriverImage )
474507 if err != nil {
@@ -534,7 +567,7 @@ func (impl *DockerHelperImpl) BuildArtifact(ciRequest *CommonWorkflowRequest) (s
534567 reBuildLogs = []any {fmt .Sprintf ("Starting re docker build (Attempt %d) : " , attempt ), dockerBuild }
535568 }
536569 return impl .executeDockerReBuild (ciContext , k8sClient , useBuildxK8sDriver , dockerBuild ,
537- deploymentNames , dockerBuildStageMetadata , reBuildLogs )
570+ deploymentNames , dockerBuildStageMetadata , reBuildLogs , builderPodWaitDuration )
538571 }
539572 err = retryFunc .RetryWithOutLogging (callback , retryFunc .IsRetryableError , maxRetry , 1 * time .Second )
540573 if err != nil {
@@ -1097,14 +1130,14 @@ func (impl *DockerHelperImpl) createBuildxBuilderForMultiArchBuild(ciContext cic
10971130 return nil
10981131}
10991132
1100- func (impl * DockerHelperImpl ) createBuildxBuilderWithK8sDriver (ciContext cicxt.CiContext , propagateLabelsInBuildxPod bool , dockerConnection , buildxDriverImage string , builderNodes []map [string ]string , ciPipelineId , ciWorkflowId int ) ([]string , error ) {
1133+ func (impl * DockerHelperImpl ) createBuildxBuilderWithK8sDriver (ciContext cicxt.CiContext , propagateLabelsInBuildxPod bool , dockerConnection , buildxDriverImage string , builderNodes []map [string ]string , ciPipelineId , ciWorkflowId int , timeout time. Duration ) ([]string , error ) {
11011134 deploymentNames := make ([]string , 0 )
11021135 if len (builderNodes ) == 0 {
11031136 return deploymentNames , errors .New ("atleast one node is expected for builder with kubernetes driver" )
11041137 }
11051138 for i := 0 ; i < len (builderNodes ); i ++ {
11061139 nodeOpts := builderNodes [i ]
1107- builderCmd , deploymentName , err := getBuildxK8sDriverCmd (propagateLabelsInBuildxPod , dockerConnection , buildxDriverImage , nodeOpts , ciPipelineId , ciWorkflowId )
1140+ builderCmd , deploymentName , err := getBuildxK8sDriverCmd (propagateLabelsInBuildxPod , dockerConnection , buildxDriverImage , nodeOpts , ciPipelineId , ciWorkflowId , timeout )
11081141 if err != nil {
11091142 return deploymentNames , err
11101143 }
@@ -1181,7 +1214,7 @@ func (impl *DockerHelperImpl) runCmd(cmd string) (error, *bytes.Buffer) {
11811214 return err , errBuf
11821215}
11831216
1184- func getBuildxK8sDriverCmd (propagateLabelsInBuildxPod bool , dockerConnection , buildxDriverImage string , driverOpts map [string ]string , ciPipelineId , ciWorkflowId int ) (string , string , error ) {
1217+ func getBuildxK8sDriverCmd (propagateLabelsInBuildxPod bool , dockerConnection , buildxDriverImage string , driverOpts map [string ]string , ciPipelineId , ciWorkflowId int , timeout time. Duration ) (string , string , error ) {
11851218 buildxCreate := "docker buildx create --buildkitd-flags '--allow-insecure-entitlement network.host --allow-insecure-entitlement security.insecure' --name=%s --driver=kubernetes --node=%s --bootstrap "
11861219 nodeName := driverOpts ["node" ]
11871220 if nodeName == "" {
@@ -1203,6 +1236,9 @@ func getBuildxK8sDriverCmd(propagateLabelsInBuildxPod bool, dockerConnection, bu
12031236 }
12041237
12051238 driverOpts ["driverOptions" ] = getBuildXDriverOptionsWithImage (buildxDriverImage , driverOpts ["driverOptions" ])
1239+ if timeout > 0 {
1240+ driverOpts ["driverOptions" ] = getBuildXDriverOptionsWithTimeout (timeout , driverOpts ["driverOptions" ])
1241+ }
12061242 if len (driverOpts ["driverOptions" ]) > 0 {
12071243 buildxCreate += " '--driver-opt=%s' "
12081244 buildxCreate = fmt .Sprintf (buildxCreate , driverOpts ["driverOptions" ])
@@ -1227,6 +1263,21 @@ func getBuildXDriverOptionsWithImage(buildxDriverImage, driverOptions string) st
12271263 return driverOptions
12281264}
12291265
1266+ func getBuildXDriverOptionsWithTimeout (timeout time.Duration , driverOptions string ) string {
1267+ if strings .HasPrefix (driverOptions , "timeout=" ) ||
1268+ strings .Contains (driverOptions , ",timeout=" ) {
1269+ // if timeout is already present in driver options then do not override it, just return the existing options
1270+ return driverOptions
1271+ }
1272+ timeoutOption := fmt .Sprintf ("\" timeout=%s\" " , timeout .String ())
1273+ if len (driverOptions ) > 0 {
1274+ driverOptions += fmt .Sprintf (",%s" , timeoutOption )
1275+ } else {
1276+ driverOptions = timeoutOption
1277+ }
1278+ return driverOptions
1279+ }
1280+
12301281func getBuildXDriverOptionsWithLabelsAndAnnotations (driverOptions string ) (string , error ) {
12311282 // not passing annotation as of now because --driver-opt=annotations is not supported by buildx if contains quotes
12321283 labels := make (map [string ]string )
0 commit comments