@@ -188,9 +188,9 @@ private async Task ProcessStats(List<GithubTargetConfiguration> targetConfig)
188188 {
189189 CspRunnerCount . Labels ( cc . CloudIdentifier ) . Set ( await cc . GetServerCountFromCsp ( ) ) ;
190190 }
191- catch
191+ catch ( Exception ex )
192192 {
193- _logger . LogWarning ( $ "Unable to get runner count from CSP { cc . CloudIdentifier } ") ;
193+ _logger . LogWarning ( $ "Unable to get runner count from CSP { cc . CloudIdentifier } : { ex . GetFullExceptionDetails ( ) } ") ;
194194 }
195195 }
196196
@@ -723,7 +723,7 @@ private async Task<bool> CreateRunner(CreateRunnerTask rt)
723723 }
724724
725725 var cc = _cc . First ( x => x . CloudIdentifier == selectedProvider . Cloud ) ;
726-
726+
727727 try
728728 {
729729 string targetName = rt . TargetType switch
@@ -732,34 +732,57 @@ private async Task<bool> CreateRunner(CreateRunnerTask rt)
732732 TargetType . Organization => runner . Owner ,
733733 _ => throw new ArgumentOutOfRangeException ( )
734734 } ;
735-
736-
737- Machine newRunner = await cc . CreateNewRunner ( runner . Arch , runner . Size , rt . RunnerToken , targetName , runner . IsCustom , runner . Profile ) ;
738- _logger . LogInformation ( $ "New Runner { newRunner . Name } [{ runner . Size } on { runner . Arch } ] entering pool for { targetName } .") ;
739- MachineCreatedCount . Labels ( runner . Owner , runner . Size ) . Inc ( ) ;
740-
741- runner . Hostname = newRunner . Name ;
742- runner . IsOnline = true ;
743- runner . CloudServerId = newRunner . Id ;
744- runner . IPv4 = newRunner . Ipv4 ;
745- runner . Cloud = cc . CloudIdentifier ;
746- runner . ProvisionId = newRunner . ProvisionId ;
747- runner . ProvisionPayload = newRunner . ProvisionPayload ;
748-
749-
750- runner . Lifecycle . Add ( new RunnerLifecycle
735+
736+
737+ Machine newRunner ;
738+ int retryAttempt = 0 ;
739+ const int maxRetries = 1 ;
740+ const int retryDelayMs = 1000 ; // 1 second delay between retries
741+
742+ while ( retryAttempt <= maxRetries )
751743 {
752- Status = RunnerStatus . Created ,
753- EventTimeUtc = DateTime . UtcNow ,
754- Event = $ "New Runner { newRunner . Name } [{ runner . Size } on { runner . Arch } ] entering pool for { targetName } ."
755- } ) ;
744+
745+ try
746+ {
747+ newRunner = await cc . CreateNewRunner ( runner . Arch , runner . Size , rt . RunnerToken , targetName , runner . IsCustom , runner . Profile ) ;
748+ _logger . LogInformation ( $ "New Runner { newRunner . Name } [{ runner . Size } on { runner . Arch } ] entering pool for { targetName } .") ;
749+ MachineCreatedCount . Labels ( runner . Owner , runner . Size ) . Inc ( ) ;
750+
751+ runner . Hostname = newRunner . Name ;
752+ runner . IsOnline = true ;
753+ runner . CloudServerId = newRunner . Id ;
754+ runner . IPv4 = newRunner . Ipv4 ;
755+ runner . Cloud = cc . CloudIdentifier ;
756+ runner . ProvisionId = newRunner . ProvisionId ;
757+ runner . ProvisionPayload = newRunner . ProvisionPayload ;
758+ runner . Lifecycle . Add ( new RunnerLifecycle
759+ {
760+ Status = RunnerStatus . Created ,
761+ EventTimeUtc = DateTime . UtcNow ,
762+ Event = $ "New Runner { newRunner . Name } [{ runner . Size } on { runner . Arch } ] entering pool for { targetName } ."
763+ } ) ;
764+ break ;
765+ }
766+ catch ( Exception ex )
767+ {
768+ if ( retryAttempt == maxRetries )
769+ {
770+ _logger . LogError ( ex , $ "Failed to create runner after { maxRetries + 1 } attempts") ;
771+ throw ; // Re-throw the exception after all retries are exhausted
772+ }
773+
774+ _logger . LogWarning ( ex , $ "Failed to create runner (attempt { retryAttempt + 1 } /{ maxRetries + 1 } ). Retrying...") ;
775+ await Task . Delay ( retryDelayMs ) ;
776+ retryAttempt ++ ;
777+ }
778+ }
756779 await db . SaveChangesAsync ( ) ;
757780
758781 return true ;
759782 }
760783 catch ( Exception ex )
761784 {
762- _logger . LogError ( $ "Unable to create runner [{ runner . Size } on { runner . Arch } | Retry: { rt . RetryCount } ]: { ex . Message } ") ;
785+ _logger . LogError ( $ "Unable to create runner [{ runner . Size } on { runner . Arch } | Retry: { rt . RetryCount } ]: { ex . GetFullExceptionDetails ( ) } ") ;
763786 runner . Lifecycle . Add ( new RunnerLifecycle
764787 {
765788 Status = RunnerStatus . Failure ,
@@ -789,7 +812,7 @@ private async Task<bool> CreateRunner(CreateRunnerTask rt)
789812 {
790813 Cloud = cc . CloudIdentifier ,
791814 Size = runner . Size ,
792- UnbanTime = DateTime . UtcNow + TimeSpan . FromMinutes ( 30 )
815+ UnbanTime = DateTime . UtcNow + TimeSpan . FromMinutes ( 10 )
793816 } ) ;
794817
795818 return false ;
0 commit comments