88 "os/user"
99 "path/filepath"
1010 "strings"
11+ "time"
1112
1213 nodev1 "buf.build/gen/go/brevdev/devplane/protocolbuffers/go/devplaneapi/v1"
1314 "connectrpc.com/connect"
@@ -16,20 +17,16 @@ import (
1617 "github.com/brevdev/brev-cli/pkg/entity"
1718 "github.com/brevdev/brev-cli/pkg/externalnode"
1819 "github.com/brevdev/brev-cli/pkg/terminal"
20+ "github.com/cenkalti/backoff/v4"
1921)
2022
21- // sshConnectionError marks an error as being due to a transient connection/transport failure
22- type sshConnectionError struct { err error }
// Retry tuning for the GrantNodeSSHAccess call: the first three values configure
// the exponential backoff, the last one only affects how the wait is printed.
23+ const (
// backoffInitialInterval is handed to backoff.WithInitialInterval.
24+ backoffInitialInterval = 1 * time .Second
// backoffMaxInterval is handed to backoff.WithMaxInterval.
25+ backoffMaxInterval = 10 * time .Second
// backoffMaxElapsedTime is handed to backoff.WithMaxElapsedTime.
26+ backoffMaxElapsedTime = 1 * time .Minute
2327
24- func (e * sshConnectionError ) Error () string { return e .err .Error () }
25- func (e * sshConnectionError ) Unwrap () error { return e .err }
26-
27- // IsSSHConnectionError reports whether err indicates a transient connection/transport
28- // failure that may be retried. Used by grantSSHAccess to decide whether to backoff-retry.
29- func IsSSHConnectionError (err error ) bool {
30- var e * sshConnectionError
31- return errors .As (err , & e )
32- }
// backoffPrintRound is the granularity the next-retry delay is rounded to
// before it is shown in the "retrying in" message.
28+ backoffPrintRound = 500 * time .Millisecond
29+ )
3330
3431// BrevKeyComment is the marker appended to every SSH key that Brev installs.
3532// It allows RemoveBrevAuthorizedKeys to identify and remove exactly those keys.
@@ -56,28 +53,44 @@ func GrantSSHAccessToNode(
5653 }
5754
5855 client := nodeClients .NewNodeClient (tokenProvider , config .GlobalConfig .GetBrevPublicAPIURL ())
59- _ , err := client .GrantNodeSSHAccess (ctx , connect .NewRequest (& nodev1.GrantNodeSSHAccessRequest {
60- ExternalNodeId : reg .ExternalNodeID ,
61- UserId : targetUser .ID ,
62- LinuxUser : osUser .Username ,
63- }))
64- if err != nil {
65- // Transport errors (connection reset, EOF) are transient — leave the key
66- // installed so retries don't need to reinstall it, and signal the caller
67- // with a distinct error type.
68- var connectErr * connect.Error
69- if errors .As (err , & connectErr ) && connectErr .Code () == connect .CodeInternal {
70- return & sshConnectionError {err : fmt .Errorf ("failed to grant SSH access (transient): %w" , err )}
71- }
72- // Permanent error — roll back the key so we don't leave an unrecorded entry.
73- if targetUser .PublicKey != "" {
74- if rerr := RemoveAuthorizedKey (osUser , targetUser .PublicKey ); rerr != nil {
75- t .Vprintf (" %s\n " , t .Yellow (fmt .Sprintf ("Warning: failed to remove SSH key after failed grant: %v" , rerr )))
56+
57+ backoffCtx := backoff .WithContext (backoff .NewExponentialBackOff (
58+ backoff .WithInitialInterval (backoffInitialInterval ),
59+ backoff .WithMaxInterval (backoffMaxInterval ),
60+ backoff .WithMaxElapsedTime (backoffMaxElapsedTime ),
61+ ), ctx )
62+
63+ opToTry := func () error {
64+ _ , err := client .GrantNodeSSHAccess (ctx , connect .NewRequest (& nodev1.GrantNodeSSHAccessRequest {
65+ ExternalNodeId : reg .ExternalNodeID ,
66+ UserId : targetUser .ID ,
67+ LinuxUser : osUser .Username ,
68+ }))
69+ if err != nil {
70+ // Retryable error
71+ var connectErr * connect.Error
72+ if errors .As (err , & connectErr ) && connectErr .Code () == connect .CodeInternal {
73+ return fmt .Errorf ("failed to grant SSH access (transient): %w" , err )
74+ }
75+
76+ // Permanent error — roll back the key so we don't leave an unrecorded entry, then abort the backoff retry
77+ if targetUser .PublicKey != "" {
78+ if rerr := RemoveAuthorizedKey (osUser , targetUser .PublicKey ); rerr != nil {
79+ t .Vprintf (" %s\n " , t .Yellow (fmt .Sprintf ("Warning: failed to remove SSH key after failed grant: %v" , rerr )))
80+ }
7681 }
82+ return backoff .Permanent (fmt .Errorf ("failed to grant SSH access: %w" , err ))
7783 }
84+
85+ return nil
86+ }
87+ onOpErr := func (err error , d time.Duration ) {
88+ t .Vprintf (" SSH access not yet granted; retrying in: %s...\n " , d .Round (backoffPrintRound ))
89+ }
90+ err := backoff .RetryNotify (opToTry , backoffCtx , onOpErr )
91+ if err != nil {
7892 return fmt .Errorf ("failed to grant SSH access: %w" , err )
7993 }
80-
8194 return nil
8295}
8396
0 commit comments