Skip to content

Commit 32dd2da

Browse files
committed
long retry and poll netbird status
1 parent 88dc335 commit 32dd2da

2 files changed

Lines changed: 69 additions & 18 deletions

File tree

pkg/cmd/register/register.go

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,20 @@ func runSetup(node *nodev1.ExternalNode, t *terminal.Terminal, deps registerDeps
330330
}
331331
}
332332

333+
// waitForNetbirdConnected polls "netbird status" until the management server
334+
// reports Connected or the timeout expires. Returns true if connected.
335+
func waitForNetbirdConnected(timeout time.Duration) bool {
336+
deadline := time.Now().Add(timeout)
337+
for time.Now().Before(deadline) {
338+
out, err := exec.Command("netbird", "status").Output() //nolint:gosec // fixed command
339+
if err == nil && netbirdManagementConnected(string(out)) {
340+
return true
341+
}
342+
time.Sleep(2 * time.Second)
343+
}
344+
return false
345+
}
346+
333347
func grantSSHAccess(ctx context.Context, t *terminal.Terminal, deps registerDeps, tokenProvider externalnode.TokenProvider, reg *DeviceRegistration, brevUser *entity.User, osUser *user.User) {
334348
t.Vprint("")
335349
t.Vprint(t.Green("Enabling SSH access on this device"))
@@ -339,14 +353,36 @@ func grantSSHAccess(ctx context.Context, t *terminal.Terminal, deps registerDeps
339353
t.Vprintf(" Linux user: %s\n", osUser.Username)
340354
t.Vprint("")
341355

342-
err := GrantSSHAccessToNode(ctx, t, deps.nodeClients, tokenProvider, reg, brevUser, osUser)
343-
if err != nil {
344-
t.Vprint(" Retrying in 3 seconds...")
345-
time.Sleep(3 * time.Second)
356+
t.Vprint(" Waiting for Brev tunnel to connect...")
357+
if !waitForNetbirdConnected(60 * time.Second) {
358+
t.Vprint(t.Yellow(" Tunnel did not connect within 60s."))
359+
t.Vprint(t.Yellow(" Run 'brev enable-ssh' once the tunnel is established."))
360+
return
361+
}
362+
t.Vprint(t.Green(" Tunnel connected."))
363+
t.Vprint("")
364+
365+
// Peer routes finish propagating after the management handshake. Retry
366+
// with increasing delays (10s, 20s, 30s) to give the routing time to settle.
367+
retryDelays := []time.Duration{10 * time.Second, 20 * time.Second, 30 * time.Second}
368+
var err error
369+
for i, delay := range append([]time.Duration{0}, retryDelays...) {
370+
if delay > 0 {
371+
t.Vprintf(" Retrying in %s...\n", delay)
372+
time.Sleep(delay)
373+
}
346374
err = GrantSSHAccessToNode(ctx, t, deps.nodeClients, tokenProvider, reg, brevUser, osUser)
375+
if err == nil {
376+
break
377+
}
378+
if i < len(retryDelays) {
379+
t.Vprintf(" %s\n", t.Yellow(fmt.Sprintf("(%d/%d) %v", i+1, len(retryDelays)+1, err)))
380+
}
347381
}
348382
if err != nil {
349-
t.Vprintf(" Warning: %v\n", err)
383+
t.Vprintf(" %s\n", t.Yellow(fmt.Sprintf("Warning: %v", err)))
384+
t.Vprint(t.Yellow(" Your SSH public key is already installed locally on this device."))
385+
t.Vprint(t.Yellow(" Run 'brev enable-ssh' in ~1 minute to complete the server-side record."))
350386
return
351387
}
352388

pkg/cmd/register/sshkeys.go

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package register
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"os"
78
"os/user"
@@ -22,8 +23,12 @@ import (
2223
const BrevKeyComment = "# brev-cli"
2324

2425
// GrantSSHAccessToNode installs the user's public key in authorized_keys and
25-
// calls GrantNodeSSHAccess to record access server-side. If the RPC fails,
26-
// the installed key is rolled back.
26+
// calls GrantNodeSSHAccess to record access server-side.
27+
//
28+
// On a transient transport error (connect.CodeInternal, e.g. connection reset),
29+
// the key is left in authorized_keys so the caller can retry without
30+
// re-installing it. On a permanent application error (auth, not found, etc.)
31+
// the key is rolled back.
2732
func GrantSSHAccessToNode(
2833
ctx context.Context,
2934
t *terminal.Terminal,
@@ -34,9 +39,10 @@ func GrantSSHAccessToNode(
3439
osUser *user.User,
3540
) error {
3641
if targetUser.PublicKey != "" {
37-
if err := InstallAuthorizedKey(osUser, targetUser.PublicKey); err != nil {
42+
added, err := InstallAuthorizedKey(osUser, targetUser.PublicKey)
43+
if err != nil {
3844
t.Vprintf(" %s\n", t.Yellow(fmt.Sprintf("Warning: failed to install SSH public key: %v", err)))
39-
} else {
45+
} else if added {
4046
t.Vprint(" Brev public key added to authorized_keys.")
4147
}
4248
}
@@ -48,6 +54,14 @@ func GrantSSHAccessToNode(
4854
LinuxUser: osUser.Username,
4955
}))
5056
if err != nil {
57+
// Transport errors (connection reset, EOF) are transient — leave the key
58+
// installed so retries don't need to reinstall it, and signal the caller
59+
// via a "(transient)" marker in the wrapped error message.
60+
var connectErr *connect.Error
61+
if errors.As(err, &connectErr) && connectErr.Code() == connect.CodeInternal {
62+
return fmt.Errorf("failed to grant SSH access (transient): %w", err)
63+
}
64+
// Permanent error — roll back the key so we don't leave an unrecorded entry.
5165
if targetUser.PublicKey != "" {
5266
if rerr := RemoveAuthorizedKey(osUser, targetUser.PublicKey); rerr != nil {
5367
t.Vprintf(" %s\n", t.Yellow(fmt.Sprintf("Warning: failed to remove SSH key after failed grant: %v", rerr)))
@@ -62,38 +76,39 @@ func GrantSSHAccessToNode(
6276
// InstallAuthorizedKey appends the given public key to the user's
6377
// ~/.ssh/authorized_keys if it isn't already present. The key is tagged with
6478
// a brev-cli comment so it can be removed later by RemoveBrevAuthorizedKeys.
65-
func InstallAuthorizedKey(u *user.User, pubKey string) error {
79+
// Returns true if the key was newly written, false if it was already present.
80+
func InstallAuthorizedKey(u *user.User, pubKey string) (bool, error) {
6681
pubKey = strings.TrimSpace(pubKey)
6782
if pubKey == "" {
68-
return nil
83+
return false, nil
6984
}
7085

7186
sshDir := filepath.Join(u.HomeDir, ".ssh")
7287
if err := os.MkdirAll(sshDir, 0o700); err != nil {
73-
return fmt.Errorf("creating .ssh directory: %w", err)
88+
return false, fmt.Errorf("creating .ssh directory: %w", err)
7489
}
7590

7691
authKeysPath := filepath.Join(sshDir, "authorized_keys")
7792

7893
existing, err := os.ReadFile(authKeysPath) // #nosec G304
7994
if err != nil && !os.IsNotExist(err) {
80-
return fmt.Errorf("reading authorized_keys: %w", err)
95+
return false, fmt.Errorf("reading authorized_keys: %w", err)
8196
}
8297

8398
taggedKey := pubKey + " " + BrevKeyComment
8499

85100
if strings.Contains(string(existing), taggedKey) {
86-
return nil // already present with tag
101+
return false, nil // already present with tag
87102
}
88103

89104
// If the key exists but isn't tagged, replace it with the tagged version
90105
// so that RemoveBrevAuthorizedKeys can find it later.
91106
if strings.Contains(string(existing), pubKey) {
92107
updated := strings.ReplaceAll(string(existing), pubKey, taggedKey)
93108
if err := os.WriteFile(authKeysPath, []byte(updated), 0o600); err != nil {
94-
return fmt.Errorf("writing authorized_keys: %w", err)
109+
return false, fmt.Errorf("writing authorized_keys: %w", err)
95110
}
96-
return nil
111+
return true, nil
97112
}
98113

99114
// Ensure existing content ends with a newline before appending.
@@ -104,10 +119,10 @@ func InstallAuthorizedKey(u *user.User, pubKey string) error {
104119
content += taggedKey + "\n"
105120

106121
if err := os.WriteFile(authKeysPath, []byte(content), 0o600); err != nil {
107-
return fmt.Errorf("writing authorized_keys: %w", err)
122+
return false, fmt.Errorf("writing authorized_keys: %w", err)
108123
}
109124

110-
return nil
125+
return true, nil
111126
}
112127

113128
// RemoveAuthorizedKey removes a specific public key from the user's

0 commit comments

Comments
 (0)