55 "errors"
66 "fmt"
77 "io"
8+ "log/slog"
89 "net"
910 "os"
1011 "os/exec"
@@ -78,9 +79,12 @@ func newSSHCmd(opts *Options) *cobra.Command {
7879 if err != nil {
7980 return err
8081 }
82+ pfKeepAliveCtx , pfKeepAliveCancel := context .WithCancel (cmd .Context ())
83+ startPortForwardKeepalive (pfKeepAliveCtx , usedLocalPort , 30 * time .Second )
8184 var pfMu sync.Mutex
8285 currentCancelPF := cancelPF
8386 defer func () {
87+ pfKeepAliveCancel ()
8488 pfMu .Lock ()
8589 defer pfMu .Unlock ()
8690 if currentCancelPF != nil {
@@ -90,7 +94,7 @@ func newSSHCmd(opts *Options) *cobra.Command {
9094
9195 sshHost := sshHostAlias (sn )
9296 cfgPath , _ := config .ResolvePath (opts .ConfigPath )
93- if _ , cfgErr := ensureSSHConfigEntry (sshHost , sn , ns , user , remotePort , keyPath , cfgPath , cfg .Spec .Ports ); cfgErr != nil {
97+ if _ , cfgErr := ensureSSHConfigEntry (sshHost , sn , ns , user , remotePort , keyPath , cfgPath , cfg .Spec .Ports , cfg . Spec . SSH ); cfgErr != nil {
9498 fmt .Fprintf (cmd .ErrOrStderr (), "warning: failed to update ~/.ssh/config: %v\n " , cfgErr )
9599 }
96100
@@ -100,6 +104,7 @@ func newSSHCmd(opts *Options) *cobra.Command {
100104 RemotePort : remotePort ,
101105 KeepAliveInterval : time .Duration (cfg .Spec .SSH .KeepAliveInterval ) * time .Second ,
102106 KeepAliveTimeout : time .Duration (cfg .Spec .SSH .KeepAliveTimeout ) * time .Second ,
107+ KeepAliveCountMax : cfg .Spec .SSH .KeepAliveCountMax ,
103108 }
104109 if noTmux {
105110 tm .Env = map [string ]string {"OKDEV_NO_TMUX" : "1" }
@@ -123,11 +128,15 @@ func newSSHCmd(opts *Options) *cobra.Command {
123128 currentCancelPF ()
124129 currentCancelPF = nil
125130 }
131+ pfKeepAliveCancel ()
126132 cancel , lp , err := startSSHPortForwardWithFallback (newKubeClient (opts ), ns , podName (sn ), localPort , remotePort )
127133 if err != nil {
128134 return "" , 0 , err
129135 }
130136 currentCancelPF = cancel
137+ newCtx , newCancel := context .WithCancel (cmd .Context ())
138+ pfKeepAliveCancel = newCancel
139+ startPortForwardKeepalive (newCtx , lp , 30 * time .Second )
131140 return "127.0.0.1" , lp , nil
132141 })
133142 var lastRTTWarnNanos atomic.Int64
@@ -196,14 +205,23 @@ func newSSHCmd(opts *Options) *cobra.Command {
196205 }
197206 return nil
198207 }
199- if err := tm .OpenShell (); err != nil {
200- if ! tm .IsConnected () || isIgnorableProxyIOError (err ) {
201- fmt .Fprintln (cmd .ErrOrStderr (), "Connection lost. Session ended." )
202- return nil
208+ // Shell loop: reconnect automatically when the connection drops.
209+ for {
210+ err := tm .OpenShell ()
211+ if err == nil {
212+ return nil // clean exit (user typed exit/logout)
213+ }
214+ if isIgnorableProxyIOError (err ) || ! tm .IsConnected () {
215+ fmt .Fprintln (cmd .ErrOrStderr (), "\n Connection lost. Reconnecting..." )
216+ if ! tm .WaitConnected (cmd .Context ()) {
217+ fmt .Fprintln (cmd .ErrOrStderr (), "Reconnect failed. Session ended." )
218+ return nil
219+ }
220+ fmt .Fprintln (cmd .ErrOrStderr (), "Reconnected." )
221+ continue
203222 }
204223 return fmt .Errorf ("ssh shell failed: %w" , err )
205224 }
206- return nil
207225 },
208226 }
209227
@@ -313,7 +331,7 @@ func sshHostAlias(sessionName string) string {
313331 return "okdev-" + sessionName
314332}
315333
316- func ensureSSHConfigEntry (hostAlias , sessionName , namespace , user string , remotePort int , keyPath , okdevConfigPath string , forwards []config.PortMapping ) (bool , error ) {
334+ func ensureSSHConfigEntry (hostAlias , sessionName , namespace , user string , remotePort int , keyPath , okdevConfigPath string , forwards []config.PortMapping , sshSpec config. SSHSpec ) (bool , error ) {
317335 home , err := os .UserHomeDir ()
318336 if err != nil {
319337 return false , err
@@ -341,15 +359,9 @@ func ensureSSHConfigEntry(hostAlias, sessionName, namespace, user string, remote
341359 " UserKnownHostsFile /dev/null" ,
342360 " ProxyCommand " + proxyCmd ,
343361 }
344- for _ , p := range forwards {
345- if p .Local <= 0 || p .Remote <= 0 {
346- continue
347- }
348- blockLines = append (blockLines , fmt .Sprintf (" LocalForward %d 127.0.0.1:%d" , p .Local , p .Remote ))
349- }
350362 blockLines = append (blockLines ,
351- " ServerAliveInterval 30" ,
352- " ServerAliveCountMax 10" ,
363+ fmt . Sprintf ( " ServerAliveInterval %d" , sshSpec . KeepAliveInterval ) ,
364+ fmt . Sprintf ( " ServerAliveCountMax %d" , sshSpec . KeepAliveCountMax ) ,
353365 " TCPKeepAlive yes" ,
354366 " LogLevel ERROR" ,
355367 end ,
@@ -449,14 +461,21 @@ func newSSHProxyCmd(opts *Options) *cobra.Command {
449461 if err != nil {
450462 return err
451463 }
464+ pfKeepAliveCtx , pfKeepAliveCancel := context .WithCancel (context .Background ())
465+ startPortForwardKeepalive (pfKeepAliveCtx , usedLocalPort , 30 * time .Second )
466+ defer pfKeepAliveCancel ()
452467 if cancelPF != nil {
453468 defer cancelPF ()
454469 }
470+ slog .Debug ("ssh-proxy dialing local port-forward" , "port" , usedLocalPort )
455471 conn , err := waitDialLocal (usedLocalPort , 10 * time .Second )
456472 if err != nil {
473+ slog .Debug ("ssh-proxy dial failed" , "error" , err )
457474 return err
458475 }
459476 defer conn .Close ()
477+ slog .Debug ("ssh-proxy connection established" , "localAddr" , conn .LocalAddr (), "remoteAddr" , conn .RemoteAddr ())
478+
460479 var wg sync.WaitGroup
461480 var copyErr error
462481 var once sync.Once
@@ -465,13 +484,16 @@ func newSSHProxyCmd(opts *Options) *cobra.Command {
465484 if err == nil || errors .Is (err , io .EOF ) || errors .Is (err , net .ErrClosed ) || errors .Is (err , syscall .EPIPE ) || isIgnorableProxyIOError (err ) {
466485 return
467486 }
487+ slog .Debug ("ssh-proxy copy error" , "error" , err )
468488 once .Do (func () { copyErr = err })
469489 }
470490 wg .Add (2 )
471491 go func () {
472492 defer wg .Done ()
493+ slog .Debug ("ssh-proxy starting copy: stdin -> conn" )
473494 _ , err := io .Copy (conn , os .Stdin )
474495 setErr (err )
496+ slog .Debug ("ssh-proxy finished copy: stdin -> conn" )
475497 select {
476498 case <- done :
477499 default :
@@ -480,12 +502,15 @@ func newSSHProxyCmd(opts *Options) *cobra.Command {
480502 }()
481503 go func () {
482504 defer wg .Done ()
505+ slog .Debug ("ssh-proxy starting copy: conn -> stdout" )
483506 _ , err := io .Copy (os .Stdout , conn )
484507 setErr (err )
508+ slog .Debug ("ssh-proxy finished copy: conn -> stdout" )
485509 close (done )
486510 _ = conn .Close ()
487511 }()
488512 <- done
513+ slog .Debug ("ssh-proxy session finished" , "error" , copyErr )
489514 return copyErr
490515 },
491516 }
@@ -500,7 +525,8 @@ func isIgnorableProxyIOError(err error) bool {
500525 msg := strings .ToLower (err .Error ())
501526 return strings .Contains (msg , "broken pipe" ) ||
502527 strings .Contains (msg , "use of closed network connection" ) ||
503- strings .Contains (msg , "connection reset by peer" )
528+ strings .Contains (msg , "connection reset by peer" ) ||
529+ strings .Contains (msg , "remote command exited without exit status" )
504530}
505531
506532func waitDialLocal (localPort int , timeout time.Duration ) (net.Conn , error ) {
@@ -533,7 +559,7 @@ func sshControlSocketPath(hostAlias string) (string, error) {
533559 return filepath .Join (dir , hostAlias + ".sock" ), nil
534560}
535561
536- func startManagedSSHForward (hostAlias string ) error {
562+ func startManagedSSHForward (hostAlias string , sshSpec config. SSHSpec ) error {
537563 socketPath , err := sshControlSocketPath (hostAlias )
538564 if err != nil {
539565 return err
@@ -542,19 +568,38 @@ func startManagedSSHForward(hostAlias string) error {
542568 if err := check .Run (); err == nil {
543569 return nil
544570 }
545- cmd := exec .Command (
571+ return startManagedSSHForwardWithForwards (hostAlias , nil , sshSpec )
572+ }
573+
574+ func startManagedSSHForwardWithForwards (hostAlias string , forwards []config.PortMapping , sshSpec config.SSHSpec ) error {
575+ socketPath , err := sshControlSocketPath (hostAlias )
576+ if err != nil {
577+ return err
578+ }
579+ check := exec .Command ("ssh" , "-S" , socketPath , "-O" , "check" , hostAlias )
580+ if err := check .Run (); err == nil {
581+ return nil
582+ }
583+ args := []string {
546584 "ssh" ,
547585 "-fN" ,
548586 "-M" ,
549587 "-S" , socketPath ,
550- "-o" , "ControlPersist=600 " ,
588+ "-o" , "ControlPersist=3600 " ,
551589 "-o" , "ExitOnForwardFailure=no" ,
552- "-o" , "ServerAliveInterval=30" ,
553- "-o" , "ServerAliveCountMax=10" ,
590+ "-o" , fmt . Sprintf ( "ServerAliveInterval=%d" , sshSpec . KeepAliveInterval ) ,
591+ "-o" , fmt . Sprintf ( "ServerAliveCountMax=%d" , sshSpec . KeepAliveCountMax ) ,
554592 "-o" , "TCPKeepAlive=yes" ,
555593 "-o" , "LogLevel=ERROR" ,
556- hostAlias ,
557- )
594+ }
595+ for _ , p := range forwards {
596+ if p .Local <= 0 || p .Remote <= 0 {
597+ continue
598+ }
599+ args = append (args , "-L" , fmt .Sprintf ("%d:127.0.0.1:%d" , p .Local , p .Remote ))
600+ }
601+ args = append (args , hostAlias )
602+ cmd := exec .Command (args [0 ], args [1 :]... )
558603 if out , err := cmd .CombinedOutput (); err != nil {
559604 return fmt .Errorf ("start managed ssh forward: %w (%s)" , err , strings .TrimSpace (string (out )))
560605 }
0 commit comments