@@ -133,10 +133,17 @@ impl Task for TunnelTask {
133133 let start = std:: time:: Instant :: now ( ) ;
134134
135135 match run_single_connection ( & self . conf_handle , & mut shutdown_signal) . await {
136- Ok ( ( ) ) => {
136+ Ok ( ConnectionOutcome :: Shutdown ) => {
137137 info ! ( "Tunnel task stopped" ) ;
138138 return Ok ( ( ) ) ;
139139 }
140+ Ok ( ConnectionOutcome :: CertRenewed ) => {
141+ // Renewal is a successful "completion", not a failure — skip
142+ // the backoff and reconnect immediately with the new cert.
143+ info ! ( "Certificate renewed; reconnecting with new cert immediately" ) ;
144+ backoff. reset ( ) ;
145+ continue ;
146+ }
140147 Err ( error) => {
141148 warn ! ( error = %format!( "{error:#}" ) , "Tunnel connection lost" ) ;
142149 }
@@ -175,11 +182,23 @@ impl Task for TunnelTask {
175182// Single connection lifetime
176183// ---------------------------------------------------------------------------
177184
/// Outcome of a single connection lifetime, telling the outer loop what to do next.
///
/// Only successful completions are represented here; failures are reported
/// through the `Err` side of the surrounding `anyhow::Result`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConnectionOutcome {
    /// Shutdown signal received — exit the tunnel task cleanly.
    Shutdown,
    /// Certificate was renewed successfully; reconnect immediately with the new cert.
    CertRenewed,
}
192+
178193/// Run a single QUIC tunnel connection lifetime: config → connect → event loop.
179194///
180- /// Returns `Ok(())` on graceful shutdown (shutdown signal received).
181- /// Returns `Err(...)` on any failure — the caller should retry with backoff.
182- async fn run_single_connection ( conf_handle : & ConfHandle , shutdown_signal : & mut ShutdownSignal ) -> anyhow:: Result < ( ) > {
195+ /// - `Ok(Shutdown)`: graceful shutdown, exit the task.
196+ /// - `Ok(CertRenewed)`: certificate renewed; caller should reconnect immediately.
197+ /// - `Err(...)`: connection lost or handshake failed — caller should retry with backoff.
198+ async fn run_single_connection (
199+ conf_handle : & ConfHandle ,
200+ shutdown_signal : & mut ShutdownSignal ,
201+ ) -> anyhow:: Result < ConnectionOutcome > {
183202 // Ensure rustls crypto provider is installed (ring).
184203 let _ = rustls:: crypto:: ring:: default_provider ( ) . install_default ( ) ;
185204
@@ -356,6 +375,18 @@ async fn run_single_connection(conf_handle: &ConfHandle, shutdown_signal: &mut S
356375
357376 info ! ( epoch, "Sent initial RouteAdvertise" ) ;
358377
378+ // -- Certificate renewal (post-connect, pre-traffic) --
379+ //
380+ // Run once per reconnect rather than on a periodic timer: the QUIC session
381+ // has a 120s idle timeout and 15s keep-alive, so any blip / VPN reconnect
382+ // / host sleep / gateway restart drops the connection within minutes and
383+ // sends us back through this path. With a 1-year cert and a 15-day
384+ // threshold, the renewal window will be hit on the first reconnect after
385+ // T-15d, which is more than often enough in any real deployment.
386+ if let Some ( outcome) = try_renew_certificate ( & mut ctrl, & connection, cert_path, key_path, ca_path) . await ? {
387+ return Ok ( outcome) ;
388+ }
389+
359390 // Split: recv half goes to a reader task, send half stays for periodic messages.
360391 let ( mut ctrl_send, ctrl_recv) = ctrl. into_split ( ) ;
361392 let mut task_handles = tokio:: task:: JoinSet :: new ( ) ;
@@ -409,7 +440,102 @@ async fn run_single_connection(conf_handle: &ConfHandle, shutdown_signal: &mut S
409440
410441 task_handles. shutdown ( ) . await ;
411442
412- Ok ( ( ) )
443+ Ok ( ConnectionOutcome :: Shutdown )
444+ }
445+
446+ // ---------------------------------------------------------------------------
447+ // Certificate renewal
448+ // ---------------------------------------------------------------------------
449+
450+ /// Check if the client cert is near expiry; if so, renew it via the control
451+ /// stream before opening real traffic.
452+ ///
453+ /// Returns:
454+ /// - `Ok(Some(CertRenewed))` — renewed successfully; outer loop must reconnect
455+ /// so the new cert takes effect on the next mTLS handshake.
456+ /// - `Ok(None)` — no renewal needed (or attempted renewal failed in a recoverable
457+ /// way, e.g. the gateway said no); proceed with the existing cert.
458+ /// - `Err(_)` — IO / protocol error on the control stream itself; treat as
459+ /// connection lost.
460+ async fn try_renew_certificate < S , R > (
461+ ctrl : & mut ControlStream < S , R > ,
462+ connection : & quinn:: Connection ,
463+ cert_path : & camino:: Utf8Path ,
464+ key_path : & camino:: Utf8Path ,
465+ ca_path : & camino:: Utf8Path ,
466+ ) -> anyhow:: Result < Option < ConnectionOutcome > >
467+ where
468+ S : tokio:: io:: AsyncWrite + Unpin ,
469+ R : tokio:: io:: AsyncRead + Unpin ,
470+ {
471+ const RENEWAL_THRESHOLD_DAYS : u32 = 15 ;
472+ const RENEWAL_TIMEOUT : Duration = Duration :: from_secs ( 30 ) ;
473+
474+ match crate :: enrollment:: is_cert_expiring ( cert_path, RENEWAL_THRESHOLD_DAYS ) {
475+ Ok ( false ) => {
476+ debug ! ( "Client certificate not in renewal window" ) ;
477+ return Ok ( None ) ;
478+ }
479+ Err ( error) => {
480+ warn ! ( error = %format!( "{error:#}" ) , "Failed to check certificate expiry; skipping renewal" ) ;
481+ return Ok ( None ) ;
482+ }
483+ Ok ( true ) => { }
484+ }
485+
486+ info ! (
487+ threshold_days = RENEWAL_THRESHOLD_DAYS ,
488+ "Certificate within renewal window; requesting renewal"
489+ ) ;
490+
491+ // Reuse the agent name from the existing cert as the renewal CSR's
492+ // CommonName. The gateway ignores CSR subject and trusts the
493+ // mTLS-authenticated identity, but matching the existing CN keeps the
494+ // CSR semantically correct in case validation tightens later.
495+ let agent_name = crate :: enrollment:: read_agent_name_from_cert ( cert_path)
496+ . context ( "read agent name from existing certificate for renewal" ) ?;
497+ let csr_pem =
498+ crate :: enrollment:: generate_csr_from_existing_key ( key_path, & agent_name) . context ( "generate renewal CSR" ) ?;
499+
500+ ctrl. send ( & ControlMessage :: cert_renewal_request ( csr_pem) )
501+ . await
502+ . context ( "send CertRenewalRequest" ) ?;
503+
504+ let response = tokio:: time:: timeout ( RENEWAL_TIMEOUT , ctrl. recv ( ) )
505+ . await
506+ . context ( "timeout waiting for CertRenewalResponse" ) ?
507+ . context ( "receive CertRenewalResponse" ) ?;
508+
509+ match response {
510+ ControlMessage :: CertRenewalResponse {
511+ result :
512+ agent_tunnel_proto:: CertRenewalResult :: Success {
513+ client_cert_pem,
514+ gateway_ca_cert_pem,
515+ } ,
516+ ..
517+ } => {
518+ std:: fs:: write ( cert_path. as_str ( ) , & client_cert_pem) . context ( "write renewed certificate" ) ?;
519+ std:: fs:: write ( ca_path. as_str ( ) , & gateway_ca_cert_pem) . context ( "write renewed CA certificate" ) ?;
520+ info ! ( "Certificate renewed; closing connection so new cert takes effect on reconnect" ) ;
521+ connection. close ( 0u32 . into ( ) , b"cert-renewed" ) ;
522+ Ok ( Some ( ConnectionOutcome :: CertRenewed ) )
523+ }
524+ ControlMessage :: CertRenewalResponse {
525+ result : agent_tunnel_proto:: CertRenewalResult :: Error { reason } ,
526+ ..
527+ } => {
528+ warn ! ( %reason, "Gateway refused certificate renewal; continuing with existing cert" ) ;
529+ Ok ( None )
530+ }
531+ other => {
532+ warn ! (
533+ ?other,
534+ "Unexpected response to renewal request; continuing with existing cert"
535+ ) ;
536+ Ok ( None )
537+ }
538+ }
413539}
414540
415541// ---------------------------------------------------------------------------
0 commit comments