Skip to content

Commit 8e795bc

Browse files
committed
fix: surface h2 prewarm panics, harden dead-cell test, fix stale doc (#1029)
1 parent 5980baa commit 8e795bc

1 file changed

Lines changed: 24 additions & 18 deletions

File tree

src/domain_fronter.rs

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -920,11 +920,13 @@ impl DomainFronter {
920920
/// prewarm behind `ensure_h2()` so the h1 pool stayed empty during
921921
/// the h2 init window.
922922
///
923-
/// Staggered 500 ms apart so we don't burst N TLS handshakes at the
924-
/// Google edge simultaneously, and each connection gets an 8 s
925-
/// expiry offset so they roll off gradually instead of all hitting
926-
/// POOL_TTL_SECS at once. If h2 ends up the active fast path,
927-
/// `run_pool_refill` trims the pool back down to
923+
/// The spawned h2 handshake races h1[0] — boot fires two TLS
924+
/// handshakes back-to-back. The 500 ms stagger only applies between
925+
/// h1[i] and h1[i+1] for i ≥ 1, so we don't burst the remaining
926+
/// h1[1..n] handshakes at the Google edge simultaneously. Each
927+
/// connection gets an 8 s expiry offset so they roll off gradually
928+
/// instead of all hitting POOL_TTL_SECS at once. If h2 ends up the
929+
/// active fast path, `run_pool_refill` trims the pool back down to
928930
/// `POOL_MIN_H2_FALLBACK` on the next tick — the extra warm h1
929931
/// sockets just age out naturally instead of being kept alive.
930932
pub async fn warm(self: &Arc<Self>, n: usize) {
@@ -961,10 +963,16 @@ impl DomainFronter {
961963
}
962964
}
963965
// Join the h2 prewarm here only to log whether it landed; the
964-
// h1 pool above is already populated either way. JoinError
965-
// collapses to "h2 not alive" — same as if ensure_h2 returned
966-
// None — so we still log a useful line.
967-
let h2_alive = h2_handle.await.unwrap_or(false);
966+
// h1 pool above is already populated either way. A panic in
967+
// the spawned task surfaces as `JoinError` — log it explicitly
968+
// so it isn't indistinguishable from a clean ALPN refusal.
969+
let h2_alive = match h2_handle.await {
970+
Ok(v) => v,
971+
Err(e) => {
972+
tracing::warn!("h2 prewarm task failed to join: {}", e);
973+
false
974+
}
975+
};
968976
if h2_alive {
969977
tracing::info!(
970978
"h2 fast path active; h1 fallback pool pre-warmed with {} connection(s)",
@@ -5231,15 +5239,13 @@ hello";
52315239

52325240
// The fast path normally returns Some(send, gen) when the cell
52335241
// is within TTL. With dead=true it must NOT return the stale
5234-
// SendRequest. We can't drive the open machinery here (no real
5235-
// Google edge), so the test asserts "doesn't return the stale
5236-
// cell" rather than "successfully reopens".
5237-
//
5238-
// ensure_h2 will fall through to the open path which will
5239-
// eventually try to TCP-connect to `connect_host:443`. That's
5240-
// a fake address in `fronter_for_test`, so the open will fail
5241-
// — and ensure_h2 returns None. The point is: the stale gen=1
5242-
// SendRequest was NOT served.
5242+
// SendRequest. Pre-set the failure-backoff timestamp so
5243+
// ensure_h2 short-circuits at the backoff check (no network
5244+
// I/O) regardless of whatever's bound on 127.0.0.1:443 on the
5245+
// dev/CI host. This isolates the assertion to the new
5246+
// dead-flag check.
5247+
*fronter.h2_open_failed_at.lock().await = Some(Instant::now());
5248+
52435249
let result = fronter.ensure_h2().await;
52445250
assert!(
52455251
result.is_none(),

0 commit comments

Comments
 (0)