MasterHttpRelayVPN-RUST/src/domain_fronter.rs at a3af410b3b89bc922cd8132e509c6fe664ea2e37 · CaptainMirage/MasterHttpRelayVPN-RUST · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//! Apps Script relay client.
//!
//! Opens a TLS connection to the configured Google IP while the TLS SNI is set
//! to `front_domain` (e.g. "www.google.com"). Inside the encrypted stream, HTTP
//! `Host` points to `script.google.com`, and we POST a JSON payload to
//! `/macros/s/{script_id}/exec`. Apps Script performs the actual upstream
//! HTTP fetch server-side and returns a JSON envelope.
//!
//! Multiplexes over HTTP/2 when the relay edge agrees via ALPN; falls back
//! to HTTP/1.1 keep-alive when h2 is refused or fails. Range-parallel
//! downloads are implemented by `relay_parallel_range_to` (writer-based,
//! streams files larger than Apps Script's single-GET ceiling) with a
//! buffered `relay_parallel_range` compatibility wrapper for callers that
//! want a `Vec<u8>` back.

use std::collections::HashMap;
// AtomicU64 via portable-atomic: native on 64-bit / armv7, spinlock-
// backed on mipsel (MIPS32 has no 64-bit atomic instructions). API
// is identical to std::sync::atomic::AtomicU64 so call sites need
// no other changes.
use portable_atomic::AtomicU64;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};

use base64::engine::general_purpose::STANDARD as B64;
use base64::Engine;
use bytes::Bytes;
use rand::{thread_rng, Rng, RngCore};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpStream;
use tokio::sync::{broadcast, Mutex};
use tokio::time::timeout;
use tokio_rustls::client::TlsStream;
use tokio_rustls::TlsConnector;

use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier};
use rustls::pki_types::{CertificateDer, ServerName, UnixTime};
use rustls::{ClientConfig, DigitallySignedStruct, SignatureScheme};

use crate::cache::{cache_key, is_cacheable_method, parse_ttl, ResponseCache};
use crate::config::Config;

/// Error type returned by the relay client in this module.
///
/// `Display` text for each variant comes from its `#[error(...)]`
/// attribute. Variants carrying `#[from]` get a `From` conversion from
/// the wrapped type, so `?` on those error types converts automatically.
#[derive(Debug, thiserror::Error)]
pub enum FronterError {
    /// Underlying I/O failure (converted from `std::io::Error`).
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
    /// TLS-layer failure reported by rustls.
    #[error("tls: {0}")]
    Tls(#[from] rustls::Error),
    /// A host name could not be turned into a rustls server name.
    #[error("invalid dns name: {0}")]
    Dns(#[from] rustls::pki_types::InvalidDnsNameError),
    /// The response could not be used; the string carries the detail.
    /// NOTE(review): the exact producing conditions live outside this
    /// view.
    #[error("bad response: {0}")]
    BadResponse(String),
    /// Relay-level failure with a free-form message. Within this view it
    /// is produced for a missing `script_id` configuration and for
    /// lowered `OpenH2Error::AlpnRefused` / `OpenH2Error::Handshake`.
    #[error("relay error: {0}")]
    Relay(String),
    /// An operation exceeded its deadline. NOTE(review): which deadline
    /// applies is decided by the call sites, not visible here.
    #[error("timeout")]
    Timeout,
    /// JSON (de)serialization failure (converted from `serde_json::Error`).
    #[error("json: {0}")]
    Json(#[from] serde_json::Error),
    /// Wraps another error and tells outer retry/fallback layers
    /// (`do_relay_with_retry`, the exit-node→direct-Apps-Script
    /// fallback in `relay()`) NOT to replay the request. Used when an
    /// h2 attempt failed *after* `send_request` succeeded — the
    /// request may have already reached and been processed by Apps
    /// Script (or the exit node), and replaying via h1 / direct path
    /// would duplicate side effects for non-idempotent methods.
    ///
    /// `Display` is transparent so error messages look identical to
    /// the wrapped variant; tests/observability use `is_retryable()`
    /// and `into_inner()` to introspect.
    #[error(transparent)]
    NonRetryable(Box<FronterError>),
}

impl FronterError {
    /// Reports whether an outer retry/fallback layer may send the
    /// request again. Only `NonRetryable(_)` answers `false`; it marks
    /// "the request may already have been sent; do not duplicate."
    pub fn is_retryable(&self) -> bool {
        let replay_forbidden = matches!(self, FronterError::NonRetryable(_));
        !replay_forbidden
    }

    /// Removes one `NonRetryable` wrapper and hands back the error it
    /// contained; any other variant is returned unchanged. Intended for
    /// surfacing the original message once the retry/fallback decision
    /// has been made.
    pub fn into_inner(self) -> FronterError {
        if let FronterError::NonRetryable(wrapped) = self {
            return *wrapped;
        }
        self
    }
}

/// Socket type stored in the connection pool: a client-side TLS stream
/// layered over a TCP socket.
type PooledStream = TlsStream<TcpStream>;
/// Time-to-live for a pooled socket, in seconds.
/// NOTE(review): presumably compared against `PoolEntry::created` to
/// discard stale sockets — the consuming code is outside this view.
const POOL_TTL_SECS: u64 = 60;
/// Size the refill loop expands the h1 pool to when h2 is not available
/// (the `H2Cell` docs refer to this as "expanding to `POOL_MIN` (8)").
const POOL_MIN: usize = 8;
/// Seconds between pool-refill ticks; the `H2Cell` docs rely on this
/// when they say a dead h2 connection is noticed "within one tick (≤5 s)".
const POOL_REFILL_INTERVAL_SECS: u64 = 5;
/// Upper bound related to pool size.
/// NOTE(review): presumably the maximum number of pooled sockets kept —
/// confirm against the pool release path.
const POOL_MAX: usize = 80;
/// Outer per-request timeout in seconds. The h2 timeout constants in
/// this file are sized relative to this 25 s budget.
const REQUEST_TIMEOUT_SECS: u64 = 25;
/// Chunk size for range-parallel downloads: 256 KiB per chunk.
const RANGE_PARALLEL_CHUNK_BYTES: u64 = 256 * 1024;
/// HTTP/2 connection lifetime before we proactively reopen. Apps Script's
/// edge has been observed to send GOAWAY at ~10 min anyway, so we cycle
/// at 9 min (540 s) to do an orderly reconnect on our schedule rather
/// than letting an in-flight stream race a server-initiated close.
const H2_CONN_TTL_SECS: u64 = 540;
/// Bound on the h2 ready/back-pressure phase only. `SendRequest::ready()`
/// awaits a free slot under the server's `MAX_CONCURRENT_STREAMS`. A
/// stall here means the connection is overloaded (or dead at the
/// muxer level) but no stream has been opened yet — `RequestSent::No`,
/// safe to fall back to h1 without duplication risk. Kept short
/// (5 s) so a saturated conn doesn't burn the caller's whole budget.
///
/// The post-send phase (response headers + body drain) uses the
/// caller-supplied `response_deadline` instead — see
/// `h2_round_trip`. This way a slow but legitimate Apps Script call
/// isn't cut off at an arbitrary fixed cap, and Full-mode batches can
/// honor the user's `request_timeout_secs` setting.
const H2_READY_TIMEOUT_SECS: u64 = 5;
/// Default response-phase deadline used by `relay_uncoalesced` callers
/// (the Apps-Script direct path). Sized to be just under the outer
/// `REQUEST_TIMEOUT_SECS` (25 s) so an h2 timeout still leaves a few
/// seconds (25 - 20 = 5 s) of outer budget for an h1 fallback
/// round-trip when the caller chose to retry.
const H2_RESPONSE_DEADLINE_DEFAULT_SECS: u64 = 20;
/// Bound on the TCP connect + TLS handshake + h2 handshake phase. A
/// blackholed `connect_host:443` previously stalled `ensure_h2` until
/// the outer 25 s timeout fired (returning 504 without ever falling
/// back). With this bound, a slow open trips after 8 s and the caller
/// drops to h1 with ~17 s of outer budget to spare.
const H2_OPEN_TIMEOUT_SECS: u64 = 8;
/// After an h2 open failure, suppress further open attempts for this
/// long (15 s). Prevents every concurrent caller during an h2 outage
/// from paying its own full handshake-timeout cost in turn.
const H2_OPEN_FAILURE_BACKOFF_SECS: u64 = 15;
/// Same idea as `H2_OPEN_TIMEOUT_SECS` but for the legacy h1 socket
/// path. Without this, a stuck TCP connect or TLS handshake to a
/// blackholed `connect_host:443` would block `acquire()` (and the
/// `warm()` prewarm loop) until the outer batch budget elapsed —
/// the same symptom #924 hit during the warm-race window. Bounded
/// here so a single hung handshake aborts fast and the loop / caller
/// makes progress on the next attempt.
const H1_OPEN_TIMEOUT_SECS: u64 = 8;
/// Cadence for Apps Script container keepalive pings. Apps Script
/// containers go cold after ~5min idle and cost 1-3s on the first
/// request to wake back up — most painful on YouTube / streaming where
/// the first chunk after a quiet pause stalls the player. Pinging every
/// 240 s (4 min) stays inside that idle window.
const H1_KEEPALIVE_INTERVAL_SECS: u64 = 240;
/// Largest response body Apps Script's `UrlFetchApp` will deliver before
/// the script gets killed mid-execution. The hard wire ceiling is ~50 MiB;
/// after base64 / envelope overhead and edge variance, the practical raw
/// ceiling for a single GET sits around 40 MiB. This bounds the
/// **writer-based** API's streaming threshold: above this, the buffered
/// stitch path's single-GET fallback wouldn't fit through Apps Script
/// even if invoked, so streaming chunks straight to the wire (with
/// truncate-on-failure semantics the client can resume via Range)
/// strictly beats today's 25 s timeout + 504 "Apps Script
/// unresponsive" (#1042).
const APPS_SCRIPT_BODY_MAX_BYTES: u64 = 40 * 1024 * 1024;

/// Hard ceiling on how many bytes the streaming side of the
/// range-parallel path will fetch for a single response. A hostile
/// origin can advertise an absurd `Content-Range` total
/// (`bytes 0-262143/<huge>`), pass our probe-checks with a normally-
/// sized 256 KiB first-chunk body, and then drive us to keep issuing
/// chunk Apps Script calls until the client disconnects. Each chunk
/// is one Apps Script invocation, counting against the account's
/// daily quota (~20 k requests/day on the free tier), so an
/// unattended hostile download can exhaust the quota and lock the
/// user out of the relay entirely.
///
/// 16 GiB is well above any legitimate single-file download a user
/// is likely to do through a relay VPN (game patches, OS images,
/// video files all fit) but small enough to bound worst-case quota
/// drain to ~65 k chunks per pwned URL (16 GiB / 256 KiB = 65 536).
/// Above this cap the streaming branch refuses the response with a
/// 502 instead of plowing through.
const MAX_STREAMED_RANGE_BYTES: u64 = 16 * 1024 * 1024 * 1024;

/// Byte interval between `range-parallel-stream` progress log lines.
/// Large downloads through the streaming branch otherwise look stuck
/// in the logs (one "starting N chunks" line at the top, nothing
/// until completion or failure). At 16 MiB intervals the operator sees
/// ~6 lines per 100 MiB and ~64 lines per 1 GiB — useful pace at the
/// ~1.4 MB/s typical through-relay throughput, and quiet enough that
/// even a 16 GiB file won't drown the log (~1024 progress lines over
/// the multi-hour download). Per user feedback on PR #1085.
const STREAM_PROGRESS_LOG_INTERVAL_BYTES: u64 = 16 * 1024 * 1024;

/// Hard ceiling (64 MiB) on the buffered stitch buffer's
/// `Vec::with_capacity(total)` allocation. Two roles:
///
///   1. Memory-safety cap. A hostile/buggy origin advertising
///      `Content-Range: bytes 0-1/<huge>` could otherwise drive
///      preallocation to enormous values; totals above this either
///      stream (writer-based API) or fall back to a single GET
///      (`Vec<u8>` compatibility wrapper, see
///      [`DomainFronter::relay_parallel_range`]).
///   2. Pre-1.9.23 compatibility floor for the `Vec<u8>` wrapper.
///      Range-capable downloads in the 40-64 MiB band used to stitch
///      successfully via the buffered path; collapsing this constant
///      into [`APPS_SCRIPT_BODY_MAX_BYTES`] would have pushed those
///      onto the single-GET fallback path, where Apps Script returns
///      502/504 because they're above its 50 MiB response ceiling.
///      Keeping the two cutoffs separate restores that band's
///      working buffered behavior for wrapper callers.
const BUFFERED_STITCH_MAX_BYTES: u64 = 64 * 1024 * 1024;

/// One socket held in the connection pool, paired with the instant it
/// was recorded as created.
struct PoolEntry {
    /// The pooled TLS-over-TCP connection.
    stream: PooledStream,
    /// Creation timestamp.
    /// NOTE(review): presumably checked against `POOL_TTL_SECS` to expire
    /// stale sockets — the check itself is outside this view.
    created: Instant,
}

/// Single shared HTTP/2 connection to the Google edge. One TCP/TLS
/// socket carries up to ~100 concurrent streams (server's
/// `MAX_CONCURRENT_STREAMS` setting); each relay request takes a clone
/// of the `SendRequest` handle and opens its own stream. Cheaper than
/// the legacy per-request socket pool — no head-of-line blocking when
/// a single Apps Script call stalls.
///
/// `generation` is monotonic per fronter and lets `poison_h2_if_gen`
/// avoid the race where task A's stale failure clears task B's
/// freshly-reopened healthy cell.
///
/// `dead` is set by the spawned connection-driver task when the h2
/// `Connection` future ends (GOAWAY, network error, normal close).
/// Without this, the cell silently held a dead `SendRequest` after a
/// mid-session disconnect — the next request paid a wasted h2 round
/// trip to detect it via `ready()` failure, AND `run_pool_refill`
/// kept maintaining the small `POOL_MIN_H2_FALLBACK` (2-socket) pool
/// instead of expanding to `POOL_MIN` (8). With the flag,
/// `run_pool_refill` notices h2 is dead within one tick (≤5 s) and
/// pre-warms the larger fallback pool before the next request burst,
/// and `ensure_h2` short-circuits the `H2_CONN_TTL_SECS`-based
/// liveness check on a known-dead cell.
struct H2Cell {
    /// Request handle for the shared connection; cloned per relay
    /// request so each request opens its own stream.
    send: h2::client::SendRequest<Bytes>,
    /// When this connection was opened; the basis for the
    /// `H2_CONN_TTL_SECS` age check.
    created: Instant,
    /// Tag identifying this particular connection instance, used to
    /// tell a stale failure apart from a freshly installed cell.
    generation: u64,
    /// Shared flag flipped by the connection-driver task once the
    /// connection has ended.
    dead: Arc<AtomicBool>,
}

/// "Did this request reach Apps Script?" signal carried out of every
/// h2 failure so callers know whether replaying via h1 is safe.
///
/// - `No`: the failure occurred before `send_request` returned. The
///   stream was never opened on the wire; replaying through h1 is
///   guaranteed not to duplicate any side effect.
/// - `Maybe`: `send_request` succeeded (headers queued for sending)
///   but a later step failed — server may have already received the
///   request and may already be processing it. Replaying a
///   non-idempotent op (POST/PUT/DELETE, tunnel write, batch ops)
///   risks duplicating side effects. Only safe to retry for methods
///   that are idempotent by HTTP semantics.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum RequestSent {
    /// Failure happened before the stream was opened; replay is safe.
    No,
    /// The request may have reached the server; replay only idempotent
    /// operations.
    Maybe,
}

/// Typed errors from `open_h2`. Used so `ensure_h2` can recognize the
/// "peer refused h2 in ALPN" outcome and sticky-disable the fast path
/// without resorting to string matching across function boundaries.
///
/// Converted into the public `FronterError` through the `From` impl
/// that follows this enum.
#[derive(Debug, thiserror::Error)]
enum OpenH2Error {
    /// TLS completed but the peer did not select `h2` via ALPN.
    #[error("ALPN did not negotiate h2; peer prefers http/1.1")]
    AlpnRefused,
    /// I/O failure while opening the connection (from `std::io::Error`).
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
    /// TLS failure reported by rustls.
    #[error("tls: {0}")]
    Tls(#[from] rustls::Error),
    /// The SNI host could not be parsed as a rustls server name.
    #[error("dns: {0}")]
    Dns(#[from] rustls::pki_types::InvalidDnsNameError),
    /// The HTTP/2 handshake failed; the string carries the detail.
    #[error("h2 handshake: {0}")]
    Handshake(String),
}

impl From<OpenH2Error> for FronterError {
    /// Lowers an internal `open_h2` failure into the public error type.
    ///
    /// The two h2-specific outcomes (ALPN refusal, handshake failure)
    /// become `Relay` messages; the transport-level variants map onto
    /// their same-named `FronterError` counterparts with the payload
    /// moved across untouched.
    fn from(e: OpenH2Error) -> Self {
        match e {
            // h2-only conditions have no dedicated public variant.
            OpenH2Error::AlpnRefused => Self::Relay(String::from("alpn refused h2")),
            OpenH2Error::Handshake(detail) => Self::Relay(format!("h2 handshake: {}", detail)),
            // One-to-one passthrough of the underlying cause.
            OpenH2Error::Io(cause) => Self::Io(cause),
            OpenH2Error::Tls(cause) => Self::Tls(cause),
            OpenH2Error::Dns(cause) => Self::Dns(cause),
        }
    }
}

/// Relay client state: TLS connectors, the h1 socket pool, the shared
/// h2 connection, response cache, per-deployment health bookkeeping,
/// traffic counters, and tuning values copied from `Config`.
pub struct DomainFronter {
    /// Host the TCP connection is made to (the open-timeout notes in
    /// this file refer to it as `connect_host:443`).
    connect_host: String,
    /// Pool of SNI domains to rotate through per outbound connection. All of
    /// them must be hosted on the same Google edge as `connect_host` (that's
    /// the whole point of domain fronting). Rotating across several of them
    /// defeats naive DPI that would count "too many connections to a single
    /// SNI". Populated from config's front_domain: if that's a single name we
    /// add a small pool of known-safe Google subdomains automatically.
    sni_hosts: Vec<String>,
    /// NOTE(review): presumably the rotation cursor into `sni_hosts` —
    /// confirm against the connection-open path.
    sni_idx: AtomicUsize,
    /// NOTE(review): presumably the HTTP `Host` header value sent inside
    /// the TLS stream (the module docs name `script.google.com`) — confirm.
    http_host: &'static str,
    /// NOTE(review): presumably the shared secret placed in outbound
    /// payloads — confirm against the payload builder.
    auth_key: String,
    /// Apps Script deployment ids taken from `Config::script_ids_resolved`;
    /// construction fails if this list is empty.
    script_ids: Vec<String>,
    /// NOTE(review): presumably the rotation cursor into `script_ids` —
    /// confirm against the script-selection code.
    script_idx: AtomicUsize,
    /// Fan-out factor: fire this many Apps Script instances in parallel
    /// per request and return first success. `<= 1` = off.
    parallel_relay: usize,
    /// Enable the `normalize_x_graphql` URL rewrite (issue #16, credit
    /// seramo_ir). When true, GETs to `x.com/i/api/graphql/<hash>/<op>`
    /// have their query trimmed to the first `variables=` block so the
    /// response cache isn't busted by the constantly-changing `features`
    /// / `fieldToggles` params.
    normalize_x_graphql: bool,
    /// Set once we've emitted the "UnknownIssuer means ISP MITM" hint,
    /// so we don't spam it every time a cert-validation error repeats.
    cert_hint_shown: std::sync::atomic::AtomicBool,
    /// Connector used by `open_h2`: advertises ALPN `["h2", "http/1.1"]`
    /// when the h2 fast path is enabled, else just `["http/1.1"]`. Never
    /// used by the h1 pool path — see `tls_connector_h1`.
    tls_connector: TlsConnector,
    /// Connector used by `open()` (h1 pool warm/refill/acquire). ALPN
    /// is forced to `["http/1.1"]` so a Google edge that would have
    /// preferred h2 still negotiates h1 here. Without this, pooled
    /// sockets could end up speaking h2 frames after handshake, and
    /// the `write_all(b"GET / HTTP/1.1\r\n...")` fallback would land
    /// on a server that has no idea what we're doing.
    tls_connector_h1: TlsConnector,
    /// Shared pool of h1 sockets (see `PoolEntry`).
    pool: Arc<Mutex<Vec<PoolEntry>>>,
    /// HTTP/2 fast path. `None` until first relay opens it; cleared on
    /// connection failure or expiry so the next call reopens. Skipped
    /// entirely when `force_http1` is set or when the peer refused h2
    /// during ALPN (sticky `h2_disabled`).
    h2_cell: Arc<Mutex<Option<H2Cell>>>,
    /// Serializes "open a new h2 connection" attempts so that during
    /// an outage, only one task pays the handshake cost — concurrent
    /// callers see the lock contended via `try_lock` and fall through
    /// to h1 immediately rather than queueing behind a slow handshake.
    /// Distinct from `h2_cell` so the cell mutex is never held across
    /// network I/O.
    h2_open_lock: Arc<Mutex<()>>,
    /// Wall-clock timestamp of the last failed `open_h2`. While within
    /// `H2_OPEN_FAILURE_BACKOFF_SECS` of this, `ensure_h2` returns None
    /// without retrying — prevents thundering-herd handshake attempts
    /// during transient h2 outages.
    h2_open_failed_at: Arc<Mutex<Option<Instant>>>,
    /// Monotonic counter for `H2Cell::generation`. Each successful
    /// `open_h2` increments and tags the new cell so `poison_h2_if_gen`
    /// can avoid the race where a stale failure clears a freshly-opened
    /// cell that another task just installed.
    h2_generation: Arc<AtomicU64>,
    /// Set when ALPN negotiates http/1.1 (peer refused h2) or when
    /// `force_http1` is true. Sticky for the lifetime of the fronter:
    /// once we know this peer doesn't speak h2, don't keep retrying
    /// the handshake on every relay call.
    h2_disabled: Arc<AtomicBool>,
    /// Shared response cache.
    cache: Arc<ResponseCache>,
    /// Map from a string key to a broadcast sender of response bytes.
    /// NOTE(review): presumably used to coalesce identical in-flight
    /// requests so followers receive the leader's response — confirm.
    inflight: Arc<Mutex<HashMap<String, broadcast::Sender<Vec<u8>>>>>,
    /// NOTE(review): presumably counts requests served via `inflight`
    /// coalescing — confirm.
    coalesced: AtomicU64,
    /// Map from a string key to an `Instant`.
    /// NOTE(review): key meaning and whether the `Instant` is the ban
    /// start or expiry are not visible here — confirm. See
    /// `script_timeouts` for how the two maps differ.
    blacklist: Arc<std::sync::Mutex<HashMap<String, Instant>>>,
    /// Per-deployment rolling timeout counter. Maps `script_id` →
    /// `(window_start, strike_count)`. Reset when the window expires
    /// or when a batch succeeds. Triggers a short-cooldown blacklist
    /// at `TIMEOUT_STRIKE_LIMIT`. Distinct from `blacklist` because
    /// strike state is per-deployment health bookkeeping, not the
    /// permanent ban list.
    script_timeouts: Arc<std::sync::Mutex<HashMap<String, (Instant, u32)>>>,
    /// Count of Apps-Script-direct relay calls (incremented in
    /// `relay_uncoalesced`, per the `h2_calls` notes below).
    relay_calls: AtomicU64,
    /// NOTE(review): presumably the number of relay calls that ended in
    /// an error — confirm.
    relay_failures: AtomicU64,
    /// NOTE(review): presumably total response bytes relayed — confirm
    /// which direction(s) are counted.
    bytes_relayed: AtomicU64,
    /// Relay calls that successfully completed over the h2 fast path,
    /// across **all** entry points: Apps-Script direct relays,
    /// exit-node outer calls, full-mode tunnel single ops, and
    /// full-mode tunnel batches.
    ///
    /// **Not** comparable to `relay_calls`: that counter only counts
    /// the Apps-Script-direct path (incremented in `relay_uncoalesced`).
    /// The other three paths bypass `relay_uncoalesced` entirely, so in
    /// full-mode deployments `h2_calls` can exceed `relay_calls` —
    /// reading their ratio as a "% on h2" gives a wrong number.
    ///
    /// To gauge h2 health, compute `h2_calls / (h2_calls + h2_fallbacks)`.
    /// That's the success ratio across all transports; a healthy
    /// deployment shows > 95 %.
    h2_calls: AtomicU64,
    /// Relay calls that attempted h2 but had to fall back to h1
    /// (transient handshake failure, mid-stream error, conn poisoned,
    /// open backoff, or `RequestSent::No` failure that the call site
    /// chose to retry on h1). Same all-entry-points scope as
    /// `h2_calls`. A persistently high `h2_fallbacks / (h2_calls +
    /// h2_fallbacks)` ratio indicates an unhealthy h2 conn or a flaky
    /// middlebox eating h2 frames; consider `force_http1: true`.
    h2_fallbacks: AtomicU64,
    /// Per-host breakdown of traffic going through this fronter. Keyed by
    /// the host of the URL (e.g. "api.x.com"). Read-mostly; only touched
    /// on the slow path (once per relayed request), so a plain Mutex is
    /// fine.
    per_site: Arc<std::sync::Mutex<HashMap<String, HostStat>>>,
    /// Daily-scoped counters, reset at 00:00 UTC. Tracks what *this
    /// mhrv-rs process* has observed today — NOT the authoritative
    /// Apps Script quota bucket on Google's side (which counts across
    /// every client hitting the same deployment). Useful as a local
    /// "budget used today" estimate in the UI.
    ///
    /// Both counters rebase to zero the first time any recording call
    /// crosses a UTC date boundary. `today_key` holds "YYYY-MM-DD" of
    /// the currently-counted day; when we see a new date we swap and
    /// clear the counters.
    today_calls: AtomicU64,
    /// Bytes counted for the current UTC day (see `today_calls`).
    today_bytes: AtomicU64,
    /// "YYYY-MM-DD" of the day the two `today_*` counters refer to.
    today_key: std::sync::Mutex<String>,
    /// Suppress the random `_pad` field that v1.8.0+ adds to outbound
    /// payloads. Mirrors `Config::disable_padding` (#391). Default false
    /// (padding active = stronger DPI defense at +25% bandwidth cost).
    disable_padding: bool,
    /// Per-instance auto-blacklist tuning. Mirrors `Config::auto_blacklist_*`
    /// (#391, #444). Cached here so the hot path in `record_timeout_strike`
    /// doesn't have to reach back through the Config (which we don't keep
    /// a reference to).
    auto_blacklist_strikes: u32,
    /// Length of the rolling strike window (see `auto_blacklist_strikes`).
    auto_blacklist_window: Duration,
    /// Cooldown applied once the strike limit is reached (see
    /// `auto_blacklist_strikes`).
    auto_blacklist_cooldown: Duration,
    /// Per-batch HTTP timeout. Mirrors `Config::request_timeout_secs`
    /// (#430, masterking32 PR #25). Read by `tunnel_client::fire_batch`
    /// so a single config field tunes the timeout used everywhere.
    batch_timeout: Duration,
    /// Optional second-hop exit node (Deno Deploy / fly.io / etc.)
    /// to bypass CF-anti-bot blocks on sites that flag Google datacenter
    /// IPs (chatgpt.com, claude.ai, grok.com, x.com). Mirrors
    /// `Config::exit_node`. When `exit_node_enabled` is false (the more
    /// common state), all relay traffic takes the regular Apps Script
    /// path. When true, hosts matching `exit_node_hosts` (or all hosts
    /// when `exit_node_full`) route through the exit-node URL inside
    /// the Apps Script call.
    exit_node_enabled: bool,
    /// URL of the exit node (see `exit_node_enabled`).
    exit_node_url: String,
    /// NOTE(review): presumably a pre-shared key presented to the exit
    /// node — confirm against the exit-node request builder.
    exit_node_psk: String,
    /// When true, every host is routed through the exit node rather than
    /// only those in `exit_node_hosts` (see `exit_node_enabled`).
    exit_node_full: bool,
    /// Pre-normalized (lowercased, leading-dot stripped) host list for
    /// fast O(N) match in `exit_node_matches`.
    exit_node_hosts: Vec<String>,
}

/// Running totals collected for a single remote host.
#[derive(Default, Clone, Debug)]
pub struct HostStat {
    /// Number of requests recorded for the host.
    pub requests: u64,
    /// Number of recorded cache hits.
    pub cache_hits: u64,
    /// Byte total recorded for the host.
    pub bytes: u64,
    /// Sum of recorded latencies, in nanoseconds.
    pub total_latency_ns: u64,
}

impl HostStat {
    /// Mean latency per request in milliseconds.
    ///
    /// Returns `0.0` when no requests have been recorded, so the
    /// division never sees a zero denominator.
    pub fn avg_latency_ms(&self) -> f64 {
        const NANOS_PER_MILLI: f64 = 1_000_000.0;
        match self.requests {
            0 => 0.0,
            count => {
                let mean_ns = (self.total_latency_ns as f64) / (count as f64);
                mean_ns / NANOS_PER_MILLI
            }
        }
    }
}

/// Blacklist cooldown in seconds (600 s = 10 min).
/// NOTE(review): presumably how long an entry stays in
/// `DomainFronter::blacklist` before being eligible again — the code
/// that reads this constant is outside this view; confirm.
const BLACKLIST_COOLDOWN_SECS: u64 = 600;

// Auto-blacklist defaults are now per-instance fields on `DomainFronter`,
// driven by `Config::auto_blacklist_strikes` / `_window_secs` /
// `_cooldown_secs` (#391, #444). The module-level constants that used to
// live here were removed — see the `Config` doc comments for tuning
// guidance and `default_auto_blacklist_*` for the historical defaults
// (3 strikes / 30s window / 120s cooldown).

/// Request payload sent to Apps Script (single, non-batch).
///
/// Field names are the single-letter JSON keys of the wire format.
/// Optional fields are left out of the serialized JSON when `None`.
#[derive(Serialize)]
struct RelayRequest<'a> {
    /// NOTE(review): presumably the auth key — confirm against the builder.
    k: &'a str,
    /// NOTE(review): presumably the HTTP method — confirm.
    m: &'a str,
    /// NOTE(review): presumably the target URL — confirm.
    u: &'a str,
    /// NOTE(review): presumably request headers as a JSON object; omitted
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    h: Option<serde_json::Map<String, Value>>,
    /// NOTE(review): presumably the encoded request body; omitted when
    /// `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    b: Option<String>,
    /// NOTE(review): presumably the body's content type; omitted when
    /// `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    ct: Option<&'a str>,
    /// Boolean flag that is always serialized.
    /// NOTE(review): meaning not visible in this view — confirm against
    /// the Apps Script side.
    r: bool,
}

/// Parsed Apps Script response JSON (single mode).
///
/// Every field is optional and defaults to `None` when the key is
/// missing from the JSON.
#[derive(Deserialize, Default)]
struct RelayResponse {
    /// NOTE(review): presumably the upstream HTTP status code — confirm.
    #[serde(default)]
    s: Option<u16>,
    /// NOTE(review): presumably the upstream response headers — confirm.
    #[serde(default)]
    h: Option<serde_json::Map<String, Value>>,
    /// NOTE(review): presumably the encoded response body — confirm.
    #[serde(default)]
    b: Option<String>,
    /// NOTE(review): presumably an error message set by the script when
    /// the fetch failed — confirm.
    #[serde(default)]
    e: Option<String>,
}

/// Parsed tunnel response JSON (full mode).
///
/// Every field is optional and defaults to `None` when the key is
/// missing from the JSON.
#[derive(Deserialize, Debug, Clone)]
pub struct TunnelResponse {
    /// NOTE(review): presumably the tunnel session id — confirm.
    #[serde(default)]
    pub sid: Option<String>,
    /// NOTE(review): presumably the encoded stream data — confirm.
    #[serde(default)]
    pub d: Option<String>,
    /// UDP datagrams returned by tunnel-node, base64-encoded individually.
    #[serde(default)]
    pub pkts: Option<Vec<String>>,
    /// NOTE(review): presumably signals that the remote side closed the
    /// stream — confirm.
    #[serde(default)]
    pub eof: Option<bool>,
    /// Error text; see `code` for the structured counterpart.
    #[serde(default)]
    pub e: Option<String>,
    /// Structured error code from the tunnel-node (e.g. `UNSUPPORTED_OP`).
    /// `None` for legacy tunnel-nodes; clients should fall back to parsing
    /// `e` only when this is `None` and compatibility is needed.
    #[serde(default)]
    pub code: Option<String>,
}

/// A single op in a batch tunnel request.
///
/// Only `op` is mandatory; the optional fields are left out of the
/// serialized JSON when `None`.
#[derive(Serialize, Clone, Debug)]
pub struct BatchOp {
    /// Operation name. NOTE(review): the set of valid values is defined
    /// by the tunnel node, not visible here.
    pub op: String,
    /// NOTE(review): presumably the session the op applies to — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sid: Option<String>,
    /// NOTE(review): presumably the destination host — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub host: Option<String>,
    /// NOTE(review): presumably the destination port — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub port: Option<u16>,
    /// NOTE(review): presumably the encoded payload for the op — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub d: Option<String>,
}

/// Batch tunnel response from Apps Script / tunnel node.
#[derive(Deserialize, Debug)]
pub struct BatchTunnelResponse {
    /// Per-op results; an empty vector when the `r` key is absent.
    /// NOTE(review): presumably in the same order as the submitted ops —
    /// confirm.
    #[serde(default)]
    pub r: Vec<TunnelResponse>,
    /// NOTE(review): presumably a batch-level error message — confirm.
    #[serde(default)]
    pub e: Option<String>,
}

impl DomainFronter {
    /// Build a `DomainFronter` from `config`.
    ///
    /// # Errors
    ///
    /// Returns `FronterError::Relay` when the config resolves to no
    /// script IDs at all.
    pub fn new(config: &Config) -> Result<Self, FronterError> {
        let script_ids = config.script_ids_resolved();
        if script_ids.is_empty() {
            return Err(FronterError::Relay("no script_id configured".into()));
        }

        // Produces a fresh ClientConfig honouring the `verify_ssl` policy.
        // It is called twice below so the h2-capable and the h1-only
        // connectors can each carry their own ALPN list instead of
        // mutating one shared config.
        let build_tls_config = || {
            if !config.verify_ssl {
                return ClientConfig::builder()
                    .dangerous()
                    .with_custom_certificate_verifier(Arc::new(NoVerify))
                    .with_no_client_auth();
            }
            let mut roots = rustls::RootCertStore::empty();
            roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
            ClientConfig::builder()
                .with_root_certificates(roots)
                .with_no_client_auth()
        };

        // Connector used by `open_h2`: offers h2 first, unless the
        // `force_http1` kill switch is set — then it offers http/1.1
        // only and ends up equivalent to the h1 connector (harmless,
        // merely redundant).
        let mut tls_h2 = build_tls_config();
        tls_h2.alpn_protocols = if config.force_http1 {
            vec![b"http/1.1".to_vec()]
        } else {
            vec![b"h2".to_vec(), b"http/1.1".to_vec()]
        };
        let tls_connector = TlsConnector::from(Arc::new(tls_h2));

        // Connector used by `open()` (h1 pool path). ALPN is pinned to
        // http/1.1 so an edge that would prefer h2 still negotiates h1:
        // pooled sockets must always speak what the fallback path writes.
        let mut tls_h1 = build_tls_config();
        tls_h1.alpn_protocols = vec![b"http/1.1".to_vec()];
        let tls_connector_h1 = TlsConnector::from(Arc::new(tls_h1));

        // Exit-node settings. The node only counts as enabled when it is
        // switched on AND has both a relay URL and a PSK.
        let exit = &config.exit_node;
        let exit_node_enabled =
            exit.enabled && !exit.relay_url.is_empty() && !exit.psk.is_empty();
        let exit_node_url = exit.relay_url.trim_end_matches('/').to_string();
        let exit_node_full = exit.mode.eq_ignore_ascii_case("full");

        Ok(Self {
            connect_host: config.google_ip.clone(),
            sni_hosts: build_sni_pool_for(
                &config.front_domain,
                config.sni_hosts.as_deref().unwrap_or(&[]),
            ),
            sni_idx: AtomicUsize::new(0),
            http_host: "script.google.com",
            auth_key: config.auth_key.clone(),
            parallel_relay: config.parallel_relay as usize,
            normalize_x_graphql: config.normalize_x_graphql,
            cert_hint_shown: std::sync::atomic::AtomicBool::new(false),
            script_ids,
            script_idx: AtomicUsize::new(0),
            tls_connector,
            tls_connector_h1,
            pool: Arc::new(Mutex::new(Vec::new())),
            h2_cell: Arc::new(Mutex::new(None)),
            h2_open_lock: Arc::new(Mutex::new(())),
            h2_open_failed_at: Arc::new(Mutex::new(None)),
            h2_generation: Arc::new(AtomicU64::new(0)),
            // h2 starts out disabled when the kill switch is set.
            h2_disabled: Arc::new(AtomicBool::new(config.force_http1)),
            cache: Arc::new(ResponseCache::with_default()),
            inflight: Arc::new(Mutex::new(HashMap::new())),
            coalesced: AtomicU64::new(0),
            blacklist: Arc::new(std::sync::Mutex::new(HashMap::new())),
            script_timeouts: Arc::new(std::sync::Mutex::new(HashMap::new())),
            relay_calls: AtomicU64::new(0),
            relay_failures: AtomicU64::new(0),
            bytes_relayed: AtomicU64::new(0),
            h2_calls: AtomicU64::new(0),
            h2_fallbacks: AtomicU64::new(0),
            per_site: Arc::new(std::sync::Mutex::new(HashMap::new())),
            today_calls: AtomicU64::new(0),
            today_bytes: AtomicU64::new(0),
            today_key: std::sync::Mutex::new(current_pt_day_key()),
            disable_padding: config.disable_padding,
            // Config values are clamped so a zero or absurd setting
            // cannot disable or wedge the auto-blacklist / timeout logic.
            auto_blacklist_strikes: config.auto_blacklist_strikes.max(1),
            auto_blacklist_window: Duration::from_secs(
                config.auto_blacklist_window_secs.clamp(1, 3600),
            ),
            auto_blacklist_cooldown: Duration::from_secs(
                config.auto_blacklist_cooldown_secs.clamp(1, 86400),
            ),
            batch_timeout: Duration::from_secs(
                config.request_timeout_secs.clamp(5, 300),
            ),
            exit_node_enabled,
            exit_node_url,
            exit_node_psk: exit.psk.clone(),
            exit_node_full,
            // Normalized once here (trimmed, leading dots stripped,
            // lowercased, empties dropped) so lookups can compare directly.
            exit_node_hosts: exit
                .hosts
                .iter()
                .map(|raw| raw.trim().trim_start_matches('.').to_ascii_lowercase())
                .filter(|host| !host.is_empty())
                .collect(),
        })
    }

    /// True when the configured exit node should handle this URL.
    ///
    /// * Exit node disabled: always `false`.
    /// * `full` mode: every URL routes through the exit node.
    /// * `selective` mode (default): the URL's host is lowercased and
    ///   compared against the pre-normalized `exit_node_hosts` list. An
    ///   entry matches on exact equality OR as a dot-anchored suffix
    ///   (`example.com` matches `a.example.com` but not `badexample.com`),
    ///   mirroring `passthrough_hosts` semantics.
    ///
    /// Returns `false` when no host can be extracted from `url`.
    pub(crate) fn exit_node_matches(&self, url: &str) -> bool {
        if !self.exit_node_enabled {
            return false;
        }
        if self.exit_node_full {
            return true;
        }
        let host_lc = match extract_host(url) {
            Some(h) => h.to_ascii_lowercase(),
            None => return false,
        };
        self.exit_node_hosts.iter().any(|entry| {
            // Suffix test without building a temporary ".{entry}" string
            // per entry: strip the entry off the end and require the
            // remainder to end in the separating dot.
            host_lc == *entry
                || host_lc
                    .strip_suffix(entry.as_str())
                    .map_or(false, |prefix| prefix.ends_with('.'))
        })
    }

    /// Per-batch HTTP round-trip timeout. Read by `tunnel_client` so the
    /// `BATCH_TIMEOUT` constant doesn't have to be touched on every config
    /// change. Clamped to `[5s, 300s]` at construction.
    ///
    /// Returns the stored `Duration` by value; the field is set once in
    /// `new` from `config.request_timeout_secs`.
    pub(crate) fn batch_timeout(&self) -> Duration {
        self.batch_timeout
    }

    /// Count one relay call of `bytes` bytes toward today's budget.
    ///
    /// Invoked once per outbound Apps Script fetch. Both daily counters
    /// roll over at 00:00 Pacific Time, matching Apps Script's quota
    /// reset cadence (#230, #362). Crate-public so the Full-mode batch
    /// path in `tunnel_client::fire_batch` can share the same accounting
    /// (Apps Script sees Full-mode batches as ordinary `UrlFetchApp`
    /// calls and counts them against the same daily quota).
    pub(crate) fn record_today(&self, bytes: u64) {
        let today = current_pt_day_key();
        {
            // The day key is always checked under its mutex; the scope
            // ends before the counters are bumped so the lock is held
            // only for the rollover check itself.
            let mut last_seen_day = self.today_key.lock().unwrap();
            if *last_seen_day != today {
                // New Pacific day: zero both counters so this call is
                // the first one counted against the new day.
                self.today_calls.store(0, Ordering::Relaxed);
                self.today_bytes.store(0, Ordering::Relaxed);
                *last_seen_day = today;
            }
        }
        self.today_calls.fetch_add(1, Ordering::Relaxed);
        self.today_bytes.fetch_add(bytes, Ordering::Relaxed);
    }

    /// Update the per-host counters for one logical request (cache hits
    /// and relay round-trips alike).
    ///
    /// URLs from which no host can be extracted are ignored without
    /// touching the stats map.
    fn record_site(&self, url: &str, cache_hit: bool, bytes: u64, latency_ns: u64) {
        if let Some(host) = extract_host(url) {
            let mut sites = self.per_site.lock().unwrap();
            // First request for a host starts from an all-default entry.
            let stat = sites.entry(host).or_default();
            stat.requests += 1;
            stat.bytes += bytes;
            stat.total_latency_ns += latency_ns;
            if cache_hit {
                stat.cache_hits += 1;
            }
        }
    }

    /// Copy out the per-site stats as `(host, stat)` pairs, ordered from
    /// the most-requested host to the least-requested one.
    pub fn snapshot_per_site(&self) -> Vec<(String, HostStat)> {
        let sites = self.per_site.lock().unwrap();
        let mut rows: Vec<(String, HostStat)> = sites
            .iter()
            .map(|(host, stat)| (host.clone(), stat.clone()))
            .collect();
        // Stable sort, descending by request count.
        rows.sort_by_key(|(_, stat)| std::cmp::Reverse(stat.requests));
        rows
    }

    pub fn snapshot_stats(&self) -> StatsSnapshot {
        let bl = self.blacklist.lock().unwrap();
        // Read today_key under lock and cheaply check rollover so the
        // UI never sees stale "today_calls=1847" on a day where no
        // traffic has flowed yet (e.g. user left the app open past
        // midnight PT).
        let today_now = current_pt_day_key();
        let today_key = {
            let mut guard = self.today_key.lock().unwrap();
            if *guard != today_now {
                *guard = today_now.clone();
                self.today_calls.store(0, Ordering::Relaxed);
                self.today_bytes.store(0, Ordering::Relaxed);
            }
            guard.clone()
        };
        StatsSnapshot {
            relay_calls: self.relay_calls.load(Ordering::Relaxed),
            relay_failures: self.relay_failures.load(Ordering::Relaxed),
            coalesced: self.coalesced.load(Ordering::Relaxed),
            bytes_relayed: self.bytes_relayed.load(Ordering::Relaxed),
            cache_hits: self.cache.hits(),
            cache_misses: self.cache.misses(),
            cache_bytes: self.cache.size(),
            blacklisted_scripts: bl.len(),
            total_scripts: self.script_ids.len(),
            today_calls: self.today_calls.load(Ordering::Relaxed),
            today_bytes: self.today_bytes.load(Ordering::Relaxed),
            today_key,
            today_reset_secs: seconds_until_pacific_midnight(),
            h2_calls: self.h2_calls.load(Ordering::Relaxed),
            h2_fallbacks: self.h2_fallbacks.load(Ordering::Relaxed),
            h2_disabled: self.h2_disabled.load(Ordering::Relaxed),
        }
    }

    /// Number of configured script IDs, blacklisted or not.
    pub fn num_scripts(&self) -> usize {
        self.script_ids.len()
    }

    /// Borrow the full list of configured script IDs, in the order they
    /// are stored.
    pub fn script_id_list(&self) -> &[String] {
        &self.script_ids
    }

    /// Borrow the response cache owned by this fronter.
    pub fn cache(&self) -> &ResponseCache {
        &self.cache
    }

    /// Current value of the `coalesced` counter (relaxed atomic load).
    pub fn coalesced_count(&self) -> u64 {
        self.coalesced.load(Ordering::Relaxed)
    }

    /// Pick the next script ID in round-robin order, skipping any that
    /// are currently blacklisted.
    ///
    /// Expired blacklist entries are pruned first. If every candidate
    /// examined is blacklisted, the entry whose cooldown ends soonest is
    /// removed from the blacklist and returned, so there is always
    /// something to send traffic to.
    ///
    /// # Panics
    ///
    /// The last-resort fallback indexes `script_ids[0]` and panics on an
    /// empty list; `new` refuses to construct a fronter without script
    /// IDs.
    pub fn next_script_id(&self) -> String {
        let total = self.script_ids.len();
        let now = Instant::now();
        let mut blacklist = self.blacklist.lock().unwrap();
        blacklist.retain(|_, until| *until > now);

        // Lazily walk at most `total` round-robin slots; the shared index
        // advances once per slot actually examined.
        let available = (0..total)
            .map(|_| {
                let slot = self.script_idx.fetch_add(1, Ordering::Relaxed) % total;
                &self.script_ids[slot]
            })
            .find(|candidate| !blacklist.contains_key(*candidate));
        if let Some(sid) = available {
            return sid.clone();
        }

        // All blacklisted: pick whichever comes off cooldown soonest and
        // lift its blacklist entry.
        let soonest = blacklist
            .iter()
            .min_by_key(|(_, until)| **until)
            .map(|(sid, _)| sid.clone());
        match soonest {
            Some(sid) => {
                blacklist.remove(&sid);
                sid
            }
            None => self.script_ids[0].clone(),
        }
    }

    /// Pick up to `want` distinct, non-blacklisted script IDs for a
    /// parallel fan-out dispatch.
    ///
    /// Expired blacklist entries are pruned first. At most
    /// `script_ids.len()` round-robin slots are examined, and the shared
    /// index advances once per slot examined, so later calls continue
    /// from where this one stopped. Fewer than `want` IDs are returned
    /// when not enough usable ones exist.
    ///
    /// If nothing was picked, the first configured ID is returned as a
    /// last resort, so the result is only empty when no script IDs are
    /// configured at all.
    fn next_script_ids(&self, want: usize) -> Vec<String> {
        let total = self.script_ids.len();
        if total == 0 {
            return Vec::new();
        }
        let now = Instant::now();
        let mut blacklist = self.blacklist.lock().unwrap();
        blacklist.retain(|_, until| *until > now);

        let mut chosen: Vec<String> = Vec::with_capacity(want);
        let mut examined = 0;
        while examined < total && chosen.len() < want {
            examined += 1;
            let slot = self.script_idx.fetch_add(1, Ordering::Relaxed) % total;
            let candidate = &self.script_ids[slot];
            let usable = !blacklist.contains_key(candidate) && !chosen.contains(candidate);
            if usable {
                chosen.push(candidate.clone());
            }
        }
        if chosen.is_empty() {
            chosen.push(self.script_ids[0].clone());
        }
        chosen
    }

    /// Blacklist `script_id` for the default cooldown of
    /// `BLACKLIST_COOLDOWN_SECS` seconds; `reason` is only used for the
    /// log line.
    fn blacklist_script(&self, script_id: &str, reason: &str) {
        let default_cooldown = Duration::from_secs(BLACKLIST_COOLDOWN_SECS);
        self.blacklist_script_for(script_id, default_cooldown, reason);
    }

    /// Blacklist `script_id` until `cooldown` from now and log a warning
    /// carrying the masked ID, the cooldown in whole seconds and `reason`.
    ///
    /// An existing entry for the same ID is overwritten with the new
    /// expiry.
    fn blacklist_script_for(&self, script_id: &str, cooldown: Duration, reason: &str) {
        let expires_at = Instant::now() + cooldown;
        let mut blacklist = self.blacklist.lock().unwrap();
        blacklist.insert(script_id.to_string(), expires_at);
        tracing::warn!(
            "blacklisted script {} for {}s: {}",
            mask_script_id(script_id),
            cooldown.as_secs(),
            reason
        );
    }

    /// Record a batch timeout against `script_id`.
    ///
    /// Once `auto_blacklist_strikes` timeouts have accumulated inside one
    /// `auto_blacklist_window` (measured from the first strike of the
    /// window), the deployment is blacklisted for
    /// `auto_blacklist_cooldown` and its strike counter is cleared. This
    /// keeps the round-robin from sending real traffic to a deployment
    /// that's hung (most commonly: stale `TUNNEL_SERVER_URL` after the
    /// tunnel-node moved hosts).
    pub(crate) fn record_timeout_strike(&self, script_id: &str) {
        let now = Instant::now();
        let mut strike_table = self.script_timeouts.lock().unwrap();
        let slot = strike_table
            .entry(script_id.to_string())
            .or_insert((now, 0));
        // A strike landing after the window has lapsed opens a new window
        // and counts as its first strike; otherwise it extends the
        // current window's tally.
        let window_lapsed = now.duration_since(slot.0) > self.auto_blacklist_window;
        *slot = if window_lapsed {
            (now, 1)
        } else {
            (slot.0, slot.1 + 1)
        };
        let strikes = slot.1;
        if strikes < self.auto_blacklist_strikes {
            return;
        }

        // Limit reached: forget the tally and release the strike table
        // before the blacklist is touched.
        strike_table.remove(script_id);
        drop(strike_table);
        self.blacklist_script_for(
            script_id,
            self.auto_blacklist_cooldown,
            &format!(
                "{} timeouts in {}s",
                strikes,
                self.auto_blacklist_window.as_secs()
            ),
        );
    }

    /// Forget any timeout strikes recorded for `script_id`.
    ///
    /// Called after a batch succeeds so a recovered deployment doesn't
    /// carry stale strikes around: the configured number of strikes must
    /// occur within one real failure burst, not accumulate across
    /// unrelated incidents.
    pub(crate) fn record_batch_success(&self, script_id: &str) {
        self.script_timeouts.lock().unwrap().remove(script_id);
    }

    /// Log a relay failure, adding troubleshooting guidance when the
    /// error text looks like a certificate-validation problem.
    ///
    /// The long guidance is printed at most once per fronter; later
    /// cert-related failures are logged at debug level so a flood of
    /// identical "UnknownIssuer" errors doesn't fill the log. Every other
    /// failure is logged at error level each time.
    fn log_relay_failure(&self, e: &FronterError) {
        // Substrings of the rendered error that indicate a cert problem.
        const CERT_MARKERS: [&str; 5] = [
            "UnknownIssuer",
            "invalid peer certificate",
            "CertificateExpired",
            "CertNotValidYet",
            "NotValidForName",
        ];
        let rendered = e.to_string();
        if !CERT_MARKERS.iter().any(|marker| rendered.contains(*marker)) {
            tracing::error!("Relay failed: {}", e);
            return;
        }

        // `swap` hands back the previous flag value, so exactly one
        // caller observes `false` and gets to print the full diagnostic.
        let hint_already_shown = self
            .cert_hint_shown
            .swap(true, std::sync::atomic::Ordering::Relaxed);
        if hint_already_shown {
            tracing::debug!("Relay failed (cert): {}", e);
        } else {
            tracing::error!(
                "Relay failed: {} — this almost always means one of:\n  \
                 (1) your ISP or a middlebox is intercepting TLS to the Google edge \
                 (common in Iran / IR);\n  \
                 (2) the `google_ip` in your config is pointing at a non-Google host;\n  \
                 (3) your system clock is way off (NTP not synced).\n\
                 Fixes (try in order): run `mhrv-rs scan-ips` to find a different Google \
                 frontend IP that isn't being MITM'd; check `date` on your host; as a \
                 LAST RESORT set `\"verify_ssl\": false` in config.json — this lets the \
                 relay work even through a middlebox, but your traffic is then only \
                 protected by the Apps Script relay's secret `auth_key`, not by outer TLS.",
                e
            );
        }
    }

    /// Return the next SNI host name, cycling through `sni_hosts` in
    /// round-robin order.
    ///
    /// # Panics
    ///
    /// Panics (remainder by zero) if `sni_hosts` is empty.
    /// NOTE(review): presumably `build_sni_pool_for` never yields an
    /// empty pool — confirm.
    fn next_sni(&self) -> String {
        let pool = &self.sni_hosts;
        let cursor = self.sni_idx.fetch_add(1, Ordering::Relaxed);
        pool[cursor % pool.len()].clone()
    }

    /// Open one fresh TCP+TLS connection to `connect_host:443` for the
    /// HTTP/1.1 pool, using the next SNI name in rotation.
    ///
    /// The whole connect + handshake sequence is bounded by
    /// `H1_OPEN_TIMEOUT_SECS`.
    ///
    /// # Errors
    ///
    /// Propagates connect, server-name and TLS handshake failures, and
    /// returns `FronterError::Relay` when the time budget elapses first.
    async fn open(&self) -> Result<PooledStream, FronterError> {
        let budget = Duration::from_secs(H1_OPEN_TIMEOUT_SECS);
        let handshake = async {
            let tcp = TcpStream::connect((self.connect_host.as_str(), 443u16)).await?;
            // Best effort: failing to set TCP_NODELAY is not fatal.
            let _ = tcp.set_nodelay(true);
            let name = ServerName::try_from(self.next_sni())?;
            // The h1-only connector is mandatory here: the pool may only
            // hold sockets the raw HTTP/1.1 fallback path can write to.
            // A connector that also offers h2 could leave h2 sockets in
            // the pool, and every fallback request landing on one would
            // be misframed.
            let tls = self.tls_connector_h1.connect(name, tcp).await?;
            Ok::<_, FronterError>(tls)
        };
        tokio::time::timeout(budget, handshake)
            .await
            .unwrap_or_else(|_| {
                Err(FronterError::Relay(format!(
                    "h1 open timed out after {}s",
                    H1_OPEN_TIMEOUT_SECS
                )))
            })
    }

    /// Open outbound TLS connections eagerly so the first relay request
    /// doesn't pay a cold handshake.
    ///
    /// h2 and h1 prewarm run in parallel: a request that arrives while
    /// the h2 handshake is still in flight (or has just hit its 8 s
    /// timeout) needs a warm h1 socket waiting for it, otherwise the
    /// h1 fallback path pays a cold handshake on the same slow network
    /// and the 30 s outer batch budget elapses (#924). v1.9.14 warmed
    /// h1 unconditionally; v1.9.15 (PR #799) accidentally gated the h1
    /// prewarm behind `ensure_h2()` so the h1 pool stayed empty during
    /// the h2 init window.
    ///
    /// The spawned h2 handshake races h1[0] — boot fires two TLS
    /// handshakes back-to-back. The 500 ms stagger only applies between
    /// h1[i] and h1[i+1] for i ≥ 1, so we don't burst the remaining
    /// h1[1..n] handshakes at the Google edge simultaneously. Each
    /// connection gets an 8 s expiry offset so they roll off gradually
    /// instead of all hitting POOL_TTL_SECS at once. If h2 ends up the
    /// active fast path, `run_pool_refill` trims the pool back down to
    /// `POOL_MIN_H2_FALLBACK` on the next tick — the extra warm h1
    /// sockets just age out naturally instead of being kept alive.
    pub async fn warm(self: &Arc<Self>, n: usize) {
        // Spawn the h2 prewarm in parallel so the h1 prewarm loop
        // below isn't blocked on it. Capturing the join handle lets
        // us still log "h2 fast path active" / "h1 fallback only"
        // accurately at the end.
        let h2_self = self.clone();
        let h2_handle = tokio::spawn(async move {
            !h2_self.h2_disabled.load(Ordering::Relaxed)
                && h2_self.ensure_h2().await.is_some()
        });