-
Notifications
You must be signed in to change notification settings - Fork 549
Expand file tree
/
Copy pathproxy_server.rs
More file actions
3742 lines (3493 loc) · 143 KB
/
proxy_server.rs
File metadata and controls
3742 lines (3493 loc) · 143 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
use std::collections::{HashMap, VecDeque};
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use bytes::Bytes;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::{TcpListener, TcpStream, UdpSocket};
use tokio::sync::{mpsc, Mutex};
use tokio::task::JoinSet;
use tokio_rustls::rustls::client::danger::{
HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier,
};
use tokio_rustls::rustls::pki_types::{CertificateDer, ServerName, UnixTime};
use tokio_rustls::rustls::server::Acceptor;
use tokio_rustls::rustls::{ClientConfig, DigitallySignedStruct, SignatureScheme};
use tokio_rustls::{LazyConfigAcceptor, TlsAcceptor, TlsConnector};
use crate::config::{Config, FrontingGroup, Mode};
use crate::domain_fronter::DomainFronter;
use crate::mitm::MitmCertManager;
use crate::tunnel_client::{decode_udp_packets, TunnelMux};
// Domains that are hosted on the Google Front End (GFE), i.e. served from
// Google's core frontend IP pool, and therefore respond correctly when we
// connect to `google_ip` with SNI=`front_domain` and Host=<the real domain> —
// the same SNI-rewrite tunnel used for www.google.com itself.
//
// Adding a suffix here means "TLS CONNECT to google_ip, SNI = front_domain,
// Host = real name" for requests to it — bypassing the Apps Script relay
// entirely, so there's no User-Agent locking and no Apps Script quota.
// Routing these via the tunnel instead of the Apps Script relay also avoids
// Apps Script's fixed "Google-Apps-Script" User-Agent, which makes Google
// serve the bot/no-JS fallback for search.
//
// Entries are consumed by `matches_sni_rewrite`: a host matches when it
// equals an entry or ends with `.<entry>`.
//
// When in doubt leave it out: sites that aren't actually on GFE will 404 or
// return a wrong-cert error instead of loading. Hosts that are not listed
// fall through to MITM+relay (slower but works).
//
// NOTE(review): an earlier revision of this header said the list is "kept
// conservative: anything on a separate CDN (googlevideo, ytimg, doubleclick,
// etc.) is DROPPED because routing to the wrong backend breaks rather than
// helps". The list below nevertheless contains `ytimg.com` and
// `doubleclick.net`; of the three, only `googlevideo.com` is actually left
// out (see the note inside the list). Confirm which statement is intended.
const SNI_REWRITE_SUFFIXES: &[&str] = &[
// Core Google
"google.com",
"gstatic.com",
"googleusercontent.com",
"googleapis.com",
"ggpht.com",
// YouTube family
"youtube.com",
"youtu.be",
"youtube-nocookie.com",
"ytimg.com",
// NOTE on `googlevideo.com` (deliberately NOT listed): v1.7.4 (#275)
// added it here on the theory that video chunks should bypass the
// Apps Script relay.
// **Reverted in v1.7.6** — multiple users (#275 amirabbas117, #281
// mrerf) reported total YouTube breakage after v1.7.4. Root cause
// is that googlevideo.com is served by Google's separate "EVA"
// edge IPs, not the regular GFE IPs that the user's `google_ip`
// typically points at. SNI-rewriting `googlevideo.com:443` to a
// GFE IP got TLS handshake / wrong-cert errors for those users.
// Pre-v1.7.4 behaviour (chunks via the Apps Script relay path —
// slow but reliable on every GFE IP) is restored. If we ever want
// direct googlevideo.com routing, it needs a separate config knob
// that lets users specify their EVA edge IP independently.
//
// The next paragraph is about the `gvt1.com` / `gvt2.com` entries that
// follow it, not about `googlevideo.com`.
// Google Video Transport CDN — YouTube video chunks, Chrome
// auto-updates, Google Play Store downloads. The single biggest
// gap vs the upstream Python port: without these in the list
// YouTube video playback stalls because every chunk tries to
// traverse Apps Script instead of the direct GFE tunnel.
// NOTE(review): the "YouTube video chunks" claim here sits oddly next to
// the googlevideo.com note above, which says chunks go via the relay —
// confirm what gvt1/gvt2 actually carry.
"gvt1.com",
"gvt2.com",
// Ad + analytics infra. All on GFE, all previously broken the
// same way YouTube was: SNI-blocked on Iranian DPI, but reachable
// via `google_ip` with SNI rewritten.
"doubleclick.net",
"googlesyndication.com",
"googleadservices.com",
"google-analytics.com",
"googletagmanager.com",
"googletagservices.com",
// fonts.googleapis.com is technically covered by the googleapis.com
// suffix above, but mirroring Python's explicit listing makes the
// intent obvious at a glance.
"fonts.googleapis.com",
// Blogger / Blog.google
"blogspot.com",
"blogger.com",
];
/// YouTube hosts that should be routed through the Apps Script relay
/// when `youtube_via_relay` is enabled — the API + HTML surfaces where
/// Restricted Mode is actually enforced (via the SNI=www.google.com
/// edge looking at the request). Issue #102 / #275.
///
/// `matches_sni_rewrite` checks this list before `SNI_REWRITE_SUFFIXES`
/// and, on a hit, reports "no SNI rewrite" for the host. Entries match
/// the exact name and any subdomain of it.
///
/// Deliberately narrower than the YouTube section of
/// `SNI_REWRITE_SUFFIXES`:
/// - `youtube.com` / `youtu.be` / `youtube-nocookie.com`: HTML pages
///   and player frames. These trigger Restricted Mode if served via
///   the SNI rewrite, so when the flag is on we relay them.
/// - `youtubei.googleapis.com`: the YouTube data API the player
///   queries for video metadata + manifest. Restricted Mode also
///   gates video availability here. Without this entry, the JSON
///   RPC layer would still hit the SNI-rewrite tunnel via the
///   broader `googleapis.com` suffix — the user-visible symptom of
///   that miss is "youtube_via_relay flips on but Restricted Mode
///   stays sticky on some videos."
///
/// **NOT** in this list (intentional, was a regression in #275):
/// - `ytimg.com`: thumbnails. No Restricted Mode logic on a static
///   image CDN; routing through Apps Script makes thumbnails slow
///   for zero gain.
/// - `googlevideo.com`: video chunk CDN. Routing through Apps Script
///   means every chunk eats Apps Script quota *and* risks the 6-min
///   execution cap aborting long videos mid-playback.
/// - `ggpht.com`: channel/profile images, same reasoning as ytimg.
const YOUTUBE_RELAY_HOSTS: &[&str] = &[
// HTML pages and player frames.
"youtube.com",
"youtu.be",
"youtube-nocookie.com",
// Player data API; listed explicitly because it is nested under the
// broader `googleapis.com` entry of `SNI_REWRITE_SUFFIXES`.
"youtubei.googleapis.com",
];
/// Built-in list of DNS-over-HTTPS endpoints. CONNECTs to these (when
/// `tunnel_doh` is left at the default of `false`, i.e. bypass enabled)
/// skip the Apps Script tunnel and exit via plain TCP. Mix of the
/// browser-pinned variants Chrome/Brave/Edge/Firefox/Safari use and the
/// well-known public DoH providers users wire up by hand. Suffix
/// matching means we don't need to enumerate every tenant subdomain
/// (e.g. `*.cloudflare-dns.com` covers Workers-hosted DoH too).
///
/// NOTE(review): the `bypass_doh` field doc on `RewriteCtx` says the
/// default was flipped to `tunnel_doh = true` in v1.9.0 (issue #468),
/// which contradicts "default of `false`" above. One of the two is
/// stale — confirm against config.rs.
///
/// Entries are matched case-insensitively. Both exact-match (`dns.google`)
/// and dot-anchored suffix-match (a host whose suffix is `.cloudflare-dns.com`
/// or which equals `cloudflare-dns.com`) are accepted — same shape as
/// `passthrough_hosts`'s `.foo` rule.
///
/// The list is read by `matches_doh_host`, together with any user-supplied
/// extras.
const DEFAULT_DOH_HOSTS: &[&str] = &[
// The base SLD covers every tenant subdomain via suffix matching;
// the browser-pinned variants below are listed for grep/discovery
// (so a user searching "chrome.cloudflare-dns.com" finds this list)
// and are technically redundant under cloudflare-dns.com.
"cloudflare-dns.com",
"chrome.cloudflare-dns.com",
"mozilla.cloudflare-dns.com",
"1dot1dot1dot1.cloudflare-dns.com",
// Other public resolvers. Each entry also covers its own subdomains.
"dns.google",
"dns.google.com",
"dns.quad9.net",
"dns11.quad9.net",
"dns.adguard-dns.com",
"unfiltered.adguard-dns.com",
"family.adguard-dns.com",
"dns.nextdns.io",
"doh.opendns.com",
"doh.cleanbrowsing.org",
"doh.dns.sb",
"dns0.eu",
"dns.alidns.com",
"doh.pub",
"dns.mullvad.net",
];
/// Returns `true` when `host` should be dialed through the SNI-rewrite
/// tunnel rather than the relay.
///
/// `host` is lowercased and stripped of trailing dots. It matches a list
/// entry when it equals the entry or ends with `.` followed by the entry.
///
/// When `youtube_via_relay` is set, `YOUTUBE_RELAY_HOSTS` is consulted
/// first and a hit there answers `false` outright. The carve-out has to win
/// before the general list is scanned: a nested name such as
/// `youtubei.googleapis.com` would otherwise still be caught by the broad
/// `googleapis.com` entry of `SNI_REWRITE_SUFFIXES`.
fn matches_sni_rewrite(host: &str, youtube_via_relay: bool) -> bool {
    let lowered = host.to_ascii_lowercase();
    let name = lowered.trim_end_matches('.');

    // `name` is `suffix` itself, or a subdomain of it: whatever precedes the
    // suffix is either nothing or ends on the separating dot.
    let under = |suffix: &str| {
        matches!(
            name.strip_suffix(suffix),
            Some(rest) if rest.is_empty() || rest.ends_with('.')
        )
    };

    // Relay carve-out first; no need to look at the SNI list once the host
    // is known to belong to the relay.
    if youtube_via_relay && YOUTUBE_RELAY_HOSTS.iter().any(|&relay_host| under(relay_host)) {
        return false;
    }

    SNI_REWRITE_SUFFIXES.iter().any(|&suffix| under(suffix))
}
/// Looks up a per-host override for `host` in `hosts`.
///
/// The name is lowercased and stripped of trailing dots, then tried as-is;
/// if that misses, the left-most label is dropped repeatedly so that each
/// parent domain is tried in turn (`a.b.c` → `b.c` → `c`). The first hit
/// wins, so the most specific key takes precedence.
///
/// The probe is always lowercase while keys are compared as stored, so a
/// key containing upper-case letters can never be found.
///
/// Returns the mapped value borrowed from `hosts`, or `None` when neither
/// the name nor any parent is present.
fn hosts_override<'a>(
    hosts: &'a std::collections::HashMap<String, String>,
    host: &str,
) -> Option<&'a str> {
    let lowered = host.to_ascii_lowercase();
    let mut candidate = lowered.trim_end_matches('.');
    loop {
        if let Some(ip) = hosts.get(candidate) {
            return Some(ip.as_str());
        }
        // Step up to the parent domain; stop once no label separator is left.
        match candidate.split_once('.') {
            Some((_, parent)) => candidate = parent,
            None => return None,
        }
    }
}
/// Error type returned by `ProxyServer::new` and `ProxyServer::run`.
#[derive(Debug, thiserror::Error)]
pub enum ProxyError {
/// An I/O failure. Besides genuine socket errors (e.g. a failed listener
/// bind in `run`), `ProxyServer::new` also reports configuration problems
/// through this variant by wrapping them in a `std::io::Error`.
#[error("io: {0}")]
Io(#[from] std::io::Error),
}
/// The local proxy: one HTTP listener and one SOCKS5 listener sharing the
/// same routing state. Built by `ProxyServer::new`, consumed by
/// `ProxyServer::run`.
pub struct ProxyServer {
/// Address both listeners bind to (`Config::listen_host`).
host: String,
/// Port of the HTTP proxy listener (`Config::listen_port`).
port: u16,
/// Port of the SOCKS5 listener (`Config::socks5_port`, falling back to
/// `listen_port + 1` when unset).
socks5_port: u16,
/// `None` in `direct` mode: no Apps Script relay is wired up,
/// only the SNI-rewrite tunnel path (Google edge + any configured
/// `fronting_groups`) is live.
fronter: Option<Arc<DomainFronter>>,
/// MITM certificate manager, shared with every client handler.
mitm: Arc<Mutex<MitmCertManager>>,
/// Immutable routing settings shared with every client handler.
rewrite_ctx: Arc<RewriteCtx>,
/// Tunnel multiplexer. Always `None` after `new`; `run` starts it when
/// the mode is `Mode::Full` and a fronter exists.
tunnel_mux: Option<Arc<TunnelMux>>,
/// Passed to `TunnelMux::start`. Taken from `Config::coalesce_step_ms`
/// when that is greater than zero, otherwise 10.
coalesce_step_ms: u64,
/// Passed to `TunnelMux::start`. Taken from `Config::coalesce_max_ms`
/// when that is greater than zero, otherwise 1000.
coalesce_max_ms: u64,
}
/// Routing settings snapshotted from `Config` by `ProxyServer::new` and
/// handed (behind an `Arc`) to every HTTP and SOCKS5 client handler.
pub struct RewriteCtx {
/// Google edge IP dialed for the SNI-rewrite tunnel (`Config::google_ip`).
pub google_ip: String,
/// SNI presented on the SNI-rewrite tunnel (`Config::front_domain`).
pub front_domain: String,
/// Per-host overrides copied from `Config::hosts`.
/// NOTE(review): presumably the map `hosts_override` is called with
/// (hostname → IP) — confirm at the call site.
pub hosts: std::collections::HashMap<String, String>,
/// Outbound TLS connector. Verifies against the bundled WebPKI roots when
/// `Config::verify_ssl` is set; otherwise uses the custom `NoVerify`
/// certificate verifier.
pub tls_connector: TlsConnector,
/// Optional upstream SOCKS5 proxy (`Config::upstream_socks5`).
pub upstream_socks5: Option<String>,
/// Operating mode resolved from the config (`Mode::AppsScript`,
/// `Mode::Full` or `Mode::Direct`).
pub mode: Mode,
/// If true, YouTube traffic bypasses the SNI-rewrite tunnel and
/// goes through the Apps Script relay instead. See config.rs for
/// the trade-off. Issue #102.
pub youtube_via_relay: bool,
/// User-configured hostnames that should skip the relay entirely
/// and pass through as plain TCP (optionally via upstream_socks5).
/// See config.rs `passthrough_hosts` for matching rules. Issues #39, #127.
pub passthrough_hosts: Vec<String>,
/// If true, drop SOCKS5 UDP datagrams destined for port 443 so
/// callers fall back to TCP/HTTPS. See config.rs `block_quic` for
/// the trade-off. Issue #213.
pub block_quic: bool,
/// Copied from `Config::block_stun`.
/// NOTE(review): undocumented here; presumably the STUN counterpart of
/// `block_quic` — confirm against config.rs.
pub block_stun: bool,
/// If true, route DoH CONNECTs around the Apps Script tunnel via
/// plain TCP. Always the negation of `Config::tunnel_doh`. Default
/// false via `Config::tunnel_doh = true` (flipped in v1.9.0, issue
/// #468). See `DEFAULT_DOH_HOSTS` and `matches_doh_host` for matching,
/// and config.rs `tunnel_doh` for the trade-off.
/// NOTE(review): the `DEFAULT_DOH_HOSTS` doc still describes
/// `tunnel_doh` as defaulting to `false` — one of the two is stale.
pub bypass_doh: bool,
/// When true, immediately reject connections to known DoH hosts.
/// Takes priority over bypass_doh.
pub block_doh: bool,
/// User-supplied DoH hostnames added to the built-in default list.
/// Same matching semantics as `passthrough_hosts`.
/// NOTE(review): `matches_doh_host` documents *unconditional* suffix
/// matching (no leading-dot requirement), which differs from
/// `passthrough_hosts` — confirm which description is meant.
pub bypass_doh_hosts: Vec<String>,
/// Multi-edge fronting groups, resolved at startup. Each group's
/// `ServerName` is parsed once so the per-connection dial path
/// is allocation-free. Wrapped in `Arc` so a per-CONNECT match
/// can hand the dispatcher a refcount-clone instead of cloning
/// the whole struct (which holds a `Vec<String>` of normalized
/// domains used only for matching). Empty = feature off (only
/// the built-in Google edge SNI-rewrite is active).
pub fronting_groups: Vec<Arc<FrontingGroupResolved>>,
}
/// Tests one DoH list `entry` against the host name `h`.
///
/// The entry is normalized on the fly: surrounding whitespace, trailing
/// dots and a single leading dot are ignored, and an entry that is empty
/// after that never matches. `h` matches when it equals the normalized
/// entry or ends with `.` followed by it. ASCII case is ignored on both
/// sides. `h` is expected to arrive with its trailing dots already trimmed.
///
/// Compares byte slices in place and performs no allocation.
fn host_matches_doh_entry(h: &str, entry: &str) -> bool {
    let e = entry.trim().trim_end_matches('.');
    let e = e.strip_prefix('.').unwrap_or(e);
    if e.is_empty() {
        return false;
    }
    let (hb, eb) = (h.as_bytes(), e.as_bytes());
    match hb.len().checked_sub(eb.len()) {
        // Same length: plain equality.
        Some(0) => hb.eq_ignore_ascii_case(eb),
        // Longer host: the tail must be the entry, preceded by a label
        // separator so `xdns.google` does not match `dns.google`.
        Some(cut) => hb[cut - 1] == b'.' && hb[cut..].eq_ignore_ascii_case(eb),
        // Host shorter than the entry.
        None => false,
    }
}

/// True if `host` matches a known DoH endpoint — either the built-in
/// `DEFAULT_DOH_HOSTS` list or a user-supplied entry in `extra`. Match
/// is case-insensitive, and entries match either exactly OR as a
/// dot-anchored suffix unconditionally (no leading-dot requirement,
/// unlike `passthrough_hosts`). The DoH list is *always* about a
/// service — every legitimate tenant subdomain of `cloudflare-dns.com`
/// or a user's private `doh.acme.test` is a DoH endpoint, so requiring
/// users to remember to write `.doh.acme.test` would be a footgun
/// without an obvious benefit.
///
/// Trailing dots on `host` are ignored; an empty host never matches.
/// The built-in list is scanned before `extra`.
pub fn matches_doh_host(host: &str, extra: &[String]) -> bool {
    let h = host.trim_end_matches('.');
    if h.is_empty() {
        return false;
    }
    DEFAULT_DOH_HOSTS
        .iter()
        .any(|entry| host_matches_doh_entry(h, entry))
        || extra.iter().any(|entry| host_matches_doh_entry(h, entry))
}
/// A `FrontingGroup` after one-time validation: the group's `sni` is
/// parsed into a `ServerName` so we don't repay that on every dialed
/// connection, and domain entries are pre-lower-cased + dot-trimmed
/// so the per-request match path is just byte comparisons.
#[derive(Debug, Clone)]
pub struct FrontingGroupResolved {
/// Group name as written in the config; used in log lines.
pub name: String,
/// The group's `ip` value, copied from the config unchanged.
pub ip: String,
/// The group's `sni` value as written in the config.
pub sni: String,
/// `sni` parsed once into the type the TLS connector expects.
pub server_name: ServerName<'static>,
/// The group's domains, trimmed, lower-cased and without trailing dots;
/// empty entries are dropped. Read by `match_fronting_group`.
domains_normalized: Vec<String>,
}
impl FrontingGroupResolved {
    /// Validates and pre-processes one configured fronting group.
    ///
    /// The group's `sni` is parsed into a `ServerName`. Each domain entry is
    /// trimmed of surrounding whitespace, trailing dots and one leading dot,
    /// then lower-cased; entries that end up empty are dropped.
    ///
    /// The leading dot is stripped so that an entry written in the
    /// `passthrough_hosts` style (`.vercel.com`) behaves like `vercel.com`.
    /// Matching is suffix-anchored either way, and without the strip such an
    /// entry could never match an ordinary host name because the matcher
    /// would require two consecutive dots.
    ///
    /// # Errors
    /// Returns a message naming the offending value when `sni` is not a
    /// valid server name.
    fn from_config(g: &FrontingGroup) -> Result<Self, String> {
        let server_name = ServerName::try_from(g.sni.clone())
            .map_err(|e| format!("invalid sni '{}': {}", g.sni, e))?;
        let domains_normalized = g
            .domains
            .iter()
            .map(|d| {
                let d = d.trim().trim_end_matches('.');
                d.strip_prefix('.').unwrap_or(d).to_ascii_lowercase()
            })
            .filter(|d| !d.is_empty())
            .collect();
        Ok(Self {
            name: g.name.clone(),
            ip: g.ip.clone(),
            sni: g.sni.clone(),
            server_name,
            domains_normalized,
        })
    }
}
/// Finds the first fronting group that claims `host`.
///
/// `host` is compared case-insensitively and without trailing dots. A group
/// claims it when one of the group's normalized domains equals the host or
/// is a dot-anchored suffix of it, so an entry `vercel.com` covers both
/// `vercel.com` and every `*.vercel.com`. Subdomains of a fronted domain
/// sit behind the same edge, so spelling each one out would only invite
/// mistakes.
///
/// Groups are scanned in slice order and the first claim wins; put the more
/// specific group first when entries overlap. Returns `None` for an empty
/// group list, an empty host, or when nothing matches.
pub fn match_fronting_group<'a>(
    host: &str,
    groups: &'a [Arc<FrontingGroupResolved>],
) -> Option<&'a Arc<FrontingGroupResolved>> {
    // Feature off: bail out before paying for the lowercase copy.
    if groups.is_empty() {
        return None;
    }
    let lowered = host.to_ascii_lowercase();
    let name = lowered.trim_end_matches('.');
    if name.is_empty() {
        return None;
    }
    groups.iter().find(|group| {
        group
            .domains_normalized
            .iter()
            .any(|domain| is_dot_anchored_match(name, domain))
    })
}
/// True if `host` equals `entry`, or if `entry` is a dot-anchored suffix of
/// `host` — that is, `host` ends with `entry` and the character right before
/// that suffix is a `.`. With `entry == "vercel.com"` this accepts
/// `"vercel.com"` and `"app.vercel.com"` but rejects `"xvercel.com"`.
///
/// Both inputs must already be lowercase and free of trailing dots; no
/// normalization happens here. The check only slices `host`, so it does not
/// allocate, unlike building `format!(".{}", entry)` for every call.
#[inline]
fn is_dot_anchored_match(host: &str, entry: &str) -> bool {
    match host.strip_suffix(entry) {
        // Nothing left over means equality; otherwise the leftover must end
        // on the label separator.
        Some(rest) => rest.is_empty() || rest.ends_with('.'),
        None => false,
    }
}
/// Reports whether `host` is covered by the user's passthrough list.
///
/// Comparison ignores ASCII case and trailing dots on both the host and the
/// entries; entries are also trimmed of surrounding whitespace, and entries
/// that are empty after trimming are ignored. Two entry shapes exist:
///
/// - a bare name (`example.com`) matches that exact host only;
/// - a name with a leading dot (`.internal.example`) matches the bare
///   `internal.example` and every subdomain such as `a.b.internal.example`.
///
/// Users who want subdomains included must therefore write the leading dot.
pub fn matches_passthrough(host: &str, list: &[String]) -> bool {
    if list.is_empty() {
        return false;
    }
    let lowered = host.to_ascii_lowercase();
    let name = lowered.trim_end_matches('.');
    for raw in list {
        let entry = raw.trim().trim_end_matches('.').to_ascii_lowercase();
        let hit = match entry.strip_prefix('.') {
            // Leading dot: the domain itself or anything beneath it.
            Some(suffix) => matches!(
                name.strip_suffix(suffix),
                Some(rest) if rest.is_empty() || rest.ends_with('.')
            ),
            // Bare entry: exact host only; blank entries never match.
            None => !entry.is_empty() && name == entry,
        };
        if hit {
            return true;
        }
    }
    false
}
impl ProxyServer {
/// Builds a proxy server from `config`. Nothing is bound or spawned here;
/// that happens in `run`.
///
/// Steps, in order:
/// - resolve the operating mode and, unless it is `direct`, construct the
///   `DomainFronter`;
/// - build the outbound TLS connector (WebPKI roots when `verify_ssl` is
///   set, the `NoVerify` verifier otherwise);
/// - warn about config combinations that would otherwise be silently inert;
/// - validate every fronting group and log a summary line for each;
/// - snapshot the routing settings into a shared `RewriteCtx`;
/// - pick the SOCKS5 port: `socks5_port` if configured, else
///   `listen_port + 1`.
///
/// # Errors
/// Returns `ProxyError::Io` when the mode is invalid, the `DomainFronter`
/// cannot be built, a fronting group has an invalid `sni`, or the default
/// SOCKS5 port cannot be derived because `listen_port` is already the
/// highest port number and no `socks5_port` was given.
pub fn new(config: &Config, mitm: Arc<Mutex<MitmCertManager>>) -> Result<Self, ProxyError> {
    let mode = config
        .mode_kind()
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("{e}")))?;

    // `direct` mode skips the Apps Script relay entirely, so we must
    // not try to construct the DomainFronter — it errors on a missing
    // `script_id`, which is exactly the state a direct-mode user is in.
    let fronter = match mode {
        Mode::AppsScript | Mode::Full => {
            let f = DomainFronter::new(config)
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{e}")))?;
            Some(Arc::new(f))
        }
        Mode::Direct => None,
    };

    let tls_config = if config.verify_ssl {
        let mut roots = tokio_rustls::rustls::RootCertStore::empty();
        roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
        ClientConfig::builder()
            .with_root_certificates(roots)
            .with_no_client_auth()
    } else {
        ClientConfig::builder()
            .dangerous()
            .with_custom_certificate_verifier(Arc::new(NoVerify))
            .with_no_client_auth()
    };
    let tls_connector = TlsConnector::from(Arc::new(tls_config));

    // Surface a config combo that is otherwise silently inert: extras
    // listed under `bypass_doh_hosts` only take effect when the bypass
    // itself is on. A user who set `tunnel_doh: true` *and* populated
    // the extras list almost certainly didn't mean to disable the
    // feature their custom hosts feed into.
    if config.tunnel_doh && !config.bypass_doh_hosts.is_empty() {
        tracing::warn!(
            "config: bypass_doh_hosts has {} entries but tunnel_doh=true — \
             the bypass is off, so the extras have no effect. Set \
             tunnel_doh=false (or omit it) to use them.",
            config.bypass_doh_hosts.len()
        );
    }

    // Same-shape warning for fronting_groups in full mode. The dispatch
    // short-circuits to the tunnel mux before the fronting_groups check
    // (full mode preserves end-to-end TLS, fronting_groups requires
    // MITM), so groups configured here will never fire. Surface this
    // at startup rather than letting users wonder why their Vercel
    // domains never hit the configured edge.
    if mode == Mode::Full && !config.fronting_groups.is_empty() {
        tracing::warn!(
            "config: fronting_groups has {} entries but mode=full — \
             full mode tunnels everything end-to-end through Apps Script \
             (no MITM), so groups never fire. Switch to mode=apps_script \
             or mode=direct to use them, or remove the groups to silence \
             this warning.",
            config.fronting_groups.len()
        );
    }

    let mut fronting_groups: Vec<Arc<FrontingGroupResolved>> =
        Vec::with_capacity(config.fronting_groups.len());
    let mut seen_names: std::collections::HashSet<String> = Default::default();
    for g in &config.fronting_groups {
        let resolved = FrontingGroupResolved::from_config(g).map_err(|e| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!("fronting_groups['{}']: {}", g.name, e),
            )
        })?;
        // Surface duplicate group names at startup. Not a hard
        // error — copy-pasted configs can land here legitimately
        // — but log lines key on `name` and dedup ambiguity makes
        // them unreadable.
        if !seen_names.insert(resolved.name.clone()) {
            tracing::warn!(
                "fronting group name '{}' is used by more than one group; \
                 log lines that reference the name will be ambiguous",
                resolved.name
            );
        }
        tracing::info!(
            "fronting group '{}': sni={} ip={} domains={}",
            resolved.name,
            resolved.sni,
            resolved.ip,
            resolved.domains_normalized.len()
        );
        fronting_groups.push(Arc::new(resolved));
    }

    let rewrite_ctx = Arc::new(RewriteCtx {
        google_ip: config.google_ip.clone(),
        front_domain: config.front_domain.clone(),
        hosts: config.hosts.clone(),
        tls_connector,
        upstream_socks5: config.upstream_socks5.clone(),
        mode,
        youtube_via_relay: config.youtube_via_relay,
        passthrough_hosts: config.passthrough_hosts.clone(),
        block_quic: config.block_quic,
        block_stun: config.block_stun,
        // The bypass is simply the inverse of "tunnel DoH".
        bypass_doh: !config.tunnel_doh,
        block_doh: config.block_doh,
        bypass_doh_hosts: config.bypass_doh_hosts.clone(),
        fronting_groups,
    });

    // Default the SOCKS5 port to the one after the HTTP port. The successor
    // is computed only when it is needed, and with a checked add:
    // `listen_port + 1` overflows for the highest port number, which would
    // panic in debug builds and wrap to port 0 in release builds.
    let socks5_port = match config.socks5_port {
        Some(port) => port,
        None => config.listen_port.checked_add(1).ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!(
                    "listen_port {} leaves no next port for the default SOCKS5 listener; \
                     set socks5_port explicitly",
                    config.listen_port
                ),
            )
        })?,
    };

    Ok(Self {
        host: config.listen_host.clone(),
        port: config.listen_port,
        socks5_port,
        fronter,
        mitm,
        rewrite_ctx,
        tunnel_mux: None, // initialized in run() inside the tokio runtime
        // Non-positive values fall back to the built-in defaults.
        coalesce_step_ms: if config.coalesce_step_ms > 0 {
            config.coalesce_step_ms as u64
        } else {
            10
        },
        coalesce_max_ms: if config.coalesce_max_ms > 0 {
            config.coalesce_max_ms as u64
        } else {
            1000
        },
    })
}
/// Returns another handle to the shared `DomainFronter`, or `None` when the
/// server was built without one (`direct` mode).
pub fn fronter(&self) -> Option<Arc<DomainFronter>> {
    self.fronter.as_ref().map(Arc::clone)
}
/// Binds the HTTP and SOCKS5 listeners and serves clients until shutdown.
///
/// Must be called inside a tokio runtime. In `Mode::Full` (with a fronter
/// present) the tunnel mux is started first. When a fronter exists, a
/// one-off pool warm-up plus the keepalive, pool-refill and stats tasks are
/// spawned; in `direct` mode none of them run.
///
/// The call returns when `shutdown_rx` resolves (a signal was sent or the
/// sender was dropped) or when either accept loop ends on its own. In every
/// case all tasks started here are aborted before returning, so nothing
/// keeps running on the old configuration. Only the fire-and-forget
/// warm-up task is not tracked.
///
/// # Errors
/// Returns `ProxyError::Io` when either listener cannot be bound.
pub async fn run(
    mut self,
    mut shutdown_rx: tokio::sync::oneshot::Receiver<()>,
) -> Result<(), ProxyError> {
    // Initialize TunnelMux inside the runtime (tokio::spawn requires it).
    if self.rewrite_ctx.mode == Mode::Full {
        if let Some(f) = self.fronter.as_ref() {
            self.tunnel_mux = Some(TunnelMux::start(
                f.clone(),
                self.coalesce_step_ms,
                self.coalesce_max_ms,
            ));
        }
    }

    let http_addr = format!("{}:{}", self.host, self.port);
    let socks_addr = format!("{}:{}", self.host, self.socks5_port);
    let http_listener = TcpListener::bind(&http_addr).await?;
    let socks_listener = TcpListener::bind(&socks_addr).await?;
    tracing::warn!(
        "Listening HTTP on {} — set your browser HTTP proxy to this address.",
        http_addr
    );
    tracing::warn!(
        "Listening SOCKS5 on {} — xray / Telegram / app-level SOCKS5 clients use this.",
        socks_addr
    );

    // Pre-warm the outbound connection pool so the user's first request
    // doesn't pay a fresh TLS handshake to Google edge. Best-effort;
    // failures are logged and ignored. Skipped in `direct` mode —
    // there is no fronter to warm.
    //
    // Sized to roughly match a browser's parallel-connection burst at
    // startup. The previous fixed `3` was fine for a single deployment
    // but left requests 4-10 of the opening burst paying a cold TLS
    // handshake each (~300ms). Scaling with deployment count gives
    // multi-account configs a proportionally warmer pool, capped so
    // single-deployment users don't hammer Google edge unnecessarily.
    if let Some(warm_fronter) = self.fronter.clone() {
        let n = warm_fronter.num_scripts().clamp(6, 16);
        tokio::spawn(async move {
            warm_fronter.warm(n).await;
        });
    }

    // Long-lived helper tasks. Their handles are kept (not
    // fire-and-forget) so they can be aborted when `run` returns. Without
    // that, hitting Stop in the UI would leave e.g. the keepalive holding
    // an Arc<DomainFronter> on stale config and pinging Apps Script
    // every 240s — same class of bug that issue #99 hit for the accept
    // loops. Nothing is spawned in direct mode: there is no fronter to
    // ping, refill or report on.
    let mut background: Vec<tokio::task::JoinHandle<()>> = Vec::new();
    if let Some(fronter) = self.fronter.as_ref() {
        // Apps Script container keepalive. `warm()` above keeps the TCP
        // pool warm at startup, but the V8 container behind UrlFetchApp
        // goes cold after ~5min idle and costs 1-3s to wake. A periodic
        // HEAD ping prevents the cold-start lag on the first request
        // after a quiet pause (most visible as YouTube player stalls).
        let keepalive_fronter = fronter.clone();
        background.push(tokio::spawn(async move {
            keepalive_fronter.run_keepalive().await;
        }));

        // Background pool refill: keeps at least POOL_MIN ready TLS
        // connections so acquire() never pays a cold handshake.
        let refill_fronter = fronter.clone();
        background.push(tokio::spawn(async move {
            refill_fronter.run_pool_refill().await;
        }));

        // Once a minute, log relay statistics if there was any activity.
        let stats_fronter = fronter.clone();
        background.push(tokio::spawn(async move {
            let mut ticker = tokio::time::interval(std::time::Duration::from_secs(60));
            ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
            // The first tick of an interval completes immediately; consume
            // it so the first report comes after a full period.
            ticker.tick().await;
            loop {
                ticker.tick().await;
                let s = stats_fronter.snapshot_stats();
                if s.relay_calls > 0 || s.cache_hits > 0 {
                    tracing::info!("{}", s.fmt_line());
                }
            }
        }));
    }

    let http_fronter = self.fronter.clone();
    let http_mitm = self.mitm.clone();
    let http_ctx = self.rewrite_ctx.clone();
    let http_mux = self.tunnel_mux.clone();
    let mut http_task = tokio::spawn(async move {
        let mut fd_exhaust_count: u64 = 0;
        // Track every per-client child task in a JoinSet so that when
        // this accept task is aborted on shutdown, dropping the JoinSet
        // aborts the children too. Previously children were bare
        // `tokio::spawn(...)` handles with no ownership — aborting the
        // parent accept loop stopped taking new connections but left
        // in-flight ones running with the OLD config. That manifested
        // as "hitting Stop in the UI doesn't actually stop anything
        // already running" (issue #99) and as "changing auth_key and
        // Start doesn't take effect for domains with a live
        // keep-alive" because the old DomainFronter stayed alive
        // inside those child tasks.
        let mut children: tokio::task::JoinSet<()> = tokio::task::JoinSet::new();
        loop {
            // Opportunistic reap so completed children don't pile up
            // memory on long-running proxies.
            while children.try_join_next().is_some() {}
            let (sock, peer) = match http_listener.accept().await {
                Ok(x) => {
                    fd_exhaust_count = 0;
                    x
                }
                Err(e) => {
                    accept_backoff("http", &e, &mut fd_exhaust_count).await;
                    continue;
                }
            };
            let _ = sock.set_nodelay(true);
            let fronter = http_fronter.clone();
            let mitm = http_mitm.clone();
            let rewrite_ctx = http_ctx.clone();
            let mux = http_mux.clone();
            children.spawn(async move {
                if let Err(e) = handle_http_client(sock, fronter, mitm, rewrite_ctx, mux).await
                {
                    tracing::debug!("http client {} closed: {}", peer, e);
                }
            });
        }
    });

    let socks_fronter = self.fronter.clone();
    let socks_mitm = self.mitm.clone();
    let socks_ctx = self.rewrite_ctx.clone();
    let socks_mux = self.tunnel_mux.clone();
    let mut socks_task = tokio::spawn(async move {
        let mut fd_exhaust_count: u64 = 0;
        // Same pattern as http_task above — JoinSet so shutdown
        // drops in-flight SOCKS5 clients instead of leaving them to
        // keep running on the stale config.
        let mut children: tokio::task::JoinSet<()> = tokio::task::JoinSet::new();
        loop {
            while children.try_join_next().is_some() {}
            let (sock, peer) = match socks_listener.accept().await {
                Ok(x) => {
                    fd_exhaust_count = 0;
                    x
                }
                Err(e) => {
                    accept_backoff("socks", &e, &mut fd_exhaust_count).await;
                    continue;
                }
            };
            let _ = sock.set_nodelay(true);
            let fronter = socks_fronter.clone();
            let mitm = socks_mitm.clone();
            let rewrite_ctx = socks_ctx.clone();
            let mux = socks_mux.clone();
            children.spawn(async move {
                if let Err(e) =
                    handle_socks5_client(sock, fronter, mitm, rewrite_ctx, mux).await
                {
                    tracing::debug!("socks client {} closed: {}", peer, e);
                }
            });
        }
    });

    tokio::select! {
        biased;
        _ = &mut shutdown_rx => {
            tracing::info!("Shutdown signal received, stopping listeners");
        }
        // The accept loops never return on their own, so reaching either
        // of these arms means the task panicked or was cancelled.
        res = &mut http_task => {
            tracing::error!("http accept loop ended unexpectedly: {:?}", res);
        }
        res = &mut socks_task => {
            tracing::error!("socks accept loop ended unexpectedly: {:?}", res);
        }
    }

    // Tear everything down whichever arm fired. Previously only the
    // shutdown arm aborted, so an accept loop dying on its own let `run`
    // return while the other listener and the background tasks kept
    // running detached. Aborting an already-finished task is a no-op.
    for task in &background {
        task.abort();
    }
    http_task.abort();
    socks_task.abort();
    Ok(())
}
}
/// Back-off helper for the accept() loop.
///
/// Motivated by issue #18: when the process hits its file-descriptor limit
/// (EMFILE — `No file descriptors available`), `accept()` returns that
/// error synchronously and is immediately ready to fire again. The old
/// loop just `continue`'d, producing a wall of identical ERROR lines
/// thousands per second and starving the tokio runtime of CPU that
/// existing connections would have used to drain and close.
///
/// Two things this does right:
/// 1. Sleeps when `EMFILE` / `ENFILE` are seen, for longer the longer the
///    problem persists: 50ms per consecutive fd-limit error, capped at 2s
///    (reached on the 40th). Gives existing connections a chance to
///    finish and free fds.
/// 2. Rate-limits the log line: first occurrence logs a full warning
///    with fix instructions, subsequent ones log once per 100 errors
///    so the log doesn't fill up.
///
/// `count` is the caller's running tally of consecutive fd-limit errors
/// (the accept loops reset it to 0 after every successful accept). Only
/// fd-limit errors advance it. Other accept errors get a one-line log and
/// a 5ms pause, and leave the tally alone, so a stray transient error can
/// neither suppress the first fd-limit warning nor inflate the back-off.
///
/// The fd-limit test compares the raw OS error with 23 (`ENFILE`) and 24
/// (`EMFILE`), their values on Linux and the BSD family including macOS.
/// NOTE(review): Windows reports socket exhaustion with a different code,
/// so there it takes the "other error" path — confirm whether that matters.
async fn accept_backoff(kind: &str, err: &std::io::Error, count: &mut u64) {
    let is_fd_limit = matches!(err.raw_os_error(), Some(23 | 24));

    if !is_fd_limit {
        // Transient non-EMFILE error (e.g. ECONNABORTED from a client that
        // went away during the handshake). One-line log, short sleep to
        // avoid a tight loop in case it repeats.
        tracing::error!("accept ({}): {}", kind, err);
        tokio::time::sleep(std::time::Duration::from_millis(5)).await;
        return;
    }

    *count = count.saturating_add(1);
    if *count == 1 {
        tracing::warn!(
            "accept ({}) hit RLIMIT_NOFILE: {}. Backing off. Raise the fd limit: \
             `ulimit -n 65536` before starting, or (OpenWRT) use the shipped procd \
             init which sets nofile=16384. The listener will keep retrying.",
            kind,
            err
        );
    } else if *count % 100 == 0 {
        tracing::warn!(
            "accept ({}) still fd-limited after {} retries. Current connections \
             need to finish before we can accept new ones.",
            kind,
            *count
        );
    }

    // Linear ramp: 50ms on the first hit, 500ms on the 10th, and the 2s
    // ceiling from the 40th hit onwards.
    let backoff_ms = 50u64 * (*count).min(40);
    tokio::time::sleep(std::time::Duration::from_millis(backoff_ms)).await;
}
async fn handle_http_client(
mut sock: TcpStream,
fronter: Option<Arc<DomainFronter>>,
mitm: Arc<Mutex<MitmCertManager>>,
rewrite_ctx: Arc<RewriteCtx>,
tunnel_mux: Option<Arc<TunnelMux>>,
) -> std::io::Result<()> {
let (head, leftover) = match read_http_head(&mut sock).await? {
HeadReadResult::Got { head, leftover } => (head, leftover),
HeadReadResult::Closed => return Ok(()),
HeadReadResult::Oversized => {
// Reply with 431 instead of just dropping the socket so the
// browser shows a real error rather than retrying the same
// oversized request in a loop.
tracing::warn!(
"request head exceeds {} bytes — refusing with 431",
MAX_HEADER_BYTES
);
let _ = sock
.write_all(
b"HTTP/1.1 431 Request Header Fields Too Large\r\n\
Connection: close\r\n\
Content-Length: 0\r\n\r\n",
)
.await;
let _ = sock.flush().await;
return Ok(());
}
};
let (method, target, _version, _headers) = parse_request_head(&head)
.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidData, "bad request"))?;
if method.eq_ignore_ascii_case("CONNECT") {
let (host, port) = parse_host_port(&target);
// Mirror the SOCKS5 short-circuit: if the tunnel-node just failed
// this (host, port) with unreachable, return 502 immediately rather
// than acknowledging the CONNECT and blowing tunnel quota on a
// guaranteed retry. See `TunnelMux::is_unreachable` for context.
if let Some(ref mux) = tunnel_mux {
if mux.is_unreachable(&host, port) {
tracing::info!("CONNECT {}:{} (negative-cached, refusing)", host, port);
let _ = sock
.write_all(b"HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\nConnection: close\r\n\r\n")
.await;
let _ = sock.flush().await;
return Ok(());
}
}
sock.write_all(b"HTTP/1.1 200 Connection Established\r\n\r\n")
.await?;
sock.flush().await?;
dispatch_tunnel(
sock,
host,
port,
fronter,
mitm,
rewrite_ctx,
tunnel_mux,
false,
)
.await
} else {
// Plain HTTP proxy request (e.g. `GET http://…`).
//
// apps_script mode: relay through the Apps Script fronter (which
// is the whole point of the relay).
//
// direct mode: no fronter exists, so passthrough as raw TCP.
// Same contract as `dispatch_tunnel` honors for CONNECT in
// direct mode — anything not on the Google edge / not in a
// configured fronting_group is forwarded direct (or via
// `upstream_socks5`) so the user's browser still works while
// they finish setting up Apps Script. Issue: typing a bare
// `http://example.com` URL used to return a 502 here even
// though `https://example.com` (CONNECT) worked fine.
match fronter {
Some(f) => do_plain_http(sock, &head, &leftover, f).await,
None => do_plain_http_passthrough(sock, &head, &leftover, &rewrite_ctx).await,
}
}
}
// ---------- SOCKS5 ----------
async fn handle_socks5_client(
mut sock: TcpStream,
fronter: Option<Arc<DomainFronter>>,
mitm: Arc<Mutex<MitmCertManager>>,
rewrite_ctx: Arc<RewriteCtx>,
tunnel_mux: Option<Arc<TunnelMux>>,
) -> std::io::Result<()> {
// RFC 1928 handshake: VER=5, NMETHODS, METHODS...
let mut hdr = [0u8; 2];
sock.read_exact(&mut hdr).await?;
if hdr[0] != 0x05 {
return Ok(());
}
let nmethods = hdr[1] as usize;
let mut methods = vec![0u8; nmethods];
sock.read_exact(&mut methods).await?;
// Only "no auth" (0x00) is supported.
if !methods.contains(&0x00) {
sock.write_all(&[0x05, 0xff]).await?;
return Ok(());
}
sock.write_all(&[0x05, 0x00]).await?;
// Request: VER=5, CMD, RSV=0, ATYP, DST.ADDR, DST.PORT
let mut req = [0u8; 4];
sock.read_exact(&mut req).await?;
if req[0] != 0x05 {
return Ok(());
}
let cmd = req[1];
if cmd != 0x01 && cmd != 0x03 {
// CONNECT and UDP ASSOCIATE only.
sock.write_all(&[0x05, 0x07, 0x00, 0x01, 0, 0, 0, 0, 0, 0])
.await?;
return Ok(());
}
let atyp = req[3];
let host: String = match atyp {
0x01 => {
let mut ip = [0u8; 4];
sock.read_exact(&mut ip).await?;
format!("{}.{}.{}.{}", ip[0], ip[1], ip[2], ip[3])
}
0x03 => {
let mut len = [0u8; 1];
sock.read_exact(&mut len).await?;
let mut name = vec![0u8; len[0] as usize];
sock.read_exact(&mut name).await?;
String::from_utf8_lossy(&name).into_owned()
}
0x04 => {
let mut ip = [0u8; 16];
sock.read_exact(&mut ip).await?;
let addr = std::net::Ipv6Addr::from(ip);
addr.to_string()
}
_ => {
sock.write_all(&[0x05, 0x08, 0x00, 0x01, 0, 0, 0, 0, 0, 0])
.await?;
return Ok(());
}
};
let mut port_buf = [0u8; 2];
sock.read_exact(&mut port_buf).await?;
let port = u16::from_be_bytes(port_buf);
if cmd == 0x03 {
tracing::info!("SOCKS5 UDP ASSOCIATE requested for {}:{}", host, port);
return handle_socks5_udp_associate(sock, rewrite_ctx, tunnel_mux).await;
}
// Negative-cache short-circuit: if the tunnel-node just failed to reach
// this exact (host, port) with `Network is unreachable` / `No route to
// host`, reply 0x04 (Host unreachable) immediately. Saves a 1.5–2s tunnel
// round-trip on guaranteed-failing targets — the IPv6 probe retry loop
// is the main offender on devices without IPv6.
if let Some(ref mux) = tunnel_mux {
if mux.is_unreachable(&host, port) {
tracing::info!("SOCKS5 CONNECT -> {}:{} (negative-cached, refusing)", host, port);
sock.write_all(&[0x05, 0x04, 0x00, 0x01, 0, 0, 0, 0, 0, 0])
.await?;
sock.flush().await?;
return Ok(());
}
}
// Reject STUN/TURN UDP ports immediately so WebRTC (Meet,
// Telegram calls) skips UDP ICE candidates and falls back to
// TCP TURN on :443 without waiting for a timeout.
if rewrite_ctx.block_stun && matches!(port, 3478 | 5349 | 19302) {
tracing::info!("SOCKS5 CONNECT -> {}:{} (STUN/TURN blocked, forcing TCP fallback)", host, port);
sock.write_all(&[0x05, 0x05, 0x00, 0x01, 0, 0, 0, 0, 0, 0])
.await?;
sock.flush().await?;
return Ok(());
}
tracing::info!("SOCKS5 CONNECT -> {}:{}", host, port);
// Success reply with zeroed BND.
sock.write_all(&[0x05, 0x00, 0x00, 0x01, 0, 0, 0, 0, 0, 0])
.await?;
sock.flush().await?;
let require_remote_dns = atyp == 0x03;
dispatch_tunnel(
sock,
host,
port,
fronter,
mitm,
rewrite_ctx,
tunnel_mux,
require_remote_dns,
)
.await
}
/// Destination of a SOCKS5 UDP datagram (judging by the name — the code
/// that builds and consumes it is outside this view).
///
/// Derives `Eq` + `Hash`, so values can serve as hash-map / hash-set keys;
/// equality and hashing cover all four fields.
///
/// NOTE(review): the per-field notes below are inferred from the field
/// names and types only — confirm against the UDP ASSOCIATE code.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct SocksUdpTarget {
// Destination host in textual form — presumably the same string form the
// CONNECT path uses (dotted IPv4, domain name, or IPv6 text); TODO confirm.
host: String,
// Destination port.
port: u16,
// Presumably the SOCKS5 ATYP byte (0x01 IPv4 / 0x03 domain / 0x04 IPv6)
// of the original datagram header — TODO confirm.
atyp: u8,
// Presumably the raw DST.ADDR bytes as they appeared on the wire, kept so
// replies can echo the header back unchanged — TODO confirm.
addr: Vec<u8>,
}
/// Per-target relay session state shared between the dispatch loop and
/// the per-session task. The dispatch loop pushes uplink datagrams via
/// `uplink`; the task drains the upstream and serializes both directions
/// onto a single tunnel-mux call at a time. `sid` is held here so the
/// dispatch teardown path can issue close_session for any task it has
/// to abort mid-await.