Skip to content

Commit 480e8a8

Browse files
jpolitzclaude
andcommitted
Race direct vs proxy for shareurl loads, go with “whatever succeeds”
Joe says: Constraints: - Don't want to proxy everything (be kind to our servers) - Don't want to wait a long time for the direct request to fail (flaky network, weird school blocker) So we kick off both the proxy and the direct request the first time we have a proxy-able candidate. - If the direct request succeeds, cancel the proxy request to CPO and do direct requests for that domain from this page - If the direct request fails, use only proxy requests for that domain for this page A direct request counts as success if it returns 200 OK and text/plain (making sure we don't weirdly accept 200 OK with like... an HTML login page because of a school SSO portal) I think this is a good balance of server load and letting working clients do their thing. Claude says: Responding to Ben's PR feedback to do better than always-proxy. The first fetch to an allowlisted host now runs direct + /load-shareurl in parallel; whichever returns first that verifies (direct = 2xx text/plain) is served to the caller, and the outcome of the direct request locks per-host shouldProxy state for the rest of the page-load. So unrestricted networks pay the proxy hop only once and rapidly switch to direct; blocked networks see direct fail and switch to proxy. Header-only verification (rather than body comparison) lets us abort the in-flight proxy fetch as soon as direct is trusted, keeping server load near zero on the healthy path. proxyStreamFetch now also tears down the upstream connection when the client disconnects, so the abort actually saves bandwidth on the server side too. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 83bd40f commit 480e8a8

2 files changed

Lines changed: 114 additions & 16 deletions

File tree

src/server.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,10 @@ function start(config, onServerReady) {
224224
} catch (_) { return false; }
225225
},
226226
});
227+
// If the client disconnects (e.g. the browser aborts /load-shareurl after
228+
// direct succeeded), tear down the upstream connection too — otherwise
229+
// we'd keep streaming bytes from raw.githubusercontent.com to nowhere.
230+
res.on('close', function() { upstream.destroy(); });
227231
upstream.on('error', function(err) {
228232
if (!res.headersSent) opts.onError(res, err);
229233
});

src/web/js/beforePyret.js

Lines changed: 110 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,118 @@ var originalPageLoad = Date.now();
44
console.log("originalPageLoad: ", originalPageLoad);
55

66
// Transparently route browser fetches to allowlisted hosts through the
7-
// server-side proxy at /load-shareurl. Some school networks block
8-
// raw.githubusercontent.com directly; the proxy gives those users a working
9-
// path. Installed as early as possible so it catches every fetch caller:
10-
// makeUrlFile in drive.js, the url/url-file import prefetches in cpo-main.js,
11-
// and the Pyret runtime's F.fetch trove (via cross-fetch -> window.fetch).
7+
// server-side proxy at /load-shareurl, but only when the direct path doesn't
8+
// work.
9+
//
10+
// Strategy: the FIRST fetch to an allowlisted host fires direct + proxied in
11+
// parallel. We decide shouldProxy for the rest of the page-load from direct's
12+
// response *headers*:
13+
// - direct returned 2xx with content-type text/plain -> shouldProxy=false:
14+
// serve direct's response, abort the in-flight proxy fetch.
15+
// - direct failed, hung past timeout, or returned anything else
16+
// -> shouldProxy=true:
17+
// serve proxy's response.
18+
// A key idea is that network-blocky things sometimes return 200 with a
19+
// message page about blocking (or an error, but that counts as a fail). We
20+
// don't want to accidentally think that's a success.
21+
// shouldProxy state is in-memory and per-host — never persisted, since
22+
// reachability changes between networks and a stale value would silently
23+
// break loads.
24+
//
25+
// Installed on the global fetch as early as possible so it catches every fetch
26+
// caller; some of them are in the pyret-lang runtime and would be otherwise
27+
// difficult to configure.
1228
// Hostnames whose fetches are intercepted by the window.fetch override below;
// requests to any other host go straight to the original fetch.
const SHAREURL_PROXY_HOSTS = new Set(['raw.githubusercontent.com']);
29+
// Milliseconds to wait for the direct request's outcome during the first-fetch
// race before shouldProxy is resolved to true for that host.
const SHAREURL_DIRECT_TIMEOUT_MS = 5000;
1330
// The fetch implementation as it was before the override is installed, bound
// to window so it can be called as a plain function.
const _origFetch = window.fetch.bind(window);
14-
window.fetch = function(input, init) {
15-
const urlStr = (typeof input === 'string') ? input
16-
: (typeof Request !== 'undefined' && input instanceof Request) ? input.url
17-
: String(input);
18-
try {
19-
const u = new URL(urlStr, window.location.href);
20-
if (SHAREURL_PROXY_HOSTS.has(u.hostname)) {
21-
return _origFetch('/load-shareurl?url=' + encodeURIComponent(urlStr), init);
22-
}
23-
} catch (_) { /* not a parseable URL; fall through */ }
24-
return _origFetch(input, init);
31+
32+
// Decided per-host verdicts: true means later fetches to that host go through
// /load-shareurl, false means they go direct. Filled in when a race's
// shouldProxyPromise resolves.
const _shareurlShouldProxy = new Map(); // host -> boolean
33+
// Verdicts still being decided: fetches that arrive while a host's race is in
// flight chain onto this promise instead of starting a second race.
const _shareurlShouldProxyInflight = new Map(); // host -> Promise<boolean>
34+
35+
// Builds the same-origin proxy endpoint for a fetch input: the input's URL
// string is percent-encoded into the `url` query parameter of /load-shareurl.
function _shareurlProxyUrl(fetchInput) {
  const targetUrl = _shareurlInputToUrl(fetchInput);
  const encodedTarget = encodeURIComponent(targetUrl);
  return '/load-shareurl?url=' + encodedTarget;
}
38+
39+
function _shareurlInputToUrl(fetchInput) {
40+
return (typeof fetchInput === 'string') ? fetchInput
41+
: (typeof Request !== 'undefined' && fetchInput instanceof Request) ? fetchInput.url
42+
: String(fetchInput);
43+
}
44+
45+
// Decides, from headers alone, whether a direct response can be trusted as a
// real upstream answer.
//
// Source files served from raw.githubusercontent.com come back as text/plain
// (.arr, .json, .csv, .md all do). Anything else — HTML block pages, captive
// portals, surprise content types — is not trusted, even with a 2xx status.
//
// r: a fetch Response (only `ok` and `headers.get` are read).
// Returns true only for an ok response whose media type is exactly text/plain.
function _shareurlVerifyDirect(r) {
  if (!r.ok) return false;
  const rawContentType = r.headers.get('content-type') || '';
  // Compare the media type itself, ignoring parameters such as charset, so a
  // value that merely begins with "text/plain" (e.g. "text/plainish") is not
  // accepted and stray whitespace around the type does not cause a rejection.
  const mediaType = rawContentType.split(';')[0].trim().toLowerCase();
  return mediaType === 'text/plain';
}
54+
55+
function _shareurlFetch(shouldProxy, fetchInput, fetchInit) {
56+
const maybeProxyInput = shouldProxy ? _shareurlProxyUrl(fetchInput) : fetchInput;
57+
return _origFetch(maybeProxyInput, fetchInit);
58+
}
59+
60+
// Runs the first-contact race for a host: fires the direct request and the
// /load-shareurl proxy request in parallel.
//
// Returns { responsePromise, shouldProxyPromise }:
//   - responsePromise: what the caller of fetch() gets. Direct's response if
//     its headers verify (the proxy fetch is then aborted to stop wasting
//     server bandwidth); otherwise the proxy's response. Rejects with the
//     proxy's error only once direct is known not to be usable (it failed,
//     did not verify, or the timeout elapsed).
//   - shouldProxyPromise: resolves to false when direct's headers verify, and
//     to true when direct fails, does not verify, or has produced no outcome
//     within SHAREURL_DIRECT_TIMEOUT_MS.
//
// NOTE: the caller's signal is passed to the direct fetch via fetchInit, so a
// caller abort makes direct reject, which resolves shouldProxyPromise to true
// just like a network failure would.
function _shareurlRace(fetchInput, fetchInit) {
  const proxyCtrl = new AbortController();

  // The proxy fetch needs its own signal so it can be cancelled once direct is
  // trusted, but that replaces the caller's signal in its init. Forward caller
  // aborts to it so cancelling the fetch cancels both legs of the race.
  const callerSignal = (fetchInit && fetchInit.signal)
    || ((typeof Request !== 'undefined' && fetchInput instanceof Request) ? fetchInput.signal : null);
  if (callerSignal) {
    if (callerSignal.aborted) {
      proxyCtrl.abort();
    } else {
      callerSignal.addEventListener('abort', () => proxyCtrl.abort(), { once: true });
    }
  }

  const proxyP = _origFetch(_shareurlProxyUrl(fetchInput),
    Object.assign({}, fetchInit, { signal: proxyCtrl.signal }));
  const directP = _origFetch(fetchInput, fetchInit);

  // Direct's outcome, computed once and never rejecting: the response (or null
  // on failure) plus whether its headers verified.
  const directOutcomeP = directP
    .then(r => ({ response: r, verified: _shareurlVerifyDirect(r) }))
    .catch(() => ({ response: null, verified: false }));

  // shouldProxy is decided from direct's headers — a timeout flips us to
  // true if direct hangs at the network level (no headers, no error). The
  // timer is cleared as soon as direct produces an outcome.
  let timeoutId;
  const timeoutP = new Promise(resolve => {
    timeoutId = setTimeout(() => resolve(true), SHAREURL_DIRECT_TIMEOUT_MS);
  });
  const shouldProxyPromise = Promise.race([
    directOutcomeP.then(outcome => {
      clearTimeout(timeoutId);
      return !outcome.verified;
    }),
    timeoutP,
  ]);

  const responsePromise = new Promise((resolve, reject) => {
    let settled = false;

    directOutcomeP.then(outcome => {
      if (!settled && outcome.verified) {
        settled = true;
        proxyCtrl.abort();
        resolve(outcome.response);
        return;
      }
      // This direct response is never handed to anyone (it did not verify, or
      // the caller was already served); cancel its body so the download stops.
      try {
        const body = outcome.response && outcome.response.body;
        const cancelled = body && body.cancel();
        if (cancelled && typeof cancelled.catch === 'function') cancelled.catch(() => {});
      } catch (_) { /* best-effort cleanup only */ }
    });

    proxyP.then(r => {
      if (!settled) { settled = true; resolve(r); }
    }, proxyErr => {
      // A failed proxy must not sink a direct request that is still about to
      // succeed: hold the error until direct's verdict (or the timeout) is in.
      shouldProxyPromise.then(() => {
        if (!settled) { settled = true; reject(proxyErr); }
      });
    });
  });

  return { responsePromise, shouldProxyPromise };
}
}
94+
95+
// Replacement for the global fetch. Requests whose URL cannot be parsed, or
// whose host is not in SHAREURL_PROXY_HOSTS, pass straight through to the
// original fetch. For allowlisted hosts:
//   - verdict already decided  -> one fetch, direct or proxied per the verdict;
//   - verdict being decided    -> wait for it, then issue one fresh fetch;
//   - no verdict yet           -> run the direct-vs-proxy race, record its
//                                 verdict for the host, and return the race's
//                                 response.
window.fetch = function(fetchInput, fetchInit) {
  let host;
  try {
    host = new URL(_shareurlInputToUrl(fetchInput), window.location.href).hostname;
  } catch (_) {
    return _origFetch(fetchInput, fetchInit);
  }
  if (!SHAREURL_PROXY_HOSTS.has(host)) {
    return _origFetch(fetchInput, fetchInit);
  }

  const decided = _shareurlShouldProxy.get(host);
  if (decided !== undefined) {
    return _shareurlFetch(decided, fetchInput, fetchInit);
  }

  const pending = _shareurlShouldProxyInflight.get(host);
  if (pending) {
    // Verdict pending: queue this fetch on it and issue a single fresh
    // request once the verdict is known.
    return pending.then(verdict => _shareurlFetch(verdict, fetchInput, fetchInit));
  }

  // First fetch to this host this page-load: run the race.
  const race = _shareurlRace(fetchInput, fetchInit);
  _shareurlShouldProxyInflight.set(host, race.shouldProxyPromise);
  race.shouldProxyPromise.then(verdict => {
    _shareurlShouldProxy.set(host, verdict);
    _shareurlShouldProxyInflight.delete(host);
  });
  return race.responsePromise;
};
26120

27121
const isEmbedded = window.parent !== window;

0 commit comments

Comments
 (0)