Skip to content

Commit 95c4a05

Browse files
RajivTS authored and meta-codesync[bot] committed
Tunnel GitHub LFS through HTTPS forward proxy
Summary: Fixes the `Network is unreachable (os error 101)` failure observed on par-msl/jarvis E2E (Sandcastle workflow 1130403506487189108) where gitimport's hyper-openssl HTTPS client tried to reach `github.com:443` directly from a Sandcastle worker. Workers can't route external traffic without going through `http://fwdproxy:8080` — git/curl auto-pick up `https_proxy` env, but Rust hyper-openssl does not. The GitHub LFS Batch client now wraps its `HttpsConnector` in `hyper_proxy2::ProxyConnector` so each batch POST / signed-URL GET tunnels through the proxy via HTTP CONNECT before the TLS handshake to github.com runs over that tunnel. **Default**: `http://fwdproxy:8080` (Meta's prod forward proxy). Reaching github.com from any Meta host requires this, so making it the default avoids the footgun of "I forgot to pass --github-lfs-https-proxy and now my run fails after 5 minutes of LFS retries". Override via `--github-lfs-https-proxy <URL>` for a different proxy, or pass `--github-lfs-no-https-proxy` to disable entirely (useful for OSS / outside-Meta runs where github.com is directly reachable). Implementation notes: - New `build_github_https_client(proxy_url: Option<String>)` returns `Client<ProxyConnector<HttpsConnector<HttpConnector>>, _>`. `GitHubLfs.client` type changes accordingly. The other LFS modes (Upstream, Internal) are untouched — they talk to intra-DC endpoints and don't need a proxy. - `from_proxy_unsecured(inner, proxy)` is used because fwdproxy is plain `http://`; the inner `HttpsConnector` still handles TLS to the actual target after CONNECT establishes the tunnel. - `Intercept::None` (used when proxy is `None`) keeps a single client type whether or not a proxy is configured — avoids a sum type on the client field. Dummy `http://unused.invalid` URI is required by the API but never dialed. - Both new flags require `--github-lfs-url` (clap `requires`), matching the existing `--github-lfs-token-file` constraint. 
- CLI surface: `--github-lfs-https-proxy <URL>` (default `http://fwdproxy:8080`) + `--github-lfs-no-https-proxy` (bool, opt-out). WWW Sandcastle (`SandcastleGithubMirrorSyncCommand`) doesn't need any change — the default kicks in automatically. Reviewed By: lmvasquezg Differential Revision: D106645417 fbshipit-source-id: 8055238d474ddf9f16b821c5515c17940928b006
1 parent 8a0a09f commit 95c4a05

4 files changed

Lines changed: 77 additions & 11 deletions

File tree

eden/mononoke/git/gitimport/src/main.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,25 @@ struct GitimportArgs {
242242
/// hours). Required when `--github-lfs-url` is set.
243243
#[clap(long, requires = "github_lfs_url")]
244244
github_lfs_token_file: Option<PathBuf>,
245+
/// HTTP forward proxy URL used to tunnel GitHub LFS Batch API requests
246+
/// through via HTTP CONNECT. Defaults to Meta's prod forward proxy
247+
/// (`http://fwdproxy:8080`), which is required to reach github.com from
248+
/// any Meta host (Sandcastle workers, devservers, OD sandboxes). Pair
249+
/// with `--github-lfs-no-https-proxy` to disable proxying entirely
250+
/// (e.g. when running outside Meta in OSS environments where github.com
251+
/// is reachable directly). Only applies to the `--github-lfs-url` code
252+
/// path; the other LFS modes ignore it.
253+
#[clap(
254+
long,
255+
requires = "github_lfs_url",
256+
default_value = "http://fwdproxy:8080"
257+
)]
258+
github_lfs_https_proxy: String,
259+
/// Disable HTTPS proxying for GitHub LFS Batch requests, overriding the
260+
/// default `--github-lfs-https-proxy`. Use when running outside Meta
261+
/// (OSS) where github.com is reachable directly without a forward proxy.
262+
#[clap(long, requires = "github_lfs_url")]
263+
github_lfs_no_https_proxy: bool,
245264
/// TLS parameters for this service used for outbound LFS connections
246265
#[clap(flatten)]
247266
tls_args: Option<TLSArgs>,
@@ -407,9 +426,15 @@ async fn async_main(app: MononokeApp) -> Result<(), Error> {
407426
"--github-lfs-token-file is required when --github-lfs-url is set",
408427
)
409428
})?;
429+
let https_proxy = if args.github_lfs_no_https_proxy {
430+
None
431+
} else {
432+
Some(args.github_lfs_https_proxy)
433+
};
410434
GitImportLfs::new_github(
411435
github_lfs_url,
412436
token_file,
437+
https_proxy,
413438
args.allow_dangling_lfs_pointers,
414439
args.lfs_import_max_attempts,
415440
Some(LFS_SIMULTANEOUS_CONNECTION_LIMIT),

eden/mononoke/git/import_tools/BUCK

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ rust_library(
2727
"fbsource//third-party/rust:http",
2828
"fbsource//third-party/rust:http-body-util",
2929
"fbsource//third-party/rust:hyper-openssl",
30+
"fbsource//third-party/rust:hyper-proxy2",
3031
"fbsource//third-party/rust:hyper-util",
3132
"fbsource//third-party/rust:linked-hash-map",
3233
"fbsource//third-party/rust:openssl",

eden/mononoke/git/import_tools/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ gix-object = "0.60.0"
3636
http = "1.4.0"
3737
http-body-util = "0.1.0"
3838
hyper-openssl = { version = "0.10", features = ["client-legacy", "tokio"] }
39+
hyper-proxy2 = { version = "0.1", features = ["rustls"], default-features = false }
3940
hyper-util = { version = "0.1.20", features = ["client-legacy", "http1", "http2", "server-auto", "service", "tokio"] }
4041
lfs_protocol = { version = "0.1.0", path = "../../common/lfs_protocol" }
4142
linked-hash-map = { version = "0.5", features = ["serde_impl"] }

eden/mononoke/git/import_tools/src/gitlfs.rs

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ use http::Uri;
3636
use http_body_util::BodyExt as _;
3737
use http_body_util::Full;
3838
use hyper_openssl::client::legacy::HttpsConnector;
39+
use hyper_proxy2::Intercept;
40+
use hyper_proxy2::Proxy;
41+
use hyper_proxy2::ProxyConnector;
3942
use hyper_util::client::legacy::Client;
4043
use hyper_util::client::legacy::connect::HttpConnector;
4144
use hyper_util::rt::TokioExecutor;
@@ -72,6 +75,13 @@ use tracing::warn;
7275
fn build_https_client(
7376
tls_args: Option<TLSArgs>,
7477
) -> Result<Client<HttpsConnector<HttpConnector>, Full<Bytes>>, Error> {
78+
let connector = build_https_connector(tls_args)?;
79+
Ok(Client::builder(TokioExecutor::new()).build(connector))
80+
}
81+
82+
fn build_https_connector(
83+
tls_args: Option<TLSArgs>,
84+
) -> Result<HttpsConnector<HttpConnector>, Error> {
7585
let mut ssl_connector = SslConnector::builder(SslMethod::tls_client())?;
7686
if let Some(tls_args) = tls_args {
7787
ssl_connector.set_ca_file(tls_args.tls_ca)?;
@@ -80,9 +90,33 @@ fn build_https_client(
8090
};
8191
let mut http_connector = HttpConnector::new();
8292
http_connector.enforce_http(false);
83-
let connector =
84-
HttpsConnector::with_connector(http_connector, ssl_connector).map_err(Error::from)?;
85-
Ok(Client::builder(TokioExecutor::new()).build(connector))
93+
HttpsConnector::with_connector(http_connector, ssl_connector).map_err(Error::from)
94+
}
95+
96+
/// Builds an HTTPS client for the GitHub LFS path, optionally tunneling
97+
/// HTTPS through an `http://...` forward proxy via CONNECT. Required when
98+
/// running inside Meta's prod fleet (e.g. Sandcastle workers), where direct
99+
/// outbound traffic to github.com is blocked and must go through
100+
/// `http://fwdproxy:8080`. When `proxy_url` is `None` the proxy connector
101+
/// is configured with `Intercept::None`, which makes it a no-op pass-through
102+
/// over the underlying `HttpsConnector` (the only reason it's still wrapped
103+
/// is to keep one client type regardless of whether a proxy is configured).
104+
fn build_github_https_client(
105+
proxy_url: Option<String>,
106+
) -> Result<Client<ProxyConnector<HttpsConnector<HttpConnector>>, Full<Bytes>>, Error> {
107+
let inner = build_https_connector(None)?;
108+
let (intercept, proxy_uri) = match proxy_url {
109+
Some(url) => {
110+
let uri = url
111+
.parse::<Uri>()
112+
.with_context(|| format!("parsing --https-proxy URL {url}"))?;
113+
(Intercept::All, uri)
114+
}
115+
None => (Intercept::None, "http://unused.invalid".parse::<Uri>()?),
116+
};
117+
let proxy_connector =
118+
ProxyConnector::from_proxy_unsecured(inner, Proxy::new(intercept, proxy_uri));
119+
Ok(Client::builder(TokioExecutor::new()).build(proxy_connector))
86120
}
87121

88122
/// URL pattern used by the upstream LFS server to serve a single object keyed by SHA256.
@@ -187,8 +221,10 @@ pub struct GitHubLfs {
187221
/// GitHub's per-installation rate limits when importing wide trees).
188222
conn_limit_sem: Option<Arc<Semaphore>>,
189223
/// HTTPS client built with the system trust store (no Meta mTLS) so it
190-
/// can talk to github.com.
191-
client: Client<HttpsConnector<HttpConnector>, Full<Bytes>>,
224+
/// can talk to github.com, optionally tunneling through an `http://...`
225+
/// forward proxy via CONNECT (required from Sandcastle / prod workers
226+
/// where direct outbound traffic to github.com is blocked).
227+
client: Client<ProxyConnector<HttpsConnector<HttpConnector>>, Full<Bytes>>,
192228
}
193229

194230
impl fmt::Debug for GitHubLfs {
@@ -303,19 +339,22 @@ impl GitImportLfs {
303339
/// Build a `GitImportLfs` that fetches LFS objects from a GitHub LFS Batch
304340
/// API endpoint authenticated with a GitHub App installation token read
305341
/// from `token_file`. The HTTPS client is built with the system trust
306-
/// store (not Meta mTLS) since it talks to github.com. The token is read
307-
/// lazily on first use and re-read from disk after a 401/403, so an
308-
/// out-of-process refresher can rotate the file while gitimport is
309-
/// running (installation tokens expire after ~1h; large imports take
310-
/// several hours).
342+
/// store (not Meta mTLS) since it talks to github.com, optionally
343+
/// tunneling through `https_proxy_url` via HTTP CONNECT (required from
344+
/// Sandcastle / prod workers where direct outbound traffic to github.com
345+
/// is blocked). The token is read lazily on first use and re-read from
346+
/// disk after a 401/403, so an out-of-process refresher can rotate the
347+
/// file while gitimport is running (installation tokens expire after
348+
/// ~1h; large imports take several hours).
311349
pub fn new_github(
312350
batch_url: String,
313351
token_file: PathBuf,
352+
https_proxy_url: Option<String>,
314353
allow_not_found: bool,
315354
max_attempts: u32,
316355
conn_limit: Option<usize>,
317356
) -> Result<Self, Error> {
318-
let client = build_https_client(None)?;
357+
let client = build_github_https_client(https_proxy_url)?;
319358
let github = GitHubLfs {
320359
batch_url,
321360
token_file,

0 commit comments

Comments
 (0)