diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 9f3e69e3ccfd..ee387f28dab9 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -2225,8 +2225,11 @@ dependencies = [ "codex-windows-sandbox", "dotenvy", "pretty_assertions", + "serde", + "serde_json", "tempfile", "tokio", + "url", ] [[package]] diff --git a/codex-rs/arg0/Cargo.toml b/codex-rs/arg0/Cargo.toml index bb45db45521a..400e3fbb9ca5 100644 --- a/codex-rs/arg0/Cargo.toml +++ b/codex-rs/arg0/Cargo.toml @@ -23,8 +23,11 @@ codex-shell-escalation = { workspace = true } codex-utils-absolute-path = { workspace = true } codex-utils-home-dir = { workspace = true } dotenvy = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } tempfile = { workspace = true } -tokio = { workspace = true, features = ["rt-multi-thread"] } +tokio = { workspace = true, features = ["net", "rt-multi-thread", "time"] } +url = { workspace = true } [target.'cfg(windows)'.dependencies] codex-windows-sandbox = { workspace = true } diff --git a/codex-rs/arg0/src/conditional_dotenv/conditional_dotenv_tests.rs b/codex-rs/arg0/src/conditional_dotenv/conditional_dotenv_tests.rs new file mode 100644 index 000000000000..f4b5dfb1e69d --- /dev/null +++ b/codex-rs/arg0/src/conditional_dotenv/conditional_dotenv_tests.rs @@ -0,0 +1,281 @@ +use super::*; +use pretty_assertions::assert_eq; +use std::cell::Cell; +use std::cell::RefCell; +use std::collections::BTreeMap; +use std::fs; +use std::time::Duration; + +#[derive(Debug, Default, PartialEq, Eq)] +struct TestEnvironment(BTreeMap); + +impl StartupEnvironment for TestEnvironment { + fn set(&mut self, key: &str, value: &str) { + self.0.insert(key.to_string(), value.to_string()); + } + + fn remove(&mut self, key: &str) { + self.0.remove(key); + } +} + +fn write_overlay(codex_home: &Path, name: &str, contents: &str) -> std::io::Result<()> { + fs::write(codex_home.join(name), contents) +} + +fn test_environment(values: [(&str, &str); N]) -> TestEnvironment { + TestEnvironment(BTreeMap::from( + values.map(|(key, value)| (key.to_string(), value.to_string())), + )) +} + +#[test] +fn proxy_overlays_set_or_unset_environment_for_network() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + write_overlay( + codex_home.path(), + ".env.10-proxy-on", + r#"# codex-env-if: {"type":"tcp_connect","from":"HTTPS_PROXY","timeout_ms":500} +HTTPS_PROXY=http://user:password@proxy.example.com:8080 +HTTP_PROXY=http://proxy.example.com:8080 +ALL_PROXY=http://proxy.example.com:8080 +NO_PROXY=localhost,127.0.0.1,.example.com +"#, + )?; + write_overlay( + codex_home.path(), + ".env.20-proxy-off", + r#"# codex-env-if: {"not":{"type":"tcp_connect","host":"proxy.example.com","port":8080,"timeout_ms":500}} +# codex-env-unset: ["HTTPS_PROXY","HTTP_PROXY","ALL_PROXY","NO_PROXY"] +"#, + )?; + + let on_network_calls = Cell::new(0); + let on_network_connector = |host: &str, port: u16, timeout: Duration| { + on_network_calls.set(on_network_calls.get() + 1); + assert_eq!(host, "proxy.example.com"); + assert_eq!(port, 8080); + assert_eq!(timeout, Duration::from_millis(500)); + true + }; + let mut on_network_environment = test_environment([ + ("HTTPS_PROXY", "http://stale.example.com:8080"), + ("UNRELATED", "preserved"), + ]); + + let warnings = load_conditional_dotenv_overlays( + codex_home.path(), + &mut on_network_environment, + &on_network_connector, + ); + + assert_eq!(on_network_calls.get(), 2); + assert_eq!(warnings, Vec::::new()); + assert_eq!( + on_network_environment, + test_environment([ + ("ALL_PROXY", "http://proxy.example.com:8080"), + ("HTTPS_PROXY", "http://user:password@proxy.example.com:8080"), + ("HTTP_PROXY", "http://proxy.example.com:8080"), + ("NO_PROXY", "localhost,127.0.0.1,.example.com"), + ("UNRELATED", "preserved"), + ]) + ); + + let off_network_calls = Cell::new(0); + let off_network_connector = |host: &str, port: u16, timeout: Duration| { + off_network_calls.set(off_network_calls.get() + 1); + assert_eq!(host, "proxy.example.com"); + assert_eq!(port, 8080); + assert_eq!(timeout, Duration::from_millis(500)); + false + }; + let mut off_network_environment = test_environment([ + ("ALL_PROXY", "http://inherited.example.com:8080"), + ("HTTPS_PROXY", "http://inherited.example.com:8080"), + ("HTTP_PROXY", "http://inherited.example.com:8080"), + ("NO_PROXY", "localhost"), + ("UNRELATED", "preserved"), + ]); + + let warnings = load_conditional_dotenv_overlays( + codex_home.path(), + &mut off_network_environment, + &off_network_connector, + ); + + assert_eq!(off_network_calls.get(), 2); + assert_eq!(warnings, Vec::::new()); + assert_eq!( + off_network_environment, + test_environment([("UNRELATED", "preserved")]) + ); + Ok(()) +} + +#[test] +fn overlay_filters_reserved_names_for_set_and_unset() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + write_overlay( + codex_home.path(), + ".env.proxy", + r#"# codex-env-if: {"type":"tcp_connect","host":"proxy.example.com","port":8080} +# codex-env-unset: ["ALL_PROXY","codex_internal"] +HTTP_PROXY=http://proxy.example.com:8080 +codex_internal=changed +"#, + )?; + let mut environment = test_environment([ + ("ALL_PROXY", "http://old.example.com:8080"), + ("codex_internal", "preserved"), + ]); + + let warnings = + load_conditional_dotenv_overlays(codex_home.path(), &mut environment, &|_, _, _| true); + + assert_eq!(warnings, Vec::::new()); + assert_eq!( + environment, + test_environment([ + ("HTTP_PROXY", "http://proxy.example.com:8080"), + ("codex_internal", "preserved"), + ]) + ); + Ok(()) +} + +#[test] +fn invalid_timeout_fails_closed_without_blocking_valid_overlays() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + write_overlay(codex_home.path(), ".env.example", "SHOULD_NOT_LOAD=1\n")?; + write_overlay( + codex_home.path(), + ".env.10-timeout", + r#"# codex-env-if: {"type":"tcp_connect","host":"proxy.example.com","port":8080,"timeout_ms":5001} +SHOULD_NOT_LOAD=2 +"#, + )?; + write_overlay( + codex_home.path(), + ".env.20-maximum-timeout", + r#"# codex-env-if: {"type":"tcp_connect","host":"proxy.example.com","port":8080,"timeout_ms":5000} +MAXIMUM_TIMEOUT=loaded +"#, + )?; + write_overlay( + codex_home.path(), + ".env.30-default-timeout", + r#"# codex-env-if: {"type":"tcp_connect","host":"proxy.example.com","port":8080} +DEFAULT_TIMEOUT=loaded +"#, + )?; + let observed_timeouts = RefCell::new(Vec::new()); + let connector = |_: &str, _: u16, timeout: Duration| { + observed_timeouts.borrow_mut().push(timeout); + true + }; + let mut environment = TestEnvironment::default(); + + let warnings = + load_conditional_dotenv_overlays(codex_home.path(), &mut environment, &connector); + + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains(".env.10-timeout")); + assert_eq!( + observed_timeouts.into_inner(), + vec![Duration::from_millis(5000), Duration::from_millis(500)] + ); + assert_eq!( + environment, + test_environment([("DEFAULT_TIMEOUT", "loaded"), ("MAXIMUM_TIMEOUT", "loaded"),]) + ); + Ok(()) +} + +#[test] +fn invalid_from_sources_fail_closed_without_connecting() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + write_overlay( + codex_home.path(), + ".env.10-missing-source", + r#"# codex-env-if: {"type":"tcp_connect","from":"HTTPS_PROXY"} +SHOULD_NOT_LOAD=1 +"#, + )?; + write_overlay( + codex_home.path(), + ".env.20-invalid-source", + r#"# codex-env-if: {"type":"tcp_connect","from":"HTTPS_PROXY"} +HTTPS_PROXY="not a url" +"#, + )?; + let connector_called = Cell::new(false); + let connector = |_: &str, _: u16, _: Duration| { + connector_called.set(true); + true + }; + let mut environment = TestEnvironment::default(); + + let warnings = + load_conditional_dotenv_overlays(codex_home.path(), &mut environment, &connector); + + assert!(!connector_called.get()); + assert_eq!(warnings.len(), 2); + assert_eq!(environment, TestEnvironment::default()); + Ok(()) +} + +#[test] +fn directives_inside_multiline_values_are_not_scanned() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + write_overlay( + codex_home.path(), + ".env.multiline", + r#"# codex-env-if: {"type":"tcp_connect","host":"proxy.example.com","port":8080} +# codex-env-unset: ["HTTP_PROXY"] +MESSAGE='before +# codex-env-unset: ["ALL_PROXY"] +# codex-env-if: {"not":{"type":"tcp_connect","host":"other.example.com","port":8080}} +after' +"#, + )?; + let mut environment = test_environment([ + ("ALL_PROXY", "http://proxy.example.com:8080"), + ("HTTP_PROXY", "http://proxy.example.com:8080"), + ]); + + let warnings = + load_conditional_dotenv_overlays(codex_home.path(), &mut environment, &|_, _, _| true); + + assert_eq!(warnings, Vec::::new()); + assert_eq!( + environment, + test_environment([ + ("ALL_PROXY", "http://proxy.example.com:8080"), + ( + "MESSAGE", + "before\n# codex-env-unset: [\"ALL_PROXY\"]\n# codex-env-if: {\"not\":{\"type\":\"tcp_connect\",\"host\":\"other.example.com\",\"port\":8080}}\nafter", + ), + ]) + ); + Ok(()) +} + +#[test] +fn endpoint_parser_supports_urls_bare_authorities_and_known_default_ports() { + assert_eq!( + [ + "http://user:password@proxy.example.com:8080", + "proxy.example.com:8080", + "https://proxy.example.com", + "http://[::1]:8080", + ] + .map(parse_endpoint), + [ + Some(("proxy.example.com".to_string(), 8080)), + Some(("proxy.example.com".to_string(), 8080)), + Some(("proxy.example.com".to_string(), 443)), + Some(("::1".to_string(), 8080)), + ] + ); +} diff --git a/codex-rs/arg0/src/conditional_dotenv/mod.rs b/codex-rs/arg0/src/conditional_dotenv/mod.rs new file mode 100644 index 000000000000..c3ed40179e4e --- /dev/null +++ b/codex-rs/arg0/src/conditional_dotenv/mod.rs @@ -0,0 +1,465 @@ +//! Loads TCP-gated dotenv overlays from `CODEX_HOME` during single-threaded startup. +//! +//! Example `~/.codex/.env.corporate-proxy`: +//! +//! ```dotenv +//! # codex-env-if: {"type":"tcp_connect","from":"HTTPS_PROXY","timeout_ms":500} +//! HTTPS_PROXY=http://proxy.example.com:8080 +//! HTTP_PROXY=http://proxy.example.com:8080 +//! ALL_PROXY=http://proxy.example.com:8080 +//! NO_PROXY=localhost,127.0.0.1,.example.com +//! ``` +//! +//! A TCP condition can be inverted with `{"not":{"type":"tcp_connect",...}}`, which allows a +//! second overlay to unset proxy variables when the endpoint is unreachable. + +use crate::is_reserved_env_var; +use serde::Deserialize; +use std::ffi::OsString; +use std::net::ToSocketAddrs; +use std::path::Path; +use std::path::PathBuf; +use std::process::Command; +use std::process::Stdio; +use std::time::Duration; +use std::time::Instant; +use url::Url; + +pub(crate) const TCP_CONNECT_HELPER_ARG1: &str = "--codex-run-as-tcp-connect-probe"; + +const CONDITIONAL_DOTENV_PREFIX: &str = ".env."; +const CONDITION_DIRECTIVE_PREFIX: &str = "# codex-env-if:"; +const UNSET_DIRECTIVE_PREFIX: &str = "# codex-env-unset:"; +const DEFAULT_TCP_CONNECT_TIMEOUT_MS: u64 = 500; +const MAX_TCP_CONNECT_TIMEOUT_MS: u64 = 5_000; +const TCP_CONNECT_HELPER_POLL_INTERVAL: Duration = Duration::from_millis(10); +const IGNORED_CONDITIONAL_DOTENV_SUFFIXES: &[&str] = &[ + "bak", "back", "backup", "bkp", "old", "orig", "original", "save", "saved", "disable", + "disabled", "inactive", "off", "tmp", "temp", "swp", "swo", "example", "sample", "template", + "dist", +]; + +type TcpConnector<'a> = dyn Fn(&str, u16, Duration) -> bool + 'a; + +pub(crate) fn load(codex_home: &Path) { + let mut environment = ProcessEnvironment; + + for warning in + load_conditional_dotenv_overlays(codex_home, &mut environment, &system_tcp_connect) + { + eprintln!("WARNING: skipped conditional dotenv overlay: {warning}"); + } +} + +/// Runs an OS-resolved TCP probe in the child process used by [`system_tcp_connect`]. +/// +/// This exits before dotenv loading, so its resolver and connection tasks cannot race with the +/// parent's environment mutation. +pub(crate) fn run_tcp_connect_helper(mut args: impl Iterator) -> ! { + let host = args.next().and_then(|value| value.into_string().ok()); + let port = args + .next() + .and_then(|value| value.into_string().ok()) + .and_then(|value| value.parse::().ok()); + let timeout = args + .next() + .and_then(|value| value.into_string().ok()) + .and_then(|value| value.parse::().ok()) + .and_then(|timeout_ms| tcp_connect_timeout(Some(timeout_ms)).ok()); + let connected = match (host, port, timeout) { + (Some(host), Some(port), Some(timeout)) => direct_tcp_connect(&host, port, timeout), + _ => false, + }; + let exit_code = if connected { 0 } else { 1 }; + std::process::exit(exit_code); +} + +fn load_conditional_dotenv_overlays( + codex_home: &Path, + environment: &mut dyn StartupEnvironment, + tcp_connect: &TcpConnector<'_>, +) -> Vec { + let paths = match conditional_dotenv_paths(codex_home) { + Ok(paths) => paths, + Err(err) => { + return vec![format!( + "could not discover overlays in {}: {err}", + codex_home.display() + )]; + } + }; + let mut warnings = Vec::new(); + + for path in paths { + let overlay = match parse_conditional_dotenv(&path) { + Ok(Some(overlay)) => overlay, + Ok(None) => continue, + Err(err) => { + warnings.push(format!("{}: {err}", path.display())); + continue; + } + }; + match evaluate_condition(&overlay.condition, &overlay.entries, tcp_connect) { + Ok(true) => apply_overlay(overlay, environment), + Ok(false) => {} + Err(err) => warnings.push(format!("{}: {err}", path.display())), + } + } + + warnings +} + +fn conditional_dotenv_paths(codex_home: &Path) -> std::io::Result> { + let entries = match std::fs::read_dir(codex_home) { + Ok(entries) => entries, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()), + Err(err) => return Err(err), + }; + let mut paths = Vec::new(); + + for entry in entries { + let entry = entry?; + let file_name = entry.file_name(); + let Some(file_name) = file_name.to_str() else { + continue; + }; + let is_ignored = file_name.ends_with('~') + || file_name.rsplit_once('.').is_some_and(|(_, suffix)| { + IGNORED_CONDITIONAL_DOTENV_SUFFIXES + .iter() + .any(|ignored| suffix.eq_ignore_ascii_case(ignored)) + }); + if file_name.starts_with(CONDITIONAL_DOTENV_PREFIX) + && file_name.len() > CONDITIONAL_DOTENV_PREFIX.len() + && !is_ignored + && entry.path().is_file() + { + paths.push(entry.path()); + } + } + + paths.sort(); + Ok(paths) +} + +fn parse_conditional_dotenv(path: &Path) -> Result, String> { + let contents = + std::fs::read_to_string(path).map_err(|err| format!("could not read file: {err}"))?; + let contents = contents.strip_prefix('\u{feff}').unwrap_or(&contents); + let mut lines = contents + .lines() + .map(|line| line.trim_start_matches('\u{feff}').trim()); + let Some(first_line) = lines.find(|line| !line.is_empty()) else { + return Ok(None); + }; + let Some(condition_json) = first_line.strip_prefix(CONDITION_DIRECTIVE_PREFIX) else { + return Ok(None); + }; + let condition_json = condition_json.trim(); + if condition_json.is_empty() { + return Err("condition directive is empty".to_string()); + } + let condition = serde_json::from_str(condition_json) + .map_err(|err| format!("condition directive is invalid: {err}"))?; + + let mut unset = None; + for line in lines { + if !line.is_empty() && !line.starts_with('#') { + break; + } + if line.starts_with(CONDITION_DIRECTIVE_PREFIX) { + return Err("exactly one condition directive is required".to_string()); + } + if let Some(unset_json) = line.strip_prefix(UNSET_DIRECTIVE_PREFIX) { + if unset.is_some() { + return Err("multiple unset directives are not supported".to_string()); + } + let keys: Vec = serde_json::from_str(unset_json.trim()) + .map_err(|err| format!("unset directive is invalid: {err}"))?; + if let Some(key) = keys.iter().find(|key| !is_valid_env_var_name(key)) { + return Err(format!( + "unset directive contains invalid environment variable name `{key}`" + )); + } + unset = Some(keys); + } + } + + let entries = dotenvy::from_read_iter(contents.as_bytes()) + .collect::, _>>() + .map_err(|_| "file contains an invalid dotenv assignment".to_string())?; + if entries.iter().any(|(_, value)| value.contains('\0')) { + return Err("file contains a dotenv value with a NUL byte".to_string()); + } + + Ok(Some(ConditionalDotenv { + condition, + unset: unset.unwrap_or_default(), + entries, + })) +} + +fn evaluate_condition( + condition: &Condition, + overlay_entries: &[(String, String)], + tcp_connect: &TcpConnector<'_>, +) -> Result { + match condition { + Condition::TcpConnect(condition) => { + evaluate_tcp_connect(condition, overlay_entries, tcp_connect) + } + Condition::Not { not } => { + evaluate_tcp_connect(not, overlay_entries, tcp_connect).map(|connected| !connected) + } + } +} + +fn evaluate_tcp_connect( + condition: &TcpConnectCondition, + overlay_entries: &[(String, String)], + tcp_connect: &TcpConnector<'_>, +) -> Result { + match condition { + TcpConnectCondition::TcpConnect { + from, + host, + port, + timeout_ms, + } => { + let target = parse_tcp_connect_target(from.as_deref(), host.as_deref(), *port)?; + let (host, port) = match target { + TcpConnectTarget::FromVariable(from) => { + let value = overlay_entries + .iter() + .rev() + .find_map(|(key, value)| { + (key == from || (cfg!(windows) && key.eq_ignore_ascii_case(from))) + .then_some(value) + }) + .ok_or_else(|| { + format!( + "tcp_connect source variable `{from}` is not defined in the overlay" + ) + })?; + parse_endpoint(value).ok_or_else(|| { + format!( + "tcp_connect source variable `{from}` does not contain a valid endpoint" + ) + })? + } + TcpConnectTarget::Explicit { host, port } => (host.to_string(), port), + }; + let timeout = tcp_connect_timeout(*timeout_ms)?; + Ok((tcp_connect)(&host, port, timeout)) + } + } +} + +fn apply_overlay(overlay: ConditionalDotenv, environment: &mut dyn StartupEnvironment) { + for key in overlay.unset { + if !is_reserved_env_var(&key) { + environment.remove(&key); + } + } + for (key, value) in overlay.entries { + if !is_reserved_env_var(&key) { + environment.set(&key, &value); + } + } +} + +enum TcpConnectTarget<'a> { + FromVariable(&'a str), + Explicit { host: &'a str, port: u16 }, +} + +fn parse_tcp_connect_target<'a>( + from: Option<&'a str>, + host: Option<&'a str>, + port: Option, +) -> Result, String> { + match (from, host, port) { + (Some(from), None, None) => { + if !is_valid_env_var_name(from) { + return Err(format!( + "tcp_connect contains invalid source variable name `{from}`" + )); + } + Ok(TcpConnectTarget::FromVariable(from)) + } + (None, Some(host), Some(port)) => { + if host.is_empty() { + return Err("tcp_connect host must not be empty".to_string()); + } + if port == 0 { + return Err("tcp_connect port must be greater than zero".to_string()); + } + Ok(TcpConnectTarget::Explicit { host, port }) + } + (Some(_), _, _) => Err( + "tcp_connect must specify either `from` or `host` and `port`, but not both".to_string(), + ), + (None, _, _) => { + Err("tcp_connect requires either `from` or both `host` and `port`".to_string()) + } + } +} + +fn tcp_connect_timeout(timeout_ms: Option) -> Result { + let timeout_ms = timeout_ms.unwrap_or(DEFAULT_TCP_CONNECT_TIMEOUT_MS); + if !(1..=MAX_TCP_CONNECT_TIMEOUT_MS).contains(&timeout_ms) { + return Err(format!( + "tcp_connect timeout_ms must be between 1 and {MAX_TCP_CONNECT_TIMEOUT_MS}" + )); + } + Ok(Duration::from_millis(timeout_ms)) +} + +fn parse_endpoint(value: &str) -> Option<(String, u16)> { + let value = value.trim(); + let parsed = if value.contains("://") { + Url::parse(value).ok()? + } else { + Url::parse(&format!("http://{value}")).ok()? + }; + let host = match parsed.host()? { + url::Host::Ipv6(address) => address.to_string(), + host => host.to_string(), + }; + let port = parsed.port_or_known_default()?; + (port != 0).then_some((host, port)) +} + +fn system_tcp_connect(host: &str, port: u16, timeout: Duration) -> bool { + // `getaddrinfo` cannot be cancelled portably. Isolate it in a child that can be killed and + // reaped without introducing a resolver thread into the environment-mutating parent. + let Ok(current_exe) = std::env::current_exe() else { + return false; + }; + let Ok(mut child) = Command::new(current_exe) + .arg(TCP_CONNECT_HELPER_ARG1) + .arg(host) + .arg(port.to_string()) + .arg(timeout.as_millis().to_string()) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + else { + return false; + }; + + let deadline = Instant::now() + timeout; + loop { + match child.try_wait() { + Ok(Some(status)) => return status.success(), + Ok(None) => {} + Err(_) => break, + } + + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + break; + } + std::thread::sleep(remaining.min(TCP_CONNECT_HELPER_POLL_INTERVAL)); + } + + let _ = child.kill(); + let _ = child.wait(); + false +} + +fn direct_tcp_connect(host: &str, port: u16, timeout: Duration) -> bool { + let deadline = Instant::now() + timeout; + let Ok(addresses) = (host, port) + .to_socket_addrs() + .map(Iterator::collect::>) + else { + return false; + }; + if deadline.saturating_duration_since(Instant::now()).is_zero() { + return false; + } + + let Ok(runtime) = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + else { + return false; + }; + runtime.block_on(async move { + tokio::time::timeout_at(tokio::time::Instant::from_std(deadline), async move { + let mut attempts = tokio::task::JoinSet::new(); + for address in addresses { + attempts.spawn(tokio::net::TcpStream::connect(address)); + } + while let Some(result) = attempts.join_next().await { + if matches!(result, Ok(Ok(_))) { + return true; + } + } + false + }) + .await + .unwrap_or(false) + }) +} + +fn is_valid_env_var_name(key: &str) -> bool { + let mut chars = key.chars(); + chars + .next() + .is_some_and(|first| first.is_ascii_alphabetic() || first == '_') + && chars.all(|character| { + character.is_ascii_alphanumeric() || character == '_' || character == '.' + }) +} + +/// Minimal environment interface used to test overlay application without mutating the test +/// process environment. +trait StartupEnvironment { + fn set(&mut self, key: &str, value: &str); + fn remove(&mut self, key: &str); +} + +struct ProcessEnvironment; + +impl StartupEnvironment for ProcessEnvironment { + fn set(&mut self, key: &str, value: &str) { + // Safety: this loader runs from arg0_dispatch before Codex creates any threads. + unsafe { std::env::set_var(key, value) }; + } + + fn remove(&mut self, key: &str) { + // Safety: this loader runs from arg0_dispatch before Codex creates any threads. + unsafe { std::env::remove_var(key) }; + } +} + +struct ConditionalDotenv { + condition: Condition, + unset: Vec, + entries: Vec<(String, String)>, +} + +#[derive(Debug, Deserialize)] +#[serde(untagged, deny_unknown_fields)] +enum Condition { + TcpConnect(TcpConnectCondition), + Not { not: TcpConnectCondition }, +} + +#[derive(Debug, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case", deny_unknown_fields)] +enum TcpConnectCondition { + TcpConnect { + from: Option, + host: Option, + port: Option, + timeout_ms: Option, + }, +} + +#[cfg(test)] +#[path = "conditional_dotenv_tests.rs"] +mod tests; diff --git a/codex-rs/arg0/src/lib.rs b/codex-rs/arg0/src/lib.rs index 1c28f81d8cbb..383344982296 100644 --- a/codex-rs/arg0/src/lib.rs +++ b/codex-rs/arg0/src/lib.rs @@ -15,6 +15,8 @@ use codex_windows_sandbox::CODEX_WINDOWS_SANDBOX_ARG1; use std::os::unix::fs::symlink; use tempfile::TempDir; +mod conditional_dotenv; + const APPLY_PATCH_ARG0: &str = "apply_patch"; const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch"; #[cfg(unix)] @@ -98,6 +100,9 @@ pub fn arg0_dispatch() -> Option { } let argv1 = args.next().unwrap_or_default(); + if argv1 == conditional_dotenv::TCP_CONNECT_HELPER_ARG1 { + conditional_dotenv::run_tcp_connect_helper(args); + } if argv1 == CODEX_FS_HELPER_ARG1 { codex_exec_server::run_fs_helper_main(); } @@ -191,7 +196,7 @@ fn prepare_path_env_var_with_aliases( /// `codex-linux-sandbox` we *directly* execute /// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we: /// -/// 1. Load `.env` values from `~/.codex/.env` before creating any threads. +/// 1. Load `.env` and matching conditional dotenv values from `~/.codex` before creating threads. /// 2. Spawn a main runtime thread with a controlled stack size. /// 3. Construct a Tokio multi-thread runtime. /// 4. Capture the current executable path and derive the @@ -285,16 +290,18 @@ fn build_runtime() -> anyhow::Result { const ILLEGAL_ENV_VAR_PREFIX: &str = "CODEX_"; -/// Load env vars from ~/.codex/.env. +/// Load base and conditional env vars from the startup Codex home. /// /// Security: Do not allow `.env` files to create or modify any variables /// with names starting with `CODEX_`. fn load_dotenv() { - if let Ok(codex_home) = find_codex_home() - && let Ok(iter) = dotenvy::from_path_iter(codex_home.join(".env")) - { + let Ok(codex_home) = find_codex_home() else { + return; + }; + if let Ok(iter) = dotenvy::from_path_iter(codex_home.join(".env")) { set_filtered(iter); } + conditional_dotenv::load(codex_home.as_path()); } /// Helper to set vars from a dotenvy iterator while filtering out `CODEX_` keys. @@ -303,7 +310,7 @@ where I: IntoIterator>, { for (key, value) in iter.into_iter().flatten() { - if !key.to_ascii_uppercase().starts_with(ILLEGAL_ENV_VAR_PREFIX) { + if !is_reserved_env_var(&key) { // It is safe to call set_var() because our process is // single-threaded at this point in its execution. unsafe { std::env::set_var(&key, &value) }; @@ -311,6 +318,11 @@ where } } +fn is_reserved_env_var(key: &str) -> bool { + key.get(..ILLEGAL_ENV_VAR_PREFIX.len()) + .is_some_and(|prefix| prefix.eq_ignore_ascii_case(ILLEGAL_ENV_VAR_PREFIX)) +} + /// Creates a temporary directory with either: /// /// - UNIX: `apply_patch` symlink to the current executable