diff --git a/codex-rs/utils/pty/Cargo.toml b/codex-rs/utils/pty/Cargo.toml index d03962edc71a..743a40c49d02 100644 --- a/codex-rs/utils/pty/Cargo.toml +++ b/codex-rs/utils/pty/Cargo.toml @@ -21,11 +21,15 @@ lazy_static = { workspace = true } log = { workspace = true } shared_library = "0.1.9" winapi = { version = "0.3.9", features = [ + "fileapi", "handleapi", "jobapi2", "minwinbase", + "namedpipeapi", "processthreadsapi", + "stringapiset", "synchapi", + "sysinfoapi", "winbase", "wincon", "winerror", diff --git a/codex-rs/utils/pty/src/win/command.rs b/codex-rs/utils/pty/src/win/command.rs new file mode 100644 index 000000000000..3a1b6bce67dc --- /dev/null +++ b/codex-rs/utils/pty/src/win/command.rs @@ -0,0 +1,376 @@ +// Portions of this file are adapted from Rust's standard library. +// Copyright The Rust Project Developers. Licensed under Apache-2.0 or MIT. + +use std::cmp::Ordering; +use std::collections::HashMap; +use std::env; +use std::ffi::OsStr; +use std::io; +use std::os::windows::ffi::OsStrExt; +use std::path::Path; +use std::path::PathBuf; + +use winapi::shared::minwindef::TRUE; +use winapi::um::stringapiset::CompareStringOrdinal; + +use self::command_path::current_directory; +use self::command_path::program_exists; +use self::command_path::system_directory; +use self::command_path::to_user_path; +use self::command_path::windows_directory; + +#[path = "command_path.rs"] +mod command_path; + +const CSTR_LESS_THAN: i32 = 1; +const CSTR_EQUAL: i32 = 2; +const CSTR_GREATER_THAN: i32 = 3; + +pub(super) struct PreparedCommand { + pub application: Vec<u16>, + pub command_line: Vec<u16>, + pub environment: Vec<u16>, + pub current_directory: Vec<u16>, +} + +pub(super) fn prepare_command( + program: &str, + args: &[String], + cwd: &Path, + env: &HashMap<String, String>, +) -> io::Result<PreparedCommand> { + if program.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "missing program for pipe spawn", + )); + } + + let resolved_program = resolve_program(program, env)?; + let resolved_program
= to_user_path(&resolved_program)?; + let is_batch = has_batch_extension(&resolved_program); + + let (application, mut command_line) = if is_batch { + let command_interpreter = system_directory()?.join("cmd.exe"); + ( + to_user_path(&command_interpreter)?, + batch_command_line(&resolved_program, args)?, + ) + } else { + (resolved_program, regular_command_line(program, args)?) + }; + command_line.push(0); + + Ok(PreparedCommand { + application, + command_line, + environment: environment_block(env)?, + current_directory: current_directory(cwd)?, + }) +} + +fn resolve_program(program: &str, child_env: &HashMap<String, String>) -> io::Result<PathBuf> { + let program = Path::new(program); + let file_name = program.file_name().ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidInput, "program path has no file name") + })?; + if file_name.encode_wide().any(|unit| unit == 0) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "program contains a NUL character", + )); + } + + if program + .parent() + .is_some_and(|parent| !parent.as_os_str().is_empty()) + { + return Ok(resolve_program_path(program)); + } + + let search_name = if !file_name.encode_wide().any(|unit| unit == '.'
as u16) { + let mut name = file_name.to_os_string(); + name.push(".exe"); + name + } else { + file_name.to_os_string() + }; + + for directory in search_directories(child_env) { + let candidate = directory.join(&search_name); + if program_exists(&candidate) { + return Ok(candidate); + } + } + + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("program `{}` was not found", program.display()), + )) +} + +fn resolve_program_path(program: &Path) -> PathBuf { + if program + .extension() + .and_then(OsStr::to_str) + .is_some_and(|extension| extension.eq_ignore_ascii_case("exe")) + { + return program.to_owned(); + } + + let mut with_exe = program.as_os_str().to_os_string(); + with_exe.push(".exe"); + let with_exe = PathBuf::from(with_exe); + if program_exists(&with_exe) { + with_exe + } else { + program.to_owned() + } +} + +fn search_directories(child_env: &HashMap<String, String>) -> Vec<PathBuf> { + let mut directories = Vec::new(); + if let Some(path) = get_env_case_insensitive(child_env, "PATH") { + directories + .extend(env::split_paths(OsStr::new(path)).filter(|path| !path.as_os_str().is_empty())); + } + if let Ok(executable) = env::current_exe() + && let Some(parent) = executable.parent() + { + directories.push(parent.to_owned()); + } + if let Ok(directory) = system_directory() { + directories.push(directory); + } + if let Ok(directory) = windows_directory() { + directories.push(directory); + } + if let Some(path) = env::var_os("PATH") { + directories.extend(env::split_paths(&path).filter(|path| !path.as_os_str().is_empty())); + } + directories +} + +fn has_batch_extension(program: &[u16]) -> bool { + const DOT: u16 = b'.'
as u16; + const LOWER_A: u16 = b'a' as u16; + const LOWER_B: u16 = b'b' as u16; + const LOWER_C: u16 = b'c' as u16; + const LOWER_D: u16 = b'd' as u16; + const LOWER_M: u16 = b'm' as u16; + const LOWER_T: u16 = b't' as u16; + const UPPER_A: u16 = b'A' as u16; + const UPPER_B: u16 = b'B' as u16; + const UPPER_C: u16 = b'C' as u16; + const UPPER_D: u16 = b'D' as u16; + const UPPER_M: u16 = b'M' as u16; + const UPPER_T: u16 = b'T' as u16; + let program = program.strip_suffix(&[0]).unwrap_or(program); + matches!( + program.get(program.len().saturating_sub(4)..), + Some( + [DOT, LOWER_B | UPPER_B, LOWER_A | UPPER_A, LOWER_T | UPPER_T] + | [DOT, LOWER_C | UPPER_C, LOWER_M | UPPER_M, LOWER_D | UPPER_D] + ) + ) +} + +fn get_env_case_insensitive<'a>( + environment: &'a HashMap<String, String>, + key: &str, +) -> Option<&'a str> { + environment + .iter() + .filter_map(|(candidate, value)| { + candidate + .eq_ignore_ascii_case(key) + .then_some(value.as_str()) + }) + .last() +} + +fn regular_command_line(program: &str, args: &[String]) -> io::Result<Vec<u16>> { + if program.contains(['\0', '"']) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "program contains an invalid character", + )); + } + + let mut command_line = Vec::new(); + command_line.push('"' as u16); + command_line.extend(OsStr::new(program).encode_wide()); + command_line.push('"' as u16); + for arg in args { + command_line.push(' ' as u16); + append_regular_arg(arg, &mut command_line)?; + } + Ok(command_line) +} + +fn append_regular_arg(arg: &str, command_line: &mut Vec<u16>) -> io::Result<()> { + if arg.contains('\0') { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "argument contains a NUL character", + )); + } + + let quote = arg.is_empty() || arg.contains([' ', '\t']); + if quote { + command_line.push('"' as u16); + } + + let arg: Vec<u16> = OsStr::new(arg).encode_wide().collect(); + let mut index = 0; + while index < arg.len() { + let mut backslashes = 0; + while index < arg.len() && arg[index] == '\\' as u16 {
+ index += 1; + backslashes += 1; + } + + if index == arg.len() { + let count = if quote { backslashes * 2 } else { backslashes }; + command_line.extend(std::iter::repeat_n('\\' as u16, count)); + break; + } + if arg[index] == '"' as u16 { + command_line.extend(std::iter::repeat_n('\\' as u16, backslashes * 2 + 1)); + } else { + command_line.extend(std::iter::repeat_n('\\' as u16, backslashes)); + } + command_line.push(arg[index]); + index += 1; + } + + if quote { + command_line.push('"' as u16); + } + Ok(()) +} + +fn batch_command_line(script: &[u16], args: &[String]) -> io::Result<Vec<u16>> { + let mut command_line: Vec<u16> = OsStr::new("cmd.exe /e:ON /v:OFF /d /c \"") + .encode_wide() + .collect(); + let script = script.strip_suffix(&[0]).unwrap_or(script); + if script.contains(&('"' as u16)) || script.last() == Some(&('\\' as u16)) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Windows file names may not contain a quote or end with a backslash", + )); + } + command_line.push('"' as u16); + command_line.extend_from_slice(script); + command_line.push('"' as u16); + for arg in args { + command_line.push(' ' as u16); + append_batch_arg(arg, &mut command_line)?; + } + command_line.push('"' as u16); + Ok(command_line) +} + +fn append_batch_arg(arg: &str, command_line: &mut Vec<u16>) -> io::Result<()> { + if arg.contains(['\0', '\r', '\n']) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "batch-file argument contains an invalid character", + )); + } + + const UNQUOTED_ASCII: &str = r"#$*+-./:?@\_"; + let quote = arg.is_empty() + || arg.ends_with('\\') + || arg.chars().any(|character| { + character.is_control() + || (character.is_ascii() + && !(character.is_ascii_alphanumeric() || UNQUOTED_ASCII.contains(character))) + }); + if quote { + command_line.push('"' as u16); + } + + let mut backslashes = 0; + for unit in OsStr::new(arg).encode_wide() { + if unit == '\\' as u16 { + backslashes += 1; + } else { + if unit == '"' as u16 { +
command_line.extend(std::iter::repeat_n('\\' as u16, backslashes)); + command_line.push('"' as u16); + } else if unit == '%' as u16 || unit == '\r' as u16 { + command_line.extend(OsStr::new("%%cd:~,").encode_wide()); + } + backslashes = 0; + } + command_line.push(unit); + } + + if quote { + command_line.extend(std::iter::repeat_n('\\' as u16, backslashes)); + command_line.push('"' as u16); + } + Ok(()) +} + +fn environment_block(environment: &HashMap<String, String>) -> io::Result<Vec<u16>> { + let mut variables: Vec<(&String, &String)> = Vec::new(); + for (key, value) in environment { + if let Some((_, previous_value)) = variables + .iter_mut() + .find(|(previous_key, _)| compare_environment_keys(previous_key, key).is_eq()) + { + *previous_value = value; + } else { + variables.push((key, value)); + } + } + variables.sort_by(|(left, _), (right, _)| compare_environment_keys(left, right)); + + let mut block = Vec::new(); + for (key, value) in variables { + if key.contains('\0') || value.contains('\0') { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "environment contains an invalid key or value", + )); + } + block.extend(OsStr::new(key).encode_wide()); + block.push('=' as u16); + block.extend(OsStr::new(value).encode_wide()); + block.push(0); + } + block.push(0); + if block.len() == 1 { + block.push(0); + } + Ok(block) +} + +fn compare_environment_keys(left: &str, right: &str) -> Ordering { + let left: Vec<u16> = left.encode_utf16().collect(); + let right: Vec<u16> = right.encode_utf16().collect(); + match unsafe { + CompareStringOrdinal( + left.as_ptr(), + left.len() as i32, + right.as_ptr(), + right.len() as i32, + TRUE, + ) + } { + CSTR_LESS_THAN => Ordering::Less, + CSTR_EQUAL => Ordering::Equal, + CSTR_GREATER_THAN => Ordering::Greater, + _ => panic!( + "CompareStringOrdinal failed: {}", + io::Error::last_os_error() + ), + } +} + +#[cfg(test)] +#[path = "command_tests.rs"] +mod tests; diff --git a/codex-rs/utils/pty/src/win/command_path.rs
b/codex-rs/utils/pty/src/win/command_path.rs new file mode 100644 index 000000000000..caa7c32e3434 --- /dev/null +++ b/codex-rs/utils/pty/src/win/command_path.rs @@ -0,0 +1,181 @@ +// Portions of this file are adapted from Rust's standard library. +// Copyright The Rust Project Developers. Licensed under Apache-2.0 or MIT. + +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fs; +use std::io; +use std::os::windows::ffi::OsStrExt; +use std::os::windows::ffi::OsStringExt; +use std::path::Path; +use std::path::PathBuf; +use std::ptr; + +use winapi::um::fileapi::GetFileAttributesW; +use winapi::um::fileapi::GetFullPathNameW; +use winapi::um::fileapi::INVALID_FILE_ATTRIBUTES; +use winapi::um::sysinfoapi::GetSystemDirectoryW; +use winapi::um::sysinfoapi::GetWindowsDirectoryW; + +pub(super) fn current_directory(cwd: &Path) -> io::Result<Vec<u16>> { + if !fs::metadata(cwd)?.is_dir() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("current directory `{}` is not a directory", cwd.display()), + )); + } + + let directory = nul_terminated(cwd.as_os_str())?; + strip_safe_verbatim_prefix(directory) +} + +pub(super) fn program_exists(path: &Path) -> bool { + let Ok(path) = to_user_path(path) else { + return false; + }; + unsafe { GetFileAttributesW(path.as_ptr()) != INVALID_FILE_ATTRIBUTES } +} + +pub(super) fn system_directory() -> io::Result<PathBuf> { + system_path(GetSystemDirectoryW) +} + +pub(super) fn to_user_path(path: &Path) -> io::Result<Vec<u16>> { + from_wide_to_user_path(nul_terminated(path.as_os_str())?)
+} + +pub(super) fn windows_directory() -> io::Result<PathBuf> { + system_path(GetWindowsDirectoryW) +} + +fn system_path(function: unsafe extern "system" fn(*mut u16, u32) -> u32) -> io::Result<PathBuf> { + let mut buffer = vec![0u16; 260]; + loop { + let length = unsafe { function(buffer.as_mut_ptr(), buffer.len() as u32) } as usize; + if length == 0 { + return Err(io::Error::last_os_error()); + } + if length < buffer.len() { + buffer.truncate(length); + return Ok(PathBuf::from(OsString::from_wide(&buffer))); + } + buffer.resize(length.saturating_add(1), 0); + } +} + +fn from_wide_to_user_path(path: Vec<u16>) -> io::Result<Vec<u16>> { + let path = if path.len() <= 260 { + strip_safe_verbatim_prefix(path)? + } else { + path + }; + get_long_path(path, /*prefer_verbatim*/ false) +} + +fn strip_safe_verbatim_prefix(mut path: Vec<u16>) -> io::Result<Vec<u16>> { + const SEP: u16 = b'\\' as u16; + const QUERY: u16 = b'?' as u16; + const COLON: u16 = b':' as u16; + const U: u16 = b'U' as u16; + const N: u16 = b'N' as u16; + const C: u16 = b'C' as u16; + match path.as_slice() { + [SEP, SEP, QUERY, SEP, _, COLON, SEP, ..] => { + let candidate = path[4..].to_vec(); + if full_path_matches(&candidate)? { + return Ok(candidate); + } + } + [SEP, SEP, QUERY, SEP, U, N, C, SEP, ..] => { + path[6] = SEP; + let candidate = path[6..].to_vec(); + if full_path_matches(&candidate)? { + return Ok(candidate); + } + path[6] = C; + } + _ => {} + } + Ok(path) +} + +fn full_path_matches(candidate: &[u16]) -> io::Result<bool> { + let expected = candidate.strip_suffix(&[0]).unwrap_or(candidate); + Ok(get_full_path_name(candidate)? == expected) +} + +fn get_long_path(path: Vec<u16>, prefer_verbatim: bool) -> io::Result<Vec<u16>> { + const SEP: u16 = b'\\' as u16; + const QUERY: u16 = b'?' as u16; + const DOT: u16 = b'.'
as u16; + const COLON: u16 = b':' as u16; + const LEGACY_MAX_PATH: usize = 248; + + if path == [0] + || path.starts_with(&[SEP, SEP, QUERY, SEP]) + || path.starts_with(&[SEP, QUERY, QUERY, SEP]) + { + return Ok(path); + } + let is_separator = |unit| unit == SEP || unit == b'/' as u16; + let is_drive_absolute = + path.get(1) == Some(&COLON) && path.get(2).is_some_and(|unit| is_separator(*unit)); + let is_unc = path.first().is_some_and(|unit| is_separator(*unit)) + && path.get(1).is_some_and(|unit| is_separator(*unit)); + if path.len() < LEGACY_MAX_PATH && (is_drive_absolute || is_unc) { + return Ok(path); + } + + let absolute = get_full_path_name(&path)?; + if !prefer_verbatim && absolute.len() + 1 < LEGACY_MAX_PATH { + return Ok(absolute.into_iter().chain([0]).collect()); + } + + let mut verbatim = Vec::with_capacity(absolute.len().saturating_add(8)); + verbatim.extend([SEP, SEP, QUERY, SEP]); + match absolute.as_slice() { + [_, COLON, SEP, ..] => verbatim.extend_from_slice(&absolute), + [SEP, SEP, DOT, SEP, rest @ ..] => verbatim.extend_from_slice(rest), + [SEP, SEP, rest @ ..] 
=> { + verbatim.extend([b'U' as u16, b'N' as u16, b'C' as u16, SEP]); + verbatim.extend_from_slice(rest); + } + _ => return Ok(absolute.into_iter().chain([0]).collect()), + } + verbatim.push(0); + Ok(verbatim) +} + +fn get_full_path_name(path: &[u16]) -> io::Result<Vec<u16>> { + let mut buffer = vec![0u16; 260]; + loop { + let length = unsafe { + GetFullPathNameW( + path.as_ptr(), + buffer.len() as u32, + buffer.as_mut_ptr(), + ptr::null_mut(), + ) + } as usize; + if length == 0 { + return Err(io::Error::last_os_error()); + } + if length < buffer.len() { + buffer.truncate(length); + return Ok(buffer); + } + buffer.resize(length.saturating_add(1), 0); + } +} + +fn nul_terminated(value: &OsStr) -> io::Result<Vec<u16>> { + let mut wide: Vec<u16> = value.encode_wide().collect(); + if wide.contains(&0) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "value contains a NUL character", + )); + } + wide.push(0); + Ok(wide) +} diff --git a/codex-rs/utils/pty/src/win/command_tests.rs b/codex-rs/utils/pty/src/win/command_tests.rs new file mode 100644 index 000000000000..88772ec37e21 --- /dev/null +++ b/codex-rs/utils/pty/src/win/command_tests.rs @@ -0,0 +1,134 @@ +use std::collections::HashMap; +use std::ffi::OsString; +use std::fs; +use std::io; +use std::os::windows::ffi::OsStringExt; +use std::path::PathBuf; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; + +use pretty_assertions::assert_eq; + +use super::environment_block; +use super::prepare_command; + +struct TestDirectory { + path: PathBuf, +} + +impl TestDirectory { + fn new() -> io::Result<Self> { + static NEXT_DIRECTORY: AtomicU64 = AtomicU64::new(0); + + let path = std::env::temp_dir().join(format!( + "codex-utils-pty-command-{}-{}", + std::process::id(), + NEXT_DIRECTORY.fetch_add(1, Ordering::Relaxed) + )); + fs::create_dir(&path)?; + Ok(Self { path }) + } + + fn join(&self, name: &str) -> PathBuf { + self.path.join(name) + } +} + +impl Drop for TestDirectory { + fn drop(&mut self) { + let _ =
fs::remove_dir_all(&self.path); + } +} + +fn wide_string(value: &[u16]) -> String { + let value = value.strip_suffix(&[0]).unwrap_or(value); + OsString::from_wide(value).to_string_lossy().into_owned() +} + +#[test] +fn child_path_resolution_only_appends_exe_without_an_extension() -> anyhow::Result<()> { + let directory = TestDirectory::new()?; + let executable = directory.join("path-probe.exe"); + fs::write(&executable, [])?; + fs::write(directory.join("batch-only.cmd"), "@exit /b 0\r\n")?; + let env = HashMap::from([( + "Path".to_string(), + directory.path.to_string_lossy().into_owned(), + )]); + let cwd = std::env::current_dir()?; + + let command = prepare_command("path-probe", &[], &cwd, &env)?; + assert_eq!(PathBuf::from(wide_string(&command.application)), executable); + assert_eq!(PathBuf::from(wide_string(&command.current_directory)), cwd); + assert!(command.environment.ends_with(&[0, 0])); + + let Err(error) = prepare_command("batch-only", &[], &cwd, &env) else { + anyhow::bail!("extensionless program unexpectedly resolved to a batch file"); + }; + assert_eq!(error.kind(), io::ErrorKind::NotFound); + Ok(()) +} + +#[test] +fn batch_command_line_uses_cmd_and_escapes_percent() -> anyhow::Result<()> { + let directory = TestDirectory::new()?; + let script = directory.join("probe.cmd"); + fs::write(&script, "@exit /b 0\r\n")?; + let command = prepare_command( + script.to_string_lossy().as_ref(), + &["100%".to_string()], + &directory.path, + &HashMap::new(), + )?; + + assert!(wide_string(&command.application).ends_with(r"\cmd.exe")); + assert_eq!( + wide_string(&command.command_line), + format!( + "cmd.exe /e:ON /v:OFF /d /c \"\"{}\" \"100%%cd:~,%\"\"", + script.display() + ) + ); + Ok(()) +} + +#[test] +fn environment_block_is_unicode_casefold_sorted_and_double_terminated() -> anyhow::Result<()> { + let environment = HashMap::from([ + ("zebra".to_string(), "3".to_string()), + ("éclair".to_string(), "2".to_string()), + ("Alpha".to_string(), "1".to_string()), + 
("=C:".to_string(), r"C:\work".to_string()), + ]); + + let block = environment_block(&environment)?; + let variables = block + .split(|unit| *unit == 0) + .filter(|variable| !variable.is_empty()) + .map(|variable| OsString::from_wide(variable).to_string_lossy().into_owned()) + .collect::<Vec<_>>(); + assert_eq!( + variables, + vec![r"=C:=C:\work", "Alpha=1", "zebra=3", "éclair=2"] + ); + assert!(block.ends_with(&[0, 0])); + Ok(()) +} + +#[test] +fn command_preparation_rejects_a_file_as_current_directory() -> anyhow::Result<()> { + let directory = TestDirectory::new()?; + let not_a_directory = directory.join("file"); + fs::write(&not_a_directory, [])?; + + let Err(error) = prepare_command( + r"C:\codex-nonexistent.exe", + &[], + &not_a_directory, + &HashMap::new(), + ) else { + anyhow::bail!("file unexpectedly accepted as the current directory"); + }; + assert_eq!(error.kind(), io::ErrorKind::InvalidInput); + Ok(()) +} diff --git a/codex-rs/utils/pty/src/win/mod.rs b/codex-rs/utils/pty/src/win/mod.rs index 33af20d8e128..d91152e5db99 100644 --- a/codex-rs/utils/pty/src/win/mod.rs +++ b/codex-rs/utils/pty/src/win/mod.rs @@ -40,6 +40,8 @@ use winapi::um::synchapi::WaitForSingleObject; use winapi::um::winbase::INFINITE; use winapi::um::winbase::WAIT_OBJECT_0; +#[cfg(test)] +mod command; pub(crate) mod conpty; mod job; mod procthreadattr;