diff --git a/codex-rs/app-server/tests/common/test_app_server.rs b/codex-rs/app-server/tests/common/test_app_server.rs index fbe43e7689b3..f384c3f0af04 100644 --- a/codex-rs/app-server/tests/common/test_app_server.rs +++ b/codex-rs/app-server/tests/common/test_app_server.rs @@ -111,6 +111,7 @@ use codex_app_server_protocol::TurnCompletedNotification; use codex_app_server_protocol::TurnEnvironmentParams; use codex_app_server_protocol::TurnInterruptParams; use codex_app_server_protocol::TurnStartParams; +use codex_app_server_protocol::TurnStartResponse; use codex_app_server_protocol::TurnSteerParams; use codex_app_server_protocol::WindowsSandboxSetupStartParams; use codex_exec_server::CODEX_EXEC_SERVER_NOISE_AUTH_TOKEN_ENV_VAR; @@ -187,15 +188,21 @@ impl TestAppServer { Ok(app_server) } + /// Returns the automatically selected test environment retained by this server. + /// + /// Tests can use the environment to arrange target-native filesystem fixtures before starting + /// a thread. Returns an error unless this server was created with [`Self::new_with_auto_env`]. + pub fn auto_env(&self) -> anyhow::Result<&TestEnv> { + self.auto_env + .as_ref() + .context("auto environment is unavailable; use TestAppServer::new_with_auto_env") + } + /// Returns app-server protocol parameters for the automatically selected /// test environment. Returns an error unless this server was created with /// [`Self::new_with_auto_env`]. pub fn auto_env_params(&self) -> anyhow::Result { - let selection = self - .auto_env - .as_ref() - .context("auto environment is unavailable; use TestAppServer::new_with_auto_env")? - .selection(); + let selection = self.auto_env()?.selection(); Ok(TurnEnvironmentParams { environment_id: selection.environment_id.clone(), cwd: selection.cwd.clone().into(), @@ -997,6 +1004,39 @@ impl TestAppServer { self.send_request("turn/start", params).await } + /// Start a turn and return its matching typed completion notification. + pub async fn start_turn_and_wait_for_completion( + &mut self, + params: TurnStartParams, + ) -> anyhow::Result { + let thread_id = params.thread_id.clone(); + let request_id = self.send_turn_start_request(params).await?; + let response = self + .read_stream_until_response_message(RequestId::Integer(request_id)) + .await?; + let TurnStartResponse { turn } = crate::to_response(response)?; + let notification = self + .read_stream_until_matching_notification( + "turn/completed for started turn", + |notification| { + notification.method == "turn/completed" + && notification.params.as_ref().is_some_and(|params| { + serde_json::from_value::(params.clone()) + .is_ok_and(|completed| { + completed.thread_id == thread_id && completed.turn.id == turn.id + }) + }) + }, + ) + .await?; + let params = notification + .params + .context("turn/completed notification must include params")?; + let completed = serde_json::from_value(params) + .context("failed to deserialize turn/completed notification")?; + Ok(completed) + } + /// Send a `thread/inject_items` JSON-RPC request (v2). pub async fn send_thread_inject_items_request( &mut self, diff --git a/codex-rs/app-server/tests/suite/v2/mod.rs b/codex-rs/app-server/tests/suite/v2/mod.rs index bbd995275463..85dcfc0c4966 100644 --- a/codex-rs/app-server/tests/suite/v2/mod.rs +++ b/codex-rs/app-server/tests/suite/v2/mod.rs @@ -56,6 +56,7 @@ mod request_user_input; mod request_validation; mod review; mod safety_check_downgrade; +mod selected_environment; mod skills_list; mod sleep; mod thread_archive; diff --git a/codex-rs/app-server/tests/suite/v2/selected_environment.rs b/codex-rs/app-server/tests/suite/v2/selected_environment.rs new file mode 100644 index 000000000000..c52122c03242 --- /dev/null +++ b/codex-rs/app-server/tests/suite/v2/selected_environment.rs @@ -0,0 +1,259 @@ +use std::collections::BTreeMap; +use std::path::PathBuf; +use std::time::Duration; + +use anyhow::Context; +use anyhow::Result; +use app_test_support::PathBufExt; +use app_test_support::TestAppServer; +use app_test_support::to_response; +use app_test_support::write_mock_responses_config_toml; +use codex_app_server_protocol::JSONRPCResponse; +use codex_app_server_protocol::RequestId; +use codex_app_server_protocol::ThreadStartParams; +use codex_app_server_protocol::ThreadStartResponse; +use codex_app_server_protocol::TurnStartParams; +use codex_app_server_protocol::UserInput as V2UserInput; +use codex_utils_path_uri::PathUri; +use core_test_support::responses; +use pretty_assertions::assert_eq; +use tempfile::TempDir; +use tokio::time::timeout; +use wiremock::MockServer; + +const AGENTS_INSTRUCTIONS: &str = "selected environment workspace instructions"; +const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10); + +struct SelectedEnvironmentFixture { + app_server: TestAppServer, + agents_source: PathUri, + environment_cwd: PathUri, + environment_shell: String, + response_mock: responses::ResponseMock, + codex_home: TempDir, + _server: MockServer, +} + +impl SelectedEnvironmentFixture { + async fn new() -> Result { + let server = responses::start_mock_server().await; + let response_mock = responses::mount_sse_once( + &server, + responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_assistant_message("msg-1", "done"), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let codex_home = TempDir::new()?; + write_mock_responses_config_toml( + codex_home.path(), + &server.uri(), + &BTreeMap::new(), + /*auto_compact_limit*/ 100_000, + /*requires_openai_auth*/ None, + "mock_provider", + "compact", + )?; + + let mut app_server = TestAppServer::new_with_auto_env(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, app_server.initialize()).await??; + + let (agents_source, environment_cwd, environment_shell) = { + let auto_env = app_server.auto_env()?; + let environment_cwd = auto_env.selection().cwd.clone(); + let agents_source = environment_cwd.join("AGENTS.md")?; + auto_env + .environment() + .get_filesystem() + .write_file( + &agents_source, + AGENTS_INSTRUCTIONS.as_bytes().to_vec(), + /*sandbox*/ None, + ) + .await?; + let environment_shell = auto_env.environment().info().await?.shell.name; + (agents_source, environment_cwd, environment_shell) + }; + + Ok(Self { + app_server, + agents_source, + environment_cwd, + environment_shell, + response_mock, + codex_home, + _server: server, + }) + } + + async fn start_thread(&mut self) -> Result { + let request_id = self + .app_server + .send_thread_start_request_with_auto_env(ThreadStartParams::default()) + .await?; + let response: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + self.app_server + .read_stream_until_response_message(RequestId::Integer(request_id)), + ) + .await??; + to_response(response) + } +} + +fn text_turn_params(thread_id: String, prompt: &str) -> TurnStartParams { + TurnStartParams { + thread_id, + input: vec![V2UserInput::Text { + text: prompt.to_string(), + text_elements: Vec::new(), + }], + ..Default::default() + } +} + +#[tokio::test] +async fn thread_start_reports_selected_environment_metadata() -> Result<()> { + let mut fixture = SelectedEnvironmentFixture::new().await?; + let ThreadStartResponse { + cwd, + runtime_workspace_roots, + active_permission_profile, + .. + } = fixture.start_thread().await?; + let host_cwd = fixture + .codex_home + .path() + .to_path_buf() + .abs() + .canonicalize()?; + let cwd = cwd.canonicalize()?; + let runtime_workspace_roots = runtime_workspace_roots + .into_iter() + .map(|root| root.canonicalize()) + .collect::>>()?; + assert_eq!( + (cwd, runtime_workspace_roots, active_permission_profile), + ( + // TODO(anp): Return the selected environment's native cwd from thread/start. + host_cwd.clone(), + // TODO(anp): Derive runtime workspace roots from the selected remote environment. + vec![host_cwd], + // TODO(anp): Report the implicit built-in permission profile instead of None. + None, + ) + ); + + Ok(()) +} + +#[tokio::test] +async fn thread_start_reports_selected_environment_instruction_source() -> Result<()> { + let mut fixture = SelectedEnvironmentFixture::new().await?; + let response = fixture.start_thread().await?; + + assert_eq!( + response.instruction_sources, + vec![fixture.agents_source.clone().into()] + ); + timeout( + DEFAULT_READ_TIMEOUT, + fixture + .app_server + .start_turn_and_wait_for_completion(text_turn_params( + response.thread.id, + "inspect workspace instructions", + )), + ) + .await??; + + let user_context = fixture + .response_mock + .single_request() + .message_input_texts("user"); + let instructions = user_context + .iter() + .find(|text| text.starts_with("# AGENTS.md instructions")) + .context("selected environment instructions should be model visible")?; + let expected_instructions = format!( + "# AGENTS.md instructions for {}\n\n\n{AGENTS_INSTRUCTIONS}\n", + fixture.environment_cwd.inferred_native_path_string() + ); + assert_eq!(instructions, &expected_instructions); + + Ok(()) +} + +#[tokio::test] +async fn turn_model_context_uses_selected_environment() -> Result<()> { + let mut fixture = SelectedEnvironmentFixture::new().await?; + let thread = fixture.start_thread().await?.thread; + timeout( + DEFAULT_READ_TIMEOUT, + fixture + .app_server + .start_turn_and_wait_for_completion(text_turn_params( + thread.id, + "inspect the selected environment", + )), + ) + .await??; + + let user_context = fixture + .response_mock + .single_request() + .message_input_texts("user"); + let environment_context = user_context + .iter() + .find(|text| text.starts_with("")) + .context("selected environment context should be model visible")?; + let shell = environment_context + .lines() + .find(|line| line.trim_start().starts_with("")) + .map(str::trim) + .map(str::to_string); + let cwd = environment_context + .lines() + .find(|line| line.trim_start().starts_with("")) + .map(str::trim) + .map(str::to_string); + assert_eq!( + (shell, cwd), + ( + Some(format!("{}", fixture.environment_shell)), + Some(format!( + "{}", + fixture.environment_cwd.inferred_native_path_string() + )), + ) + ); + let host_cwd = fixture + .codex_home + .path() + .to_path_buf() + .abs() + .canonicalize()?; + let model_workspace_root = environment_context + .split_once("") + .and_then(|(_, rest)| rest.split_once("")) + .map(|(root, _)| { + // Decode ampersands last so entity-like path text stays literal. + PathBuf::from( + root.replace("<", "<") + .replace(">", ">") + .replace(""", "\"") + .replace("'", "'") + .replace("&", "&"), + ) + .abs() + }) + .context("model context should include a workspace root")? + .canonicalize()?; + // TODO(anp): Derive model-visible workspace roots from the selected remote environment and + // render them using its native path convention. + assert_eq!(model_workspace_root, host_cwd); + + Ok(()) +} diff --git a/codex-rs/core/tests/remote_env_windows/BUILD.bazel b/codex-rs/core/tests/remote_env_windows/BUILD.bazel index 9b5ce52bd922..1f2465ffcbf0 100644 --- a/codex-rs/core/tests/remote_env_windows/BUILD.bazel +++ b/codex-rs/core/tests/remote_env_windows/BUILD.bazel @@ -6,15 +6,10 @@ wine_rust_test( srcs = ["remote_env_windows_test.rs"], crate_name = "remote_env_windows_test", crate_root = "remote_env_windows_test.rs", - host_binaries = { - "codex-app-server": "//codex-rs/app-server:codex-app-server", - }, windows_binaries = { "wine-windows-exec-server": "//codex-rs/exec-server/testing:windows-exec-server", }, deps = [ - "//codex-rs/app-server-protocol", - "//codex-rs/app-server/tests/common", "//codex-rs/core/tests/common", "//codex-rs/exec-server", "//codex-rs/exec-server/testing:wine-exec-server-test-support", @@ -24,7 +19,6 @@ wine_rust_test( "@crates//:anyhow", "@crates//:pretty_assertions", "@crates//:serde_json", - "@crates//:tempfile", "@crates//:tokio", ], ) diff --git a/codex-rs/core/tests/remote_env_windows/remote_env_windows_test.rs b/codex-rs/core/tests/remote_env_windows/remote_env_windows_test.rs index 04f8914982b1..ff8031606109 100644 --- a/codex-rs/core/tests/remote_env_windows/remote_env_windows_test.rs +++ b/codex-rs/core/tests/remote_env_windows/remote_env_windows_test.rs @@ -2,20 +2,7 @@ use anyhow::Context; use anyhow::Result; -use app_test_support::PathBufExt; -use app_test_support::TestAppServer; -use app_test_support::create_mock_responses_server_repeating_assistant; -use app_test_support::to_response; -use app_test_support::write_mock_responses_config_toml; -use codex_app_server_protocol::RequestId; -use codex_app_server_protocol::ThreadStartParams; -use codex_app_server_protocol::ThreadStartResponse; -use codex_app_server_protocol::TurnEnvironmentParams; -use codex_app_server_protocol::TurnStartParams; -use codex_app_server_protocol::TurnStartResponse; -use codex_app_server_protocol::UserInput as V2UserInput; use codex_exec_server::REMOTE_ENVIRONMENT_ID; -use codex_exec_server::CODEX_EXEC_SERVER_URL_ENV_VAR; use codex_features::Feature; use codex_protocol::models::PermissionProfile; use codex_protocol::protocol::AskForApproval; @@ -35,20 +22,11 @@ use core_test_support::responses::start_mock_server; use core_test_support::test_codex::test_codex; use core_test_support::test_codex::turn_permission_fields; use core_test_support::wait_for_event; -use codex_utils_path_uri::LegacyAppPathString; -use codex_utils_path_uri::PathConvention; use codex_utils_path_uri::PathUri; use pretty_assertions::assert_eq; -use serde_json::Value; use serde_json::json; -use std::collections::BTreeMap; -use std::fs; -use tempfile::TempDir; -use tokio::time::timeout; use wine_exec_server_test_support::WineExecServer; -const APP_SERVER_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn windows_exec_server_runs_with_native_shell_and_cwd() -> Result<()> { const CALL_ID: &str = "wine-cmd-smoke"; @@ -244,152 +222,3 @@ async fn windows_exec_server_runs_with_native_shell_and_cwd() -> Result<()> { }) .await } - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn app_server_starts_thread_with_windows_environment_native_cwd() -> Result<()> { - const AGENTS_INSTRUCTIONS: &str = "remote Windows workspace instructions"; - const NATIVE_CWD: &str = r"C:\windows"; - - WineExecServer - .scope(|exec_server_url, wine_prefix| async move { - let agents_path = PathUri::parse("file:///C:/windows/AGENTS.md")?; - fs::write( - wine_prefix.join("drive_c").join("windows").join("AGENTS.md"), - AGENTS_INSTRUCTIONS, - )?; - - let codex_home = TempDir::new()?; - let server = create_mock_responses_server_repeating_assistant("done").await; - write_mock_responses_config_toml( - codex_home.path(), - &server.uri(), - &BTreeMap::new(), - 100_000, - /*requires_openai_auth*/ None, - "mock", - "compact", - )?; - let mut app_server = TestAppServer::new_with_env( - codex_home.path(), - &[( - CODEX_EXEC_SERVER_URL_ENV_VAR, - Some(exec_server_url.as_str()), - )], - ) - .await?; - timeout(APP_SERVER_READ_TIMEOUT, app_server.initialize()).await??; - - let request_id = app_server - .send_thread_start_request(ThreadStartParams { - environments: Some(vec![TurnEnvironmentParams { - environment_id: REMOTE_ENVIRONMENT_ID.to_string(), - cwd: serde_json::from_value::(json!(NATIVE_CWD))?, - }]), - ..Default::default() - }) - .await?; - let response = timeout( - APP_SERVER_READ_TIMEOUT, - app_server.read_stream_until_response_message(RequestId::Integer(request_id)), - ) - .await??; - let response: ThreadStartResponse = to_response(response)?; - assert!(!response.thread.id.is_empty()); - let host_cwd = codex_home.path().to_path_buf().abs(); - // TODO(anp): Return the selected environment's native cwd from thread/start. - assert_eq!(response.cwd, host_cwd); - // TODO(anp): Derive runtime workspace roots from the selected remote environment. - assert_eq!(response.runtime_workspace_roots, vec![host_cwd]); - assert_eq!( - response.instruction_sources, - vec![LegacyAppPathString::from_path_uri( - &agents_path, - PathConvention::Windows, - )?] - ); - // TODO(anp): Report the implicit built-in permission profile instead of None. - assert_eq!(response.active_permission_profile, None); - - let turn_request_id = app_server - .send_turn_start_request(TurnStartParams { - thread_id: response.thread.id, - client_user_message_id: None, - input: vec![V2UserInput::Text { - text: "say done".to_string(), - text_elements: Vec::new(), - }], - ..Default::default() - }) - .await?; - let turn_response = timeout( - APP_SERVER_READ_TIMEOUT, - app_server.read_stream_until_response_message(RequestId::Integer(turn_request_id)), - ) - .await??; - let _: TurnStartResponse = to_response(turn_response)?; - timeout( - APP_SERVER_READ_TIMEOUT, - app_server.read_stream_until_notification_message("turn/completed"), - ) - .await??; - - let requests = server - .received_requests() - .await - .context("failed to fetch received requests")?; - let first_request = requests - .iter() - .find(|request| request.url.path().ends_with("/responses")) - .context("turn should send a Responses request")?; - let body = first_request.body_json::()?; - let remote_instructions = body["input"] - .as_array() - .into_iter() - .flatten() - .filter(|item| item.get("role").and_then(Value::as_str) == Some("user")) - .filter_map(|item| item.get("content").and_then(Value::as_array)) - .flatten() - .filter_map(|content| content.get("text").and_then(Value::as_str)) - .find(|text| text.contains(AGENTS_INSTRUCTIONS)) - .context("remote workspace instructions should be model visible")?; - assert!(remote_instructions.contains(r"# AGENTS.md instructions for C:\windows")); - let environment_context = body["input"] - .as_array() - .into_iter() - .flatten() - .filter(|item| item.get("role").and_then(Value::as_str) == Some("user")) - .filter_map(|item| item.get("content").and_then(Value::as_array)) - .flatten() - .filter_map(|content| content.get("text").and_then(Value::as_str)) - .find(|text| text.starts_with("")) - .context("environment context should be model visible")?; - // The model should see the remote environment's shell, not the Linux app-server's - // host shell. - assert_eq!( - environment_context - .lines() - .find(|line| line.trim_start().starts_with("")) - .map(str::trim), - Some("powershell"), - ); - // The model should see cwd using the remote environment's native path convention, not - // the Linux app-server's host path convention. - assert_eq!( - environment_context - .lines() - .find(|line| line.trim_start().starts_with("")) - .map(str::trim), - Some(r"C:\windows"), - ); - let host_workspace_roots = format!( - "{}", - codex_home.path().display() - ); - // TODO(anp): Derive model-visible workspace roots from the selected remote environment - // and render them using its native path convention. - assert!(environment_context.contains(&host_workspace_roots)); - - Ok(()) - }) - .await -}