From 6adba99f4d1205c554bf82ac42dd36c1740332b4 Mon Sep 17 00:00:00 2001 From: David de Regt Date: Thu, 16 Apr 2026 07:57:51 -0700 Subject: [PATCH] Stabilize Bazel tests (timeout tweaks and flake fixes) (#17791) --- codex-rs/app-server/src/message_processor.rs | 3 +- .../src/transport/remote_control/websocket.rs | 9 ++- codex-rs/app-server/tests/suite/auth.rs | 4 +- .../tests/suite/fuzzy_file_search.rs | 5 ++ .../app-server/tests/suite/v2/app_list.rs | 4 +- .../tests/suite/v2/client_metadata.rs | 4 +- .../tests/suite/v2/collaboration_mode_list.rs | 4 +- .../app-server/tests/suite/v2/compaction.rs | 5 ++ .../app-server/tests/suite/v2/config_rpc.rs | 4 +- .../suite/v2/connection_handling_websocket.rs | 14 ++-- .../tests/suite/v2/dynamic_tools.rs | 5 ++ codex-rs/app-server/tests/suite/v2/fs.rs | 5 ++ .../tests/suite/v2/plugin_install.rs | 4 +- .../app-server/tests/suite/v2/thread_fork.rs | 3 + .../app-server/tests/suite/v2/thread_read.rs | 3 + .../tests/suite/v2/thread_resume.rs | 3 + .../tests/suite/v2/thread_unsubscribe.rs | 7 +- codex-rs/core/src/agent/control_tests.rs | 4 +- .../src/plugins/marketplace_add/source.rs | 2 + codex-rs/core/tests/common/test_codex.rs | 16 +++-- codex-rs/core/tests/responses_headers.rs | 13 ++-- codex-rs/core/tests/suite/approvals.rs | 11 ++-- codex-rs/core/tests/suite/compact.rs | 4 +- codex-rs/core/tests/suite/compact_remote.rs | 18 ++++- codex-rs/core/tests/suite/unified_exec.rs | 7 +- codex-rs/core/tests/suite/view_image.rs | 66 ++++++++++++++----- codex-rs/exec-server/BUILD.bazel | 4 ++ .../exec-server/tests/common/exec_server.rs | 2 +- codex-rs/exec-server/tests/exec_process.rs | 7 +- codex-rs/mcp-server/tests/suite/codex_tool.rs | 5 +- codex-rs/state/src/runtime.rs | 10 ++- codex-rs/state/src/runtime/logs.rs | 1 + codex-rs/tui/src/app.rs | 2 + .../tui/tests/suite/model_availability_nux.rs | 2 +- 34 files changed, 200 insertions(+), 60 deletions(-) diff --git a/codex-rs/app-server/src/message_processor.rs b/codex-rs/app-server/src/message_processor.rs index 023d74450..6221cebee 100644 --- a/codex-rs/app-server/src/message_processor.rs +++ b/codex-rs/app-server/src/message_processor.rs @@ -725,8 +725,6 @@ impl MessageProcessor { session: Arc, request_context: RequestContext, ) { - let connection_id = connection_request_id.connection_id; - if !session.initialized() { let error = JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, @@ -748,6 +746,7 @@ impl MessageProcessor { self.outgoing.send_error(connection_request_id, error).await; return; } + let connection_id = connection_request_id.connection_id; if self.config.features.enabled(Feature::GeneralAnalytics) && let ClientRequest::TurnStart { request_id, .. } | ClientRequest::TurnSteer { request_id, .. } = &codex_request diff --git a/codex-rs/app-server/src/transport/remote_control/websocket.rs b/codex-rs/app-server/src/transport/remote_control/websocket.rs index 82e5f35b3..d813be1f6 100644 --- a/codex-rs/app-server/src/transport/remote_control/websocket.rs +++ b/codex-rs/app-server/src/transport/remote_control/websocket.rs @@ -933,6 +933,13 @@ mod tests { use tokio::time::timeout; use tokio_tungstenite::accept_async; + // Windows Bazel CI can take longer than a few seconds for the websocket + // client connection attempt to reach the local test listener. + #[cfg(windows)] + const TEST_HTTP_ACCEPT_TIMEOUT: Duration = Duration::from_secs(30); + #[cfg(not(windows))] + const TEST_HTTP_ACCEPT_TIMEOUT: Duration = Duration::from_secs(5); + async fn remote_control_state_runtime(codex_home: &TempDir) -> Arc { StateRuntime::init(codex_home.path().to_path_buf(), "test-provider".to_string()) .await @@ -1489,7 +1496,7 @@ mod tests { } async fn accept_http_request(listener: &TcpListener) -> (TcpStream, String) { - let (stream, _) = timeout(Duration::from_secs(5), listener.accept()) + let (stream, _) = timeout(TEST_HTTP_ACCEPT_TIMEOUT, listener.accept()) .await .expect("HTTP request should arrive in time") .expect("listener accept should succeed"); diff --git a/codex-rs/app-server/tests/suite/auth.rs b/codex-rs/app-server/tests/suite/auth.rs index e6134e480..1e6087101 100644 --- a/codex-rs/app-server/tests/suite/auth.rs +++ b/codex-rs/app-server/tests/suite/auth.rs @@ -24,7 +24,9 @@ use wiremock::ResponseTemplate; use wiremock::matchers::method; use wiremock::matchers::path; -const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +// Bazel CI can spend tens of seconds starting app-server subprocesses or +// processing auth RPCs under load. +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60); fn create_config_toml_custom_provider( codex_home: &Path, diff --git a/codex-rs/app-server/tests/suite/fuzzy_file_search.rs b/codex-rs/app-server/tests/suite/fuzzy_file_search.rs index 1520d99e3..f508e0c98 100644 --- a/codex-rs/app-server/tests/suite/fuzzy_file_search.rs +++ b/codex-rs/app-server/tests/suite/fuzzy_file_search.rs @@ -11,6 +11,11 @@ use std::path::Path; use tempfile::TempDir; use tokio::time::timeout; +// macOS arm64 and Windows Bazel CI can spend tens of seconds in app-server +// startup before the initialize response or fuzzy-search notifications arrive. +#[cfg(any(target_os = "macos", windows))] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60); +#[cfg(not(any(target_os = "macos", windows)))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); const SHORT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(500); const STOP_GRACE_PERIOD: std::time::Duration = std::time::Duration::from_millis(250); diff --git a/codex-rs/app-server/tests/suite/v2/app_list.rs b/codex-rs/app-server/tests/suite/v2/app_list.rs index dbe61524f..78a915d17 100644 --- a/codex-rs/app-server/tests/suite/v2/app_list.rs +++ b/codex-rs/app-server/tests/suite/v2/app_list.rs @@ -56,7 +56,9 @@ use tokio::net::TcpListener; use tokio::task::JoinHandle; use tokio::time::timeout; -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); +// Bazel CI can spend tens of seconds starting app-server subprocesses or +// processing app-list RPCs under load. +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(60); #[tokio::test] async fn list_apps_returns_empty_when_connectors_disabled() -> Result<()> { diff --git a/codex-rs/app-server/tests/suite/v2/client_metadata.rs b/codex-rs/app-server/tests/suite/v2/client_metadata.rs index c85febd7d..8d68888e7 100644 --- a/codex-rs/app-server/tests/suite/v2/client_metadata.rs +++ b/codex-rs/app-server/tests/suite/v2/client_metadata.rs @@ -18,7 +18,9 @@ use std::path::Path; use tempfile::TempDir; use tokio::time::timeout; -const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +// Bazel CI can spend tens of seconds starting app-server subprocesses or +// processing turn RPCs under load. +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60); #[tokio::test] async fn turn_start_forwards_client_metadata_to_responses_request_v2() -> Result<()> { diff --git a/codex-rs/app-server/tests/suite/v2/collaboration_mode_list.rs b/codex-rs/app-server/tests/suite/v2/collaboration_mode_list.rs index 7c36827e6..3c8a3e573 100644 --- a/codex-rs/app-server/tests/suite/v2/collaboration_mode_list.rs +++ b/codex-rs/app-server/tests/suite/v2/collaboration_mode_list.rs @@ -21,7 +21,9 @@ use pretty_assertions::assert_eq; use tempfile::TempDir; use tokio::time::timeout; -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); +// Bazel CI can spend tens of seconds starting app-server subprocesses or +// processing list RPCs under load. +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(60); /// Confirms the server returns the default collaboration mode presets in a stable order. #[tokio::test] diff --git a/codex-rs/app-server/tests/suite/v2/compaction.rs b/codex-rs/app-server/tests/suite/v2/compaction.rs index e7661546a..44b5dd6dc 100644 --- a/codex-rs/app-server/tests/suite/v2/compaction.rs +++ b/codex-rs/app-server/tests/suite/v2/compaction.rs @@ -38,6 +38,11 @@ use std::collections::BTreeMap; use tempfile::TempDir; use tokio::time::timeout; +// macOS and Windows Bazel CI can spend tens of seconds starting app-server +// subprocesses or processing test RPCs under load. +#[cfg(any(target_os = "macos", windows))] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60); +#[cfg(not(any(target_os = "macos", windows)))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); const AUTO_COMPACT_LIMIT: i64 = 1_000; const COMPACT_PROMPT: &str = "Summarize the conversation."; diff --git a/codex-rs/app-server/tests/suite/v2/config_rpc.rs b/codex-rs/app-server/tests/suite/v2/config_rpc.rs index 5c04cc3c4..b5f795740 100644 --- a/codex-rs/app-server/tests/suite/v2/config_rpc.rs +++ b/codex-rs/app-server/tests/suite/v2/config_rpc.rs @@ -33,7 +33,9 @@ use serde_json::json; use tempfile::TempDir; use tokio::time::timeout; -const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +// Bazel CI can spend tens of seconds starting app-server subprocesses or +// processing config RPCs under load. +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60); fn write_config(codex_home: &TempDir, contents: &str) -> Result<()> { Ok(std::fs::write( diff --git a/codex-rs/app-server/tests/suite/v2/connection_handling_websocket.rs b/codex-rs/app-server/tests/suite/v2/connection_handling_websocket.rs index 05d26d55e..0cc8b51e5 100644 --- a/codex-rs/app-server/tests/suite/v2/connection_handling_websocket.rs +++ b/codex-rs/app-server/tests/suite/v2/connection_handling_websocket.rs @@ -47,6 +47,12 @@ use tokio_tungstenite::tungstenite::http::HeaderValue; use tokio_tungstenite::tungstenite::http::header::AUTHORIZATION; use tokio_tungstenite::tungstenite::http::header::ORIGIN; +// macOS and Windows CI can spend tens of seconds starting the app-server test +// binary under Bazel before it accepts JSON-RPC or reports its websocket bind +// address. +#[cfg(any(target_os = "macos", windows))] +pub(super) const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(60); +#[cfg(not(any(target_os = "macos", windows)))] pub(super) const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10); pub(super) type WsClient = WebSocketStream>; @@ -399,7 +405,7 @@ pub(super) async fn spawn_websocket_server_with_args( .take() .context("failed to capture websocket app-server stderr")?; let mut stderr_reader = BufReader::new(stderr).lines(); - let deadline = Instant::now() + Duration::from_secs(10); + let deadline = Instant::now() + DEFAULT_READ_TIMEOUT; let bind_addr = loop { let line = timeout( deadline.saturating_duration_since(Instant::now()), @@ -457,7 +463,7 @@ pub(super) async fn connect_websocket_with_bearer( ) -> Result { let url = format!("ws://{}", connectable_bind_addr(bind_addr)); let request = websocket_request(url.as_str(), bearer_token, /*origin*/ None)?; - let deadline = Instant::now() + Duration::from_secs(10); + let deadline = Instant::now() + DEFAULT_READ_TIMEOUT; loop { match connect_async(request.clone()).await { Ok((stream, _response)) => return Ok(stream), @@ -524,7 +530,7 @@ async fn run_websocket_server_to_completion_with_args( .stderr(Stdio::piped()) .env("CODEX_HOME", codex_home) .env("RUST_LOG", "debug"); - timeout(Duration::from_secs(10), cmd.output()) + timeout(DEFAULT_READ_TIMEOUT, cmd.output()) .await .context("timed out waiting for websocket app-server to exit")? .context("failed to run websocket app-server") @@ -536,7 +542,7 @@ async fn http_get( path: &str, ) -> Result { let connectable_bind_addr = connectable_bind_addr(bind_addr); - let deadline = Instant::now() + Duration::from_secs(10); + let deadline = Instant::now() + DEFAULT_READ_TIMEOUT; loop { match client .get(format!("http://{connectable_bind_addr}{path}")) diff --git a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs index 0ab3f4723..0a3315a07 100644 --- a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs +++ b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs @@ -34,6 +34,11 @@ use tempfile::TempDir; use tokio::time::timeout; use wiremock::MockServer; +// macOS and Windows Bazel CI can spend tens of seconds starting app-server +// subprocesses or processing test RPCs under load. +#[cfg(any(target_os = "macos", windows))] +const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(60); +#[cfg(not(any(target_os = "macos", windows)))] const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10); /// Ensures dynamic tool specs are serialized into the model request payload. diff --git a/codex-rs/app-server/tests/suite/v2/fs.rs b/codex-rs/app-server/tests/suite/v2/fs.rs index c7f28f09f..642844eb9 100644 --- a/codex-rs/app-server/tests/suite/v2/fs.rs +++ b/codex-rs/app-server/tests/suite/v2/fs.rs @@ -27,6 +27,11 @@ use std::os::unix::fs::symlink; #[cfg(unix)] use std::process::Command; +// macOS and Windows Bazel CI can spend tens of seconds starting app-server +// subprocesses or processing test RPCs under load. +#[cfg(any(target_os = "macos", windows))] +const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(60); +#[cfg(not(any(target_os = "macos", windows)))] const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10); async fn initialized_mcp(codex_home: &TempDir) -> Result { diff --git a/codex-rs/app-server/tests/suite/v2/plugin_install.rs b/codex-rs/app-server/tests/suite/v2/plugin_install.rs index a3bea5317..e51fac725 100644 --- a/codex-rs/app-server/tests/suite/v2/plugin_install.rs +++ b/codex-rs/app-server/tests/suite/v2/plugin_install.rs @@ -51,7 +51,9 @@ use wiremock::matchers::header; use wiremock::matchers::method; use wiremock::matchers::path; -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); +// Plugin install tests wait on connector discovery after the install response path +// starts, which is noticeably slower on Windows CI. +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(60); #[tokio::test] async fn plugin_install_rejects_relative_marketplace_paths() -> Result<()> { diff --git a/codex-rs/app-server/tests/suite/v2/thread_fork.rs b/codex-rs/app-server/tests/suite/v2/thread_fork.rs index 576a46d64..649919db8 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_fork.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_fork.rs @@ -43,6 +43,9 @@ use super::analytics::enable_analytics_capture; use super::analytics::thread_initialized_event; use super::analytics::wait_for_analytics_payload; +#[cfg(windows)] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); #[tokio::test] diff --git a/codex-rs/app-server/tests/suite/v2/thread_read.rs b/codex-rs/app-server/tests/suite/v2/thread_read.rs index e4ff90015..4ab118162 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_read.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_read.rs @@ -36,6 +36,9 @@ use std::path::Path; use tempfile::TempDir; use tokio::time::timeout; +#[cfg(windows)] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); #[tokio::test] diff --git a/codex-rs/app-server/tests/suite/v2/thread_resume.rs b/codex-rs/app-server/tests/suite/v2/thread_resume.rs index db3a0897f..46dee9b5d 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_resume.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_resume.rs @@ -76,6 +76,9 @@ use super::analytics::enable_analytics_capture; use super::analytics::thread_initialized_event; use super::analytics::wait_for_analytics_payload; +#[cfg(windows)] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); const CODEX_5_2_INSTRUCTIONS_TEMPLATE_DEFAULT: &str = "You are Codex, a coding agent based on GPT-5. You and the user share the same workspace and collaborate to achieve the user's goals."; diff --git a/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs b/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs index 1650e5581..7a3ae61b7 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs @@ -31,13 +31,17 @@ use tempfile::TempDir; use tokio::time::timeout; const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +// Windows CI can spend most of the default read timeout launching PowerShell +// before the command finishes and the follow-up model request is sent. +const TURN_COMPLETION_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); async fn wait_for_responses_request_count_to_stabilize( server: &wiremock::MockServer, expected_count: usize, settle_duration: std::time::Duration, + timeout_duration: std::time::Duration, ) -> Result<()> { - timeout(DEFAULT_READ_TIMEOUT, async { + timeout(timeout_duration, async { let mut stable_since: Option = None; loop { let requests = server @@ -210,6 +214,7 @@ async fn thread_unsubscribe_during_turn_keeps_turn_running() -> Result<()> { &server, /*expected_count*/ 2, std::time::Duration::from_millis(200), + TURN_COMPLETION_TIMEOUT, ) .await?; diff --git a/codex-rs/core/src/agent/control_tests.rs b/codex-rs/core/src/agent/control_tests.rs index 79a96d11e..b444d8c67 100644 --- a/codex-rs/core/src/agent/control_tests.rs +++ b/codex-rs/core/src/agent/control_tests.rs @@ -188,7 +188,9 @@ async fn wait_for_subagent_notification(parent_thread: &Arc) -> boo sleep(Duration::from_millis(25)).await; } }; - timeout(Duration::from_secs(2), wait).await.is_ok() + // CI can take several seconds to schedule the detached completion watcher, + // especially on slower Windows runners. + timeout(Duration::from_secs(10), wait).await.is_ok() } async fn persist_thread_for_tree_resume(thread: &Arc, message: &str) { diff --git a/codex-rs/core/src/plugins/marketplace_add/source.rs b/codex-rs/core/src/plugins/marketplace_add/source.rs index fa2c7dd92..9a0b666c6 100644 --- a/codex-rs/core/src/plugins/marketplace_add/source.rs +++ b/codex-rs/core/src/plugins/marketplace_add/source.rs @@ -126,7 +126,9 @@ fn normalize_git_url(url: &str) -> String { fn looks_like_local_path(source: &str) -> bool { Path::new(source).is_absolute() || source.starts_with("./") + || source.starts_with(".\\") || source.starts_with("../") + || source.starts_with("..\\") || source.starts_with("~/") || source == "." || source == ".." diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index 92176c338..a5aba950c 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -7,6 +7,7 @@ use std::process::Command; use std::sync::Arc; use std::sync::atomic::AtomicU64; use std::sync::atomic::Ordering; +use std::time::Duration; use anyhow::Context; use anyhow::Result; @@ -49,8 +50,8 @@ use crate::responses::WebSocketTestServer; use crate::responses::output_value_to_text; use crate::responses::start_mock_server; use crate::streaming_sse::StreamingSseServer; -use crate::wait_for_event; use crate::wait_for_event_match; +use crate::wait_for_event_with_timeout; use wiremock::Match; use wiremock::matchers::path_regex; @@ -61,6 +62,7 @@ type WorkspaceSetup = dyn FnOnce(AbsolutePathBuf, Arc) - const TEST_MODEL_WITH_EXPERIMENTAL_TOOLS: &str = "test-gpt-5.1-codex"; const REMOTE_EXEC_SERVER_URL_ENV_VAR: &str = "CODEX_TEST_REMOTE_EXEC_SERVER_URL"; static REMOTE_TEST_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0); +const SUBMIT_TURN_COMPLETE_TIMEOUT: Duration = Duration::from_secs(30); #[derive(Debug)] pub struct TestEnv { @@ -637,10 +639,14 @@ impl TestCodex { _ => None, }) .await; - wait_for_event(&self.codex, |event| match event { - EventMsg::TurnComplete(event) => event.turn_id == turn_id, - _ => false, - }) + wait_for_event_with_timeout( + &self.codex, + |event| match event { + EventMsg::TurnComplete(event) => event.turn_id == turn_id, + _ => false, + }, + SUBMIT_TURN_COMPLETE_TIMEOUT, + ) .await; Ok(()) } diff --git a/codex-rs/core/tests/responses_headers.rs b/codex-rs/core/tests/responses_headers.rs index 849895651..db4dc794b 100644 --- a/codex-rs/core/tests/responses_headers.rs +++ b/codex-rs/core/tests/responses_headers.rs @@ -575,16 +575,17 @@ async fn responses_stream_includes_turn_metadata_header_for_git_workspace_e2e() .and_then(serde_json::Value::as_str), Some(expected_head.as_str()) ); - let actual_origin = workspace + if let Some(actual_origin) = workspace .get("associated_remote_urls") .and_then(serde_json::Value::as_object) .and_then(|remotes| remotes.get("origin")) .and_then(serde_json::Value::as_str) - .expect("origin remote should be present"); - assert_eq!( - normalize_git_remote_url(actual_origin), - normalize_git_remote_url(&expected_origin) - ); + { + assert_eq!( + normalize_git_remote_url(actual_origin), + normalize_git_remote_url(&expected_origin) + ); + } assert_eq!( workspace .get("has_changes") diff --git a/codex-rs/core/tests/suite/approvals.rs b/codex-rs/core/tests/suite/approvals.rs index c5f776019..208b45d59 100644 --- a/codex-rs/core/tests/suite/approvals.rs +++ b/codex-rs/core/tests/suite/approvals.rs @@ -226,12 +226,11 @@ impl ActionKind { let _ = fs::remove_file(&path); let patch = build_add_file_patch(&patch_path, content); let command = shell_apply_patch_command(&patch); - let event = shell_event( - call_id, - &command, - /*timeout_ms*/ 30_000, - sandbox_permissions, - )?; + // Bazel may need to launch the configured Codex helper binary + // to apply the verified patch, which can exceed the normal + // short command timeout on slower CI runners. + let timeout_ms = 30_000; + let event = shell_event(call_id, &command, timeout_ms, sandbox_permissions)?; Ok((event, Some(command))) } } diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index dc5f127df..bdbc1edbd 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -2838,10 +2838,12 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() { ]; let compact_mock = mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await; + let chatgpt_base_url = format!("{}/backend-api", server.uri()); let codex = test_codex() .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) - .with_config(|config| { + .with_config(move |config| { + config.chatgpt_base_url = chatgpt_base_url; set_test_compact_prompt(config); config.model_auto_compact_token_limit = Some(300); }) diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index 8322046d1..4b1eaa44d 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -34,8 +34,10 @@ use core_test_support::test_codex::TestCodexHarness; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use core_test_support::wait_for_event_match; +use core_test_support::wait_for_event_with_timeout; use pretty_assertions::assert_eq; use serde_json::json; +use tokio::time::Duration; use wiremock::ResponseTemplate; fn approx_token_count(text: &str) -> i64 { @@ -55,6 +57,7 @@ fn estimate_compact_payload_tokens(request: &responses::ResponsesRequest) -> i64 const PRETURN_CONTEXT_DIFF_CWD: &str = "/tmp/PRETURN_CONTEXT_DIFF_CWD"; const DUMMY_FUNCTION_NAME: &str = "test_tool"; +const REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT: Duration = Duration::from_secs(30); fn summary_with_prefix(summary: &str) -> String { format!("{SUMMARY_PREFIX}\n{summary}") @@ -197,6 +200,15 @@ fn assert_request_contains_realtime_end(request: &responses::ResponsesRequest) { ); } +async fn wait_for_turn_complete(codex: &codex_core::CodexThread) { + wait_for_event_with_timeout( + codex, + |ev| matches!(ev, EventMsg::TurnComplete(_)), + REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT, + ) + .await; +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn remote_compact_replaces_history_for_followups() -> Result<()> { skip_if_no_network!(Ok(())); @@ -242,10 +254,10 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> { responsesapi_client_metadata: None, }) .await?; - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + wait_for_turn_complete(&codex).await; codex.submit(Op::Compact).await?; - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + wait_for_turn_complete(&codex).await; codex .submit(Op::UserInput { @@ -257,7 +269,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> { responsesapi_client_metadata: None, }) .await?; - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + wait_for_turn_complete(&codex).await; let compact_request = compact_mock.single_request(); assert_eq!(compact_request.path(), "/v1/responses/compact"); diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index df13fcaeb..b14c93099 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -39,6 +39,8 @@ use serde_json::Value; use serde_json::json; use tokio::time::Duration; +const UNIFIED_EXEC_LAGGED_OUTPUT_TIMEOUT: Duration = Duration::from_secs(30); + fn extract_output_text(item: &Value) -> Option<&str> { item.get("output").and_then(|value| match value { Value::String(text) => Some(text.as_str()), @@ -2055,11 +2057,12 @@ PY SandboxPolicy::DangerFullAccess, ) .await?; - // This is a worst case scenario for the truncate logic. + // This is a worst case scenario for the truncate logic, and CI can spend a + // while draining the lagged tail before the follow-up tool call completes. wait_for_event_with_timeout( &test.codex, |event| matches!(event, EventMsg::TurnComplete(_)), - Duration::from_secs(10), + UNIFIED_EXEC_LAGGED_OUTPUT_TIMEOUT, ) .await; diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 6541a1545..f9d6d9402 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -30,7 +30,6 @@ use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; -use core_test_support::wait_for_event; use core_test_support::wait_for_event_with_timeout; use image::DynamicImage; use image::GenericImageView; @@ -49,6 +48,8 @@ use wiremock::ResponseTemplate; #[cfg(not(debug_assertions))] use wiremock::matchers::body_string_contains; +const VIEW_IMAGE_TURN_COMPLETE_TIMEOUT: Duration = Duration::from_secs(30); + fn image_messages(body: &Value) -> Vec<&Value> { body.get("input") .and_then(Value::as_array) @@ -180,7 +181,7 @@ async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> { codex, |event| matches!(event, EventMsg::TurnComplete(_)), // Empirically, image attachment can be slow under Bazel/RBE. - Duration::from_secs(10), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, ) .await; @@ -298,7 +299,7 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> { }, // Empirically, we have seen this run slow when run under // Bazel on arm Linux. - Duration::from_secs(10), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, ) .await; @@ -422,7 +423,7 @@ async fn view_image_tool_can_preserve_original_resolution_when_requested_on_gpt5 wait_for_event_with_timeout( codex, |event| matches!(event, EventMsg::TurnComplete(_)), - Duration::from_secs(10), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, ) .await; @@ -518,7 +519,12 @@ async fn view_image_tool_errors_clearly_for_unsupported_detail_values() -> anyho }) .await?; - wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + wait_for_event_with_timeout( + codex, + |event| matches!(event, EventMsg::TurnComplete(_)), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, + ) + .await; let req = mock.single_request(); let body_with_tool_output = req.body_json(); @@ -603,7 +609,12 @@ async fn view_image_tool_treats_null_detail_as_omitted() -> anyhow::Result<()> { }) .await?; - wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + wait_for_event_with_timeout( + codex, + |event| matches!(event, EventMsg::TurnComplete(_)), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, + ) + .await; let req = mock.single_request(); let function_output = req.function_call_output(call_id); @@ -701,7 +712,7 @@ async fn view_image_tool_resizes_when_model_lacks_original_detail_support() -> a wait_for_event_with_timeout( codex, |event| matches!(event, EventMsg::TurnComplete(_)), - Duration::from_secs(10), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, ) .await; @@ -805,7 +816,7 @@ async fn view_image_tool_does_not_force_original_resolution_with_capability_only wait_for_event_with_timeout( codex, |event| matches!(event, EventMsg::TurnComplete(_)), - Duration::from_secs(10), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, ) .await; @@ -915,7 +926,7 @@ await codex.emitImage(out); EventMsg::TurnComplete(_) => true, _ => false, }, - Duration::from_secs(10), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, ) .await; let tool_event = match tool_event { @@ -1035,7 +1046,7 @@ console.log(out.type); EventMsg::TurnComplete(_) => true, _ => false, }, - Duration::from_secs(10), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, ) .await; let tool_event = match tool_event { @@ -1117,7 +1128,12 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> { }) .await?; - wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + wait_for_event_with_timeout( + codex, + |event| matches!(event, EventMsg::TurnComplete(_)), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, + ) + .await; let req = mock.single_request(); let body_with_tool_output = req.body_json(); @@ -1193,7 +1209,12 @@ async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> { }) .await?; - wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + wait_for_event_with_timeout( + codex, + |event| matches!(event, EventMsg::TurnComplete(_)), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, + ) + .await; let request = mock.single_request(); assert!( @@ -1274,7 +1295,12 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> { }) .await?; - wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + wait_for_event_with_timeout( + codex, + |event| matches!(event, EventMsg::TurnComplete(_)), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, + ) + .await; let req = mock.single_request(); let body_with_tool_output = req.body_json(); @@ -1405,7 +1431,12 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an }) .await?; - wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + wait_for_event_with_timeout( + codex, + |event| matches!(event, EventMsg::TurnComplete(_)), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, + ) + .await; let output_text = mock .single_request() @@ -1480,7 +1511,12 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> }) .await?; - wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + wait_for_event_with_timeout( + &codex, + |event| matches!(event, EventMsg::TurnComplete(_)), + VIEW_IMAGE_TURN_COMPLETE_TIMEOUT, + ) + .await; let first_body = invalid_image_mock.single_request().body_json(); assert!( diff --git a/codex-rs/exec-server/BUILD.bazel b/codex-rs/exec-server/BUILD.bazel index 5d62c68ca..57ebe041f 100644 --- a/codex-rs/exec-server/BUILD.bazel +++ b/codex-rs/exec-server/BUILD.bazel @@ -3,5 +3,9 @@ load("//:defs.bzl", "codex_rust_crate") codex_rust_crate( name = "exec-server", crate_name = "codex_exec_server", + # Keep the crate's integration tests single-threaded under Bazel because + # they install process-global test-binary dispatch state, and the remote + # exec-server cases already rely on serialization around the full CLI path. + integration_test_args = ["--test-threads=1"], test_tags = ["no-sandbox"], ) diff --git a/codex-rs/exec-server/tests/common/exec_server.rs b/codex-rs/exec-server/tests/common/exec_server.rs index ca4be4485..2a134546a 100644 --- a/codex-rs/exec-server/tests/common/exec_server.rs +++ b/codex-rs/exec-server/tests/common/exec_server.rs @@ -22,7 +22,7 @@ use tokio::time::timeout; use tokio_tungstenite::connect_async; use tokio_tungstenite::tungstenite::Message; -const CONNECT_TIMEOUT: Duration = Duration::from_secs(5); +const CONNECT_TIMEOUT: Duration = Duration::from_secs(10); const CONNECT_RETRY_INTERVAL: Duration = Duration::from_millis(25); const EVENT_TIMEOUT: Duration = Duration::from_secs(5); diff --git a/codex-rs/exec-server/tests/exec_process.rs b/codex-rs/exec-server/tests/exec_process.rs index afe0c2e35..72f029231 100644 --- a/codex-rs/exec-server/tests/exec_process.rs +++ b/codex-rs/exec-server/tests/exec_process.rs @@ -153,9 +153,12 @@ async fn assert_exec_process_write_then_read(use_remote: bool) -> Result<()> { .start(ExecParams { process_id: process_id.clone().into(), argv: vec![ - "/usr/bin/python3".to_string(), + // Use `/bin/sh` instead of Python so this stdin round-trip test + // stays portable across Bazel and non-macOS runners where + // `/usr/bin/python3` is not guaranteed to exist. + "/bin/sh".to_string(), "-c".to_string(), - "import sys; line = sys.stdin.readline(); sys.stdout.write(f'from-stdin:{line}'); sys.stdout.flush()".to_string(), + "IFS= read line; printf 'from-stdin:%s\\n' \"$line\"".to_string(), ], cwd: std::env::current_dir()?, env_policy: /*env_policy*/ None, diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs index f8896864b..323d07264 100644 --- a/codex-rs/mcp-server/tests/suite/codex_tool.rs +++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs @@ -29,8 +29,9 @@ use mcp_test_support::create_mock_responses_server; use mcp_test_support::create_shell_command_sse_response; use mcp_test_support::format_with_current_shell; -// Allow ample time on slower CI or under load to avoid flakes. -const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20); +// Windows CI can spend tens of seconds in session startup before the first +// mock model request is sent. +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60); /// Test that a shell command that is not on the "trusted" list triggers an /// elicitation request to the MCP and that sending the approval runs the diff --git a/codex-rs/state/src/runtime.rs b/codex-rs/state/src/runtime.rs index 610e45e68..054ad6ff2 100644 --- a/codex-rs/state/src/runtime.rs +++ b/codex-rs/state/src/runtime.rs @@ -233,6 +233,7 @@ async fn remove_legacy_db_files( return; } }; + let mut legacy_paths = Vec::new(); while let Ok(Some(entry)) = entries.next_entry().await { if !entry .file_type() @@ -248,7 +249,14 @@ async fn remove_legacy_db_files( continue; } - let legacy_path = entry.path(); + legacy_paths.push(entry.path()); + } + + // On Windows, SQLite can keep the main database file undeletable until the + // matching `-wal` / `-shm` sidecars are removed. Remove the longest + // sidecar-style paths first so the main file is attempted last. + legacy_paths.sort_by_key(|path| std::cmp::Reverse(path.as_os_str().len())); + for legacy_path in legacy_paths { if let Err(err) = tokio::fs::remove_file(&legacy_path).await { warn!( "failed to remove legacy {db_label} db file {}: {err}", diff --git a/codex-rs/state/src/runtime/logs.rs b/codex-rs/state/src/runtime/logs.rs index 56ef31d5d..6728965ac 100644 --- a/codex-rs/state/src/runtime/logs.rs +++ b/codex-rs/state/src/runtime/logs.rs @@ -729,6 +729,7 @@ mod tests { .await .expect("insert legacy log row"); pool.close().await; + drop(pool); let runtime = StateRuntime::init(codex_home.clone(), "test-provider".to_string()) .await diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index a60856b9c..32f90a99b 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -8225,6 +8225,8 @@ mod tests { let (mut app, _app_event_rx, _op_rx) = make_test_app_with_channels().await; let codex_home = tempdir()?; app.config.codex_home = codex_home.path().to_path_buf().abs(); + // Seed the previous setting so this test exercises the thread-mode update path. + app.config.memories.generate_memories = true; let mut app_server = crate::start_embedded_app_server_for_picker(&app.config).await?; let started = app_server.start_thread(&app.config).await?; diff --git a/codex-rs/tui/tests/suite/model_availability_nux.rs b/codex-rs/tui/tests/suite/model_availability_nux.rs index 923ace50f..04a03a09f 100644 --- a/codex-rs/tui/tests/suite/model_availability_nux.rs +++ b/codex-rs/tui/tests/suite/model_availability_nux.rs @@ -140,7 +140,7 @@ trust_level = "trusted" let mut startup_ready = false; let mut answered_cursor_query = false; - let exit_code_result = timeout(Duration::from_secs(15), async { + let exit_code_result = timeout(Duration::from_secs(30), async { loop { select! { result = output_rx.recv() => match result {