mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
d4ec08b8f0
## Summary

- complete unified-exec processes from the ordered event stream instead of issuing a final zero-wait `process/read`
- add optional executor sandbox-denial state to `process/exited`
- retain `process/read` as a retained-output and compatibility fallback for receiver lag, sequence gaps, and legacy servers
- recover sandbox-denial state across transport reconnection
- cover the real `TestCodex` remote-exec path without adding a public test-only event constructor

## Why

A successful one-shot tool call currently receives its output and terminal notifications, then pays another wide-area `process/read` round trip before returning. Staging traces showed that remote response wait accounted for more than 99.8% of RPC time; local serialization, queueing, and deserialization were below 0.6 ms.

## Measured impact

A direct staging A/B used the same build and route and changed only completion mode. Each arm ran three times with 30 one-shot `/usr/bin/true` calls per run. The table reports the median of the three per-run percentiles.

| Metric | Final `process/read` | Pushed events | Change |
| --- | ---: | ---: | ---: |
| End-to-end completion p50 | 159.5 ms | 118.7 ms | -40.8 ms (-25.6%) |
| End-to-end completion p95 | 182.4 ms | 131.7 ms | -50.6 ms (-27.8%) |
| Completion-wait p50 | 80.1 ms | 41.5 ms | -38.5 ms (-48.1%) |
| Final `process/read` RPC p50 | 79.9 ms | eliminated | -79.9 ms |

TCP_NODELAY was enabled in both A/B arms, so its effect cancels out. The successful, complete, in-order event path issued zero final `process/read` calls.

## Compatibility and recovery

- new servers send `sandboxDenied` on `process/exited`
- legacy servers omit it, which triggers one compatibility `process/read`
- broadcast lag or a sequence gap triggers a retained-output read
- recovery remains bounded by the server's existing 1 MiB retained-output window
- complete, in-order event streams issue no completion read
- sandbox denial is attached to the exit event before consumers can observe process completion
- server-first and client-first rollouts remain wire-compatible; server-first realizes the latency win immediately

## Integration coverage

The `TestCodex` suite exercises four distinct remote-exec contracts:

- complete pushed output/exit/close with zero reads
- direct pushed sandbox denial with zero reads
- legacy missing denial metadata with exactly one compatibility read
- count-bounded replay eviction recovered from retained output without duplication

## Validation

- `just test -p codex-core exec_command_consumes_pushed_remote_process_events`: 4 passed
- `just test -p codex-core unified_exec::process_tests::`: 4 passed
- `just test -p codex-exec-server`: 294 passed, 2 skipped
- `just test -p codex-exec-server-protocol`: 5 passed
- `just test -p codex-rmcp-client`: 89 passed, 2 skipped
- focused Bazel `//codex-rs/core:core-all-test`: passed across 16 shards
- scoped `just fix` passed for core and exec-server
- `just fmt` passed

The complete workspace suite was not rerun; focused Cargo and Bazel coverage passed for the changed behavior.
141 lines
3.5 KiB
Rust
141 lines
3.5 KiB
Rust
// Aggregates all former standalone integration tests as modules.
|
|
use codex_apply_patch::CODEX_CORE_APPLY_PATCH_ARG1;
|
|
use codex_exec_server::CODEX_FS_HELPER_ARG1;
|
|
use codex_sandboxing::landlock::CODEX_LINUX_SANDBOX_ARG0;
|
|
use codex_test_binary_support::TestBinaryDispatchGuard;
|
|
use codex_test_binary_support::TestBinaryDispatchMode;
|
|
use codex_test_binary_support::configure_test_binary_dispatch;
|
|
use ctor::ctor;
|
|
|
|
// This code runs before any other tests are run.
|
|
// It allows the test binary to behave like codex and dispatch to apply_patch and codex-linux-sandbox
|
|
// based on the arg0.
|
|
// NOTE: this doesn't work on ARM
|
|
#[ctor]
|
|
pub static CODEX_ALIASES_TEMP_DIR: Option<TestBinaryDispatchGuard> = {
|
|
configure_test_binary_dispatch("codex-core-tests", |exe_name, argv1| {
|
|
if argv1 == Some(CODEX_CORE_APPLY_PATCH_ARG1) {
|
|
return TestBinaryDispatchMode::DispatchArg0Only;
|
|
}
|
|
if argv1 == Some(CODEX_FS_HELPER_ARG1) {
|
|
return TestBinaryDispatchMode::DispatchArg0Only;
|
|
}
|
|
if exe_name == CODEX_LINUX_SANDBOX_ARG0 {
|
|
return TestBinaryDispatchMode::DispatchArg0Only;
|
|
}
|
|
TestBinaryDispatchMode::InstallAliases
|
|
})
|
|
};
|
|
|
|
#[cfg(not(target_os = "windows"))]
|
|
mod abort_tasks;
|
|
mod additional_context;
|
|
mod agent_execution;
|
|
mod agent_jobs;
|
|
mod agent_websocket;
|
|
mod agents_md;
|
|
mod apply_patch_cli;
|
|
#[cfg(not(target_os = "windows"))]
|
|
mod approvals;
|
|
mod auto_review;
|
|
mod cli_stream;
|
|
mod client;
|
|
mod client_websockets;
|
|
mod code_mode;
|
|
mod codex_delegate;
|
|
mod collaboration_instructions;
|
|
mod compact;
|
|
mod compact_remote;
|
|
mod compact_remote_parity;
|
|
mod compact_resume_fork;
|
|
mod current_time_reminder;
|
|
mod deprecation_notice;
|
|
mod exec;
|
|
mod exec_policy;
|
|
#[cfg(not(target_os = "windows"))]
|
|
mod extension_sandbox;
|
|
mod fork_thread;
|
|
#[cfg(not(target_os = "windows"))]
|
|
mod guardian_review;
|
|
#[cfg(not(target_os = "windows"))]
|
|
mod hooks;
|
|
#[cfg(not(target_os = "windows"))]
|
|
mod hooks_mcp;
|
|
mod image_rollout;
|
|
mod items;
|
|
mod json_result;
|
|
mod live_cli;
|
|
#[cfg(unix)]
|
|
mod mcp_refresh_cleanup;
|
|
mod mcp_tool_exposure;
|
|
mod mcp_turn_metadata;
|
|
mod model_overrides;
|
|
mod model_runtime_selectors;
|
|
mod model_switching;
|
|
mod model_visible_layout;
|
|
mod models_cache_ttl;
|
|
mod models_etag_responses;
|
|
mod multi_agent_mode;
|
|
mod network_approval;
|
|
mod openai_file_mcp;
|
|
mod otel;
|
|
mod override_updates;
|
|
mod pending_input;
|
|
mod permissions_messages;
|
|
mod personality;
|
|
mod personality_migration;
|
|
mod plugins;
|
|
mod prompt_caching;
|
|
mod prompt_debug_tests;
|
|
mod quota_exceeded;
|
|
mod realtime_conversation;
|
|
mod remote_env;
|
|
mod remote_models;
|
|
mod request_compression;
|
|
#[cfg(not(target_os = "windows"))]
|
|
mod request_permissions;
|
|
#[cfg(not(target_os = "windows"))]
|
|
mod request_permissions_tool;
|
|
mod request_plugin_install;
|
|
mod request_user_input;
|
|
mod responses_api_proxy_headers;
|
|
mod responses_lite;
|
|
mod resume;
|
|
mod resume_warning;
|
|
mod review;
|
|
mod rmcp_client;
|
|
mod rollout_budget;
|
|
mod rollout_list_find;
|
|
mod safety_buffering;
|
|
mod safety_check_downgrade;
|
|
mod search_tool;
|
|
mod shell_command;
|
|
mod shell_serialization;
|
|
mod shell_snapshot;
|
|
mod skill_approval;
|
|
mod skills;
|
|
mod spawn_agent_description;
|
|
mod sqlite_state;
|
|
mod stream_error_allows_next_turn;
|
|
mod stream_no_completed;
|
|
mod subagent_notifications;
|
|
mod token_budget;
|
|
mod tool_harness;
|
|
mod tool_parallelism;
|
|
mod tools;
|
|
mod truncation;
|
|
mod turn_state;
|
|
mod unified_exec;
|
|
mod unified_exec_process_events;
|
|
#[cfg(unix)]
|
|
mod unified_exec_zsh_fork_approvals;
|
|
mod unstable_features_warning;
|
|
mod user_notification;
|
|
mod user_shell_cmd;
|
|
mod view_image;
|
|
mod web_search;
|
|
mod websocket_fallback;
|
|
mod window_headers;
|
|
#[cfg(target_os = "windows")]
|
|
mod windows_sandbox;
|