diff --git a/.github/workflows/rust-ci-full.yml b/.github/workflows/rust-ci-full.yml index 3031cb685..60d508b21 100644 --- a/.github/workflows/rust-ci-full.yml +++ b/.github/workflows/rust-ci-full.yml @@ -402,13 +402,6 @@ jobs: - name: cargo clippy run: cargo clippy --target ${{ matrix.target }} --tests --profile ${{ matrix.profile }} --timings -- -D warnings - - uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2.62.49 - with: - tool: just - - - name: End-to-end benchmark smoke test - run: just bench-e2e-smoke - - name: Upload Cargo timings (clippy) if: always() uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 21f8af0d6..aac0d6f90 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -2058,17 +2058,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "codex-app-server-start-bench" -version = "0.0.0" -dependencies = [ - "anyhow", - "codex-app-server-protocol", - "divan", - "serde_json", - "tempfile", -] - [[package]] name = "codex-app-server-test-client" version = "0.0.0" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index 5ef6a45fc..b1f9e4c31 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -14,7 +14,6 @@ members = [ "app-server-client", "app-server-protocol", "app-server-test-client", - "benchmarks/app-server-start", "debug-client", "apply-patch", "arg0", diff --git a/codex-rs/benchmarks/app-server-start/BUILD.bazel b/codex-rs/benchmarks/app-server-start/BUILD.bazel deleted file mode 100644 index 4b8f09413..000000000 --- a/codex-rs/benchmarks/app-server-start/BUILD.bazel +++ /dev/null @@ -1,20 +0,0 @@ -load("//:defs.bzl", "codex_rust_crate", "workspace_root_test") - -codex_rust_crate( - name = "app-server-start-bench", - crate_name = "codex_app_server_start_bench", -) - -workspace_root_test( - name = "startup-bench", - # Include the Bazel-built CLI in this target's runfiles and expose its - # resolved executable path through the environment consumed by the runner. - runfile_env = { - "//codex-rs/cli:codex": "CODEX_BIN", - }, - size = "large", - tags = ["manual"], - test_bin = ":codex-app-server-start-bench", - timeout = "long", - workspace_root_marker = "//codex-rs/utils/cargo-bin:repo_root.marker", -) diff --git a/codex-rs/benchmarks/app-server-start/Cargo.toml b/codex-rs/benchmarks/app-server-start/Cargo.toml deleted file mode 100644 index edf680eee..000000000 --- a/codex-rs/benchmarks/app-server-start/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "codex-app-server-start-bench" -version.workspace = true -edition.workspace = true -license.workspace = true -publish = false - -[lints] -workspace = true - -[dependencies] -anyhow = { workspace = true } -codex-app-server-protocol = { workspace = true } -serde_json = { workspace = true } -divan = { workspace = true } -tempfile = { workspace = true } diff --git a/codex-rs/benchmarks/app-server-start/src/main.rs b/codex-rs/benchmarks/app-server-start/src/main.rs deleted file mode 100644 index 5363b49ff..000000000 --- a/codex-rs/benchmarks/app-server-start/src/main.rs +++ /dev/null @@ -1,174 +0,0 @@ -use std::io::BufRead; -use std::io::BufReader; -use std::io::Write; -use std::path::Path; -use std::path::PathBuf; -use std::process::Child; -use std::process::ChildStdin; -use std::process::ChildStdout; -use std::process::Command; -use std::process::Stdio; - -use anyhow::Context; -use anyhow::Result; -use anyhow::bail; -use codex_app_server_protocol::ClientInfo; -use codex_app_server_protocol::ClientRequest; -use codex_app_server_protocol::InitializeCapabilities; -use codex_app_server_protocol::InitializeParams; -use codex_app_server_protocol::InitializeResponse; -use codex_app_server_protocol::JSONRPCMessage; -use codex_app_server_protocol::JSONRPCNotification; -use codex_app_server_protocol::JSONRPCResponse; -use codex_app_server_protocol::RequestId; -use divan::Bencher; - -fn main() { - divan::main(); -} - -// Process startup is slow enough that 30 samples keep runs practical, and -// sample size 1 lets each measured server be reaped before the next starts. -// The benchmark warms one CODEX_HOME before timing to measure normal restarts. -// This e2e runner receives its separately built Codex binary from just or Bazel. -#[divan::bench(sample_count = 30, sample_size = 1, skip_ext_time)] -#[allow(clippy::expect_used)] -fn initialize_response(bencher: Bencher) { - let codex_bin = std::env::var_os("CODEX_BIN") - .map(PathBuf::from) - .expect("CODEX_BIN must point to the codex binary; run via `just bench-e2e` or Bazel"); - let codex_home = tempfile::tempdir().expect("benchmark CODEX_HOME should be created"); - drop( - start_until_initialize_response(&codex_bin, codex_home.path()) - .expect("benchmark CODEX_HOME should be initialized"), - ); - - bencher.bench_local(|| { - start_until_initialize_response(&codex_bin, codex_home.path()) - .expect("codex app-server should return an initialize response") - }); -} - -/// A running app-server that has returned a valid `initialize` response. -/// -/// Divan drops benchmark outputs after it stops each timer interval. Returning -/// this value from the measured closure keeps the `initialized` notification -/// and forced process reaping outside startup latency. -struct InitializedAppServer { - child: Child, - stdin: Option, - stdout: Option>, - acknowledge_on_drop: bool, -} - -/// Spawn a stdio app-server and return once it responds successfully to `initialize`. -fn start_until_initialize_response( - codex_bin: &Path, - codex_home: &Path, -) -> Result { - let request_id = RequestId::Integer(0); - let child = Command::new(codex_bin) - .arg("app-server") - .arg("--listen") - .arg("stdio://") - .env("CODEX_HOME", codex_home) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::null()) - .spawn() - .with_context(|| format!("failed to spawn `{}` app-server", codex_bin.display()))?; - let mut server = InitializedAppServer { - child, - stdin: None, - stdout: None, - acknowledge_on_drop: false, - }; - server.stdin = Some( - server - .child - .stdin - .take() - .context("app-server stdin unavailable")?, - ); - server.stdout = Some(BufReader::new( - server - .child - .stdout - .take() - .context("app-server stdout unavailable")?, - )); - - let request = ClientRequest::Initialize { - request_id: request_id.clone(), - params: InitializeParams { - client_info: ClientInfo { - name: "codex-app-server-start-bench".to_string(), - title: Some("Codex App Server Start Benchmark".to_string()), - version: env!("CARGO_PKG_VERSION").to_string(), - }, - capabilities: Some(InitializeCapabilities { - experimental_api: false, - request_attestation: false, - opt_out_notification_methods: None, - }), - }, - }; - let stdin = server - .stdin - .as_mut() - .context("app-server stdin unavailable")?; - writeln!(stdin, "{}", serde_json::to_string(&request)?)?; - stdin - .flush() - .context("failed to flush initialize request")?; - - let mut line = String::new(); - loop { - line.clear(); - if server - .stdout - .as_mut() - .context("app-server stdout unavailable")? - .read_line(&mut line)? - == 0 - { - bail!("app-server closed stdout before returning initialize response"); - } - - match serde_json::from_str::(line.trim())? { - JSONRPCMessage::Response(JSONRPCResponse { id, result }) if id == request_id => { - let _: InitializeResponse = serde_json::from_value(result) - .context("initialize response missing expected payload")?; - server.acknowledge_on_drop = true; - return Ok(server); - } - JSONRPCMessage::Error(error) if error.id == request_id => { - bail!("initialize failed: {error:?}"); - } - JSONRPCMessage::Request(_) - | JSONRPCMessage::Response(_) - | JSONRPCMessage::Notification(_) - | JSONRPCMessage::Error(_) => {} - } - } -} - -impl Drop for InitializedAppServer { - fn drop(&mut self) { - if self.acknowledge_on_drop - && let Some(stdin) = self.stdin.as_mut() - { - let initialized = JSONRPCMessage::Notification(JSONRPCNotification { - method: "initialized".to_string(), - params: None, - }); - if let Ok(payload) = serde_json::to_string(&initialized) { - let _ = writeln!(stdin, "{payload}"); - let _ = stdin.flush(); - } - } - let _ = self.stdin.take(); - let _ = self.child.kill(); - let _ = self.child.wait(); - } -} diff --git a/justfile b/justfile index d061b90fd..d5e9fc3a3 100644 --- a/justfile +++ b/justfile @@ -2,9 +2,6 @@ set working-directory := "codex-rs" set positional-arguments rust_min_stack := "8388608" # 8 MiB -e2e_benchmark_packages := "codex-app-server-start-bench" -e2e_codex_bin := if os() == "windows" { "./target/release/codex.exe" } else { "./target/release/codex" } -e2e_smoke_codex_bin := if os() == "windows" { "./target/debug/codex.exe" } else { "./target/debug/codex" } # Display help help: @@ -33,11 +30,6 @@ app-server-test-client *args: cargo build -p codex-cli cargo run -p codex-app-server-test-client -- --codex-bin ./target/debug/codex "$@" -# Run end-to-end performance benchmarks that require a built Codex binary. -bench-e2e *args: - cargo build --release -p codex-cli --bin codex - for package in {{ e2e_benchmark_packages }}; do CODEX_BIN="{{ e2e_codex_bin }}" cargo run --release -p "$package" -- --bench "$@"; done - # Format Rust and Python SDK code. fmt: cargo fmt -- --config imports_granularity=Item 2>/dev/null @@ -63,19 +55,13 @@ test *args: RUST_MIN_STACK={{ rust_min_stack }} cargo nextest run --no-fail-fast "$@" just bench-smoke -# Run unit-test-style benchmark targets managed entirely by Cargo. -bench-unit *args: +# Run explicit workspace benchmark targets. +bench *args: cargo bench --workspace --bench '*' "$@" -# Smoke Cargo-managed benchmarks and compile e2e runners without measured binaries. +# Run benchmark targets once to ensure they start successfully. bench-smoke: - just bench-unit -- --test - for package in {{ e2e_benchmark_packages }}; do cargo build -p "$package" --bin "$package"; done - -# Run end-to-end performance benchmark targets once. -bench-e2e-smoke: - cargo build -p codex-cli --bin codex - for package in {{ e2e_benchmark_packages }}; do CODEX_BIN="{{ e2e_smoke_codex_bin }}" cargo run -p "$package" -- --test; done + just bench -- --test # Build and run Codex from source using Bazel. # Note we have to use the combination of `[no-cd]` and `--run_under="cd $PWD &&"`