From c2fbf4247abb9718a280c2647cedeab87db2e169 Mon Sep 17 00:00:00 2001 From: jif Date: Sun, 21 Jun 2026 13:04:18 +0100 Subject: [PATCH] Parallelize skill metadata stats (#29326) ## Summary This switches skill discovery to the simpler same-connection scalar request shape. After reading a skills directory, discovery now starts the existing `fs/getMetadata` calls for all visible entries in that directory before awaiting the results. There is no JSON-RPC batch frame and no new filesystem API; remote filesystems use the existing request-id multiplexing on the same exec-server connection. This is the scoped alternative to the batch-frame approach in #29074 / #29075. ## What changed - Collect visible directory entries before processing them. - Run their existing `fs.get_metadata(...)` calls with `join_all`. - Process the results in the original directory order, so skill discovery behavior stays the same. ## Benchmarks Fresh local benchmark against generated skill trees over a real exec-server remote filesystem. The benchmark calls the actual `load_skills_from_roots` path, so this includes directory reads, metadata stats, `SKILL.md` reads, and parsing. Times are p50 milliseconds from 5 samples after 1 warmup, using warmed runs. | Scenario | Legacy `main` | Batch frame stack (#29074 / #29075) | Same-connection scalar stack | | --- | ---: | ---: | ---: | | 100 flat skills | 377.4 | 389.0 | 378.6 | | 500 flat skills | 1983.2 | 1856.6 | 1757.5 | Takeaway: for the actual skill discovery path, same-connection scalar is tied with legacy at 100 skills and best at 500 skills. The batch-frame stack does not show enough win here to justify the extra protocol/API surface. Benchmark command: - `just test -p codex-exec-server benchmark_remote_skill_discovery --run-ignored ignored-only --no-capture` Checked locally with: - `just test -p codex-core-skills` - `just bazel-lock-update` - `just bazel-lock-check` --- codex-rs/Cargo.lock | 1 + codex-rs/core-skills/Cargo.toml | 1 + codex-rs/core-skills/src/loader.rs | 31 ++++++++++++++++++++++-------- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index f6f0e5583..f35402efd 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -2809,6 +2809,7 @@ dependencies = [ "codex-utils-plugins", "dirs", "dunce", + "futures", "pretty_assertions", "serde", "serde_json", diff --git a/codex-rs/core-skills/Cargo.toml b/codex-rs/core-skills/Cargo.toml index 7748540ac..3ac82be9a 100644 --- a/codex-rs/core-skills/Cargo.toml +++ b/codex-rs/core-skills/Cargo.toml @@ -31,6 +31,7 @@ codex-utils-path-uri = { workspace = true } codex-utils-plugins = { workspace = true } dirs = { workspace = true } dunce = { workspace = true } +futures = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } serde_yaml = { workspace = true } diff --git a/codex-rs/core-skills/src/loader.rs b/codex-rs/core-skills/src/loader.rs index aa9688f9a..b377e909e 100644 --- a/codex-rs/core-skills/src/loader.rs +++ b/codex-rs/core-skills/src/loader.rs @@ -22,6 +22,7 @@ use codex_utils_path_uri::PathUri; use codex_utils_plugins::PluginSkillRoot; use codex_utils_plugins::plugin_namespace_for_skill_path; use dirs::home_dir; +use futures::future::join_all; use serde::Deserialize; use std::collections::HashSet; use std::collections::VecDeque; @@ -534,15 +535,29 @@ async fn discover_skills_under_root( } }; - for entry in entries { - let file_name = entry.file_name; - if file_name.starts_with('.') { - continue; - } + let paths = entries + .into_iter() + .filter_map(|entry| { + let file_name = entry.file_name; + if file_name.starts_with('.') { + return None; + } + let path = dir.join(&file_name); + let path_uri = PathUri::from_abs_path(&path); + Some((file_name, path, path_uri)) + }) + .collect::>(); + let metadata_results = join_all( + paths + .iter() + .map(|(_, _, path_uri)| fs.get_metadata(path_uri, /*sandbox*/ None)), + ) + .await; - let path = dir.join(&file_name); - let path_uri = PathUri::from_abs_path(&path); - let metadata = match fs.get_metadata(&path_uri, /*sandbox*/ None).await { + for ((file_name, path, path_uri), metadata_result) in + paths.into_iter().zip(metadata_results) + { + let metadata = match metadata_result { Ok(metadata) => metadata, Err(e) => { error!("failed to stat skills path {}: {e:#}", path.display());