mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
Reuse walk inventory for environment skill metadata (#30145)
## Why Environment skill discovery already asks the executor to run one `fs/walk`. That response contains every regular file path found under the selected root, including any `agents/openai.yaml` files. Today Core keeps the discovered `SKILL.md` paths but discards the rest of that file inventory. It then sends one `fs/getMetadata` request per skill just to ask whether `agents/openai.yaml` exists. A root with 66 skills and no metadata therefore pays for 66 unnecessary network round trips. ## What changes - Keep the `fs/walk` file and directory inventory for the duration of the scan. - Associate each discovered `SKILL.md` with metadata that is known present, known absent, or still requires a fallback probe. - Read a known `agents/openai.yaml` directly instead of statting it first. - Skip the metadata request entirely when a complete walk shows that the skill has no `agents` directory. - Read a known `SKILL.md` and `agents/openai.yaml` concurrently. - Keep parsing and validation in `core-skills`. The inventory is scan-local. This does not add another cache or change cache lifetime. ## Network impact For a complete scan of 66 valid skills with no `agents/openai.yaml`, and one root `.codex-plugin/plugin.json`: | Operation | Current | After this PR | | --- | ---: | ---: | | `fs/walk` | 1 | 1 | | Read `SKILL.md` | 66 | 66 | | Stat `agents/openai.yaml` | 66 | 0 | | Read `agents/openai.yaml` | 0 | 0 | | Stat plugin manifest | 1 | 1 | | Read plugin manifest | 1 | 1 | | **Total executor RPCs** | **135** | **69** | This removes exactly 66 request/response exchanges from the common cold scan. Warm scans remain at zero discovery RPCs because the thread-level executor catalog cache is unchanged. When metadata exists, each file still requires one read. This PR removes only the preceding existence check; it does not batch file contents into a new RPC. ## Correctness fallbacks Absence is trusted only when the walk is complete and the metadata directory was not present. Core keeps the existing `getMetadata` fallback when: - the walk was truncated; - the walk reported an error; or - an `agents` directory was observed but `openai.yaml` was not, which preserves support for file symlinks and traversal boundaries. ## Deliberate scope This PR changes only the environment skill loader and its existing filesystem-call regression coverage. It does not: - change `fs/walk` or any exec-server protocol; - add `readFiles` or a skills-list endpoint; - change thread caching; - change local skill discovery; - change exec-server request concurrency; or - optimize plugin-manifest lookup. The plugin-manifest stat is intentionally left in place, which is why this PR reaches 69 calls rather than the broader 68-call estimate. That lookup has separate alternate-path, ancestor, and symlink semantics and should not be mixed into this change.
This commit is contained in:
@@ -23,7 +23,6 @@ use super::ParsedSkillFrontmatter;
|
||||
use super::SKILLS_FILENAME;
|
||||
use super::SKILLS_METADATA_DIR;
|
||||
use super::SKILLS_METADATA_FILENAME;
|
||||
use super::SkillFileDiscovery;
|
||||
use super::SkillMetadataFile;
|
||||
use super::parse_skill_frontmatter_metadata_inner;
|
||||
use super::resolve_dependencies;
|
||||
@@ -34,6 +33,24 @@ use super::validate_len;
|
||||
const MAX_SKILLS_ENTRIES_PER_ROOT: usize = 20_000;
|
||||
const MAX_CONCURRENT_SKILL_LOADS: usize = 64;
|
||||
|
||||
struct EnvironmentSkillDiscovery {
|
||||
skills: Vec<DiscoveredEnvironmentSkill>,
|
||||
plugin_roots: HashSet<PathUri>,
|
||||
namespace_roots: HashSet<PathUri>,
|
||||
warnings: Vec<String>,
|
||||
}
|
||||
|
||||
struct DiscoveredEnvironmentSkill {
|
||||
path: PathUri,
|
||||
metadata: SkillMetadataDiscovery,
|
||||
}
|
||||
|
||||
enum SkillMetadataDiscovery {
|
||||
Present(PathUri),
|
||||
Absent,
|
||||
Probe(PathUri),
|
||||
}
|
||||
|
||||
/// URI-native metadata for one skill owned by an execution environment.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct EnvironmentSkillMetadata {
|
||||
@@ -67,28 +84,44 @@ impl EnvironmentSkillMetadata {
|
||||
|
||||
async fn parse(
|
||||
file_system: &dyn ExecutorFileSystem,
|
||||
path: &PathUri,
|
||||
skill: &DiscoveredEnvironmentSkill,
|
||||
plugin_namespace: Option<&str>,
|
||||
) -> Result<Self, String> {
|
||||
let contents = file_system
|
||||
.read_file_text(path, /*sandbox*/ None)
|
||||
.await
|
||||
.map_err(|err| format!("failed to read file: {err}"))?;
|
||||
let (contents, discovered_metadata) = match &skill.metadata {
|
||||
SkillMetadataDiscovery::Present(metadata_path) => {
|
||||
let (contents, metadata) = tokio::join!(
|
||||
read_skill_contents(file_system, &skill.path),
|
||||
read_skill_metadata(file_system, metadata_path),
|
||||
);
|
||||
(contents?, metadata)
|
||||
}
|
||||
SkillMetadataDiscovery::Absent | SkillMetadataDiscovery::Probe(_) => (
|
||||
read_skill_contents(file_system, &skill.path).await?,
|
||||
(None, None),
|
||||
),
|
||||
};
|
||||
let ParsedSkillFrontmatter {
|
||||
name: base_name,
|
||||
description,
|
||||
short_description,
|
||||
} = parse_skill_frontmatter_metadata_inner(&contents, || default_skill_name(path))
|
||||
} = parse_skill_frontmatter_metadata_inner(&contents, || default_skill_name(&skill.path))
|
||||
.map_err(|err| err.to_string())?;
|
||||
let name = plugin_namespace
|
||||
.map(|namespace| format!("{namespace}:{base_name}"))
|
||||
.unwrap_or(base_name);
|
||||
validate_len(&name, MAX_QUALIFIED_NAME_LEN, "qualified name")
|
||||
.map_err(|err| err.to_string())?;
|
||||
let (dependencies, policy) = load_skill_metadata(file_system, path).await;
|
||||
let (dependencies, policy) = match &skill.metadata {
|
||||
SkillMetadataDiscovery::Present(_) | SkillMetadataDiscovery::Absent => {
|
||||
discovered_metadata
|
||||
}
|
||||
SkillMetadataDiscovery::Probe(metadata_path) => {
|
||||
probe_skill_metadata(file_system, metadata_path).await
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
path_to_skills_md: path.clone(),
|
||||
path_to_skills_md: skill.path.clone(),
|
||||
name,
|
||||
description,
|
||||
short_description,
|
||||
@@ -125,6 +158,7 @@ pub async fn load_environment_skills_from_root(
|
||||
.await
|
||||
{
|
||||
Ok(walk) => {
|
||||
let inventory_complete = !walk.truncated && walk.errors.is_empty();
|
||||
let mut warnings = walk
|
||||
.errors
|
||||
.into_iter()
|
||||
@@ -141,10 +175,13 @@ pub async fn load_environment_skills_from_root(
|
||||
));
|
||||
}
|
||||
let mut skill_files = Vec::new();
|
||||
let mut file_paths = HashSet::new();
|
||||
let mut directory_paths = HashSet::new();
|
||||
let mut plugin_roots = HashSet::new();
|
||||
for entry in walk.entries {
|
||||
match entry.kind {
|
||||
WalkEntryKind::Directory => {
|
||||
directory_paths.insert(entry.path.clone());
|
||||
if DISCOVERABLE_PLUGIN_MANIFEST_PATHS
|
||||
.iter()
|
||||
.any(|path| path.split('/').next() == entry.path.basename().as_deref())
|
||||
@@ -154,40 +191,53 @@ pub async fn load_environment_skills_from_root(
|
||||
}
|
||||
}
|
||||
WalkEntryKind::File => {
|
||||
file_paths.insert(entry.path.clone());
|
||||
if entry.path.basename().as_deref() == Some(SKILLS_FILENAME) {
|
||||
skill_files.push(entry.path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
SkillFileDiscovery {
|
||||
skill_files,
|
||||
let skills = skill_files
|
||||
.into_iter()
|
||||
.map(|path| DiscoveredEnvironmentSkill {
|
||||
metadata: discover_skill_metadata(
|
||||
&path,
|
||||
&file_paths,
|
||||
&directory_paths,
|
||||
inventory_complete,
|
||||
),
|
||||
path,
|
||||
})
|
||||
.collect();
|
||||
EnvironmentSkillDiscovery {
|
||||
skills,
|
||||
plugin_roots,
|
||||
namespace_roots: HashSet::from([root.clone()]),
|
||||
warnings,
|
||||
}
|
||||
}
|
||||
Err(error) if error.kind() == io::ErrorKind::NotFound => SkillFileDiscovery {
|
||||
skill_files: Vec::new(),
|
||||
Err(error) if error.kind() == io::ErrorKind::NotFound => EnvironmentSkillDiscovery {
|
||||
skills: Vec::new(),
|
||||
plugin_roots: HashSet::new(),
|
||||
namespace_roots: HashSet::new(),
|
||||
warnings: Vec::new(),
|
||||
},
|
||||
Err(error) => SkillFileDiscovery {
|
||||
skill_files: Vec::new(),
|
||||
Err(error) => EnvironmentSkillDiscovery {
|
||||
skills: Vec::new(),
|
||||
plugin_roots: HashSet::new(),
|
||||
namespace_roots: HashSet::new(),
|
||||
warnings: vec![format!("failed to walk skills root {root}: {error:#}")],
|
||||
},
|
||||
};
|
||||
outcome.warnings.extend(discovery.warnings);
|
||||
if discovery.skill_files.is_empty() {
|
||||
if discovery.skills.is_empty() {
|
||||
return outcome;
|
||||
}
|
||||
|
||||
let mut skill_ancestors = HashSet::new();
|
||||
for skill_path in &discovery.skill_files {
|
||||
let mut ancestor = skill_path.parent();
|
||||
for skill in &discovery.skills {
|
||||
let mut ancestor = skill.path.parent();
|
||||
while let Some(path) = ancestor {
|
||||
skill_ancestors.insert(path.clone());
|
||||
ancestor = path.parent();
|
||||
@@ -224,9 +274,9 @@ pub async fn load_environment_skills_from_root(
|
||||
|
||||
// Remote executors can multiplex these independent per-skill reads, so polling a bounded
|
||||
// number together allows the I/O for each skill and its metadata to happen concurrently.
|
||||
let skill_results = futures::stream::iter(discovery.skill_files)
|
||||
.map(|path| {
|
||||
let mut ancestor = path.parent();
|
||||
let skill_results = futures::stream::iter(discovery.skills)
|
||||
.map(|skill| {
|
||||
let mut ancestor = skill.path.parent();
|
||||
let plugin_namespace = loop {
|
||||
let Some(current) = ancestor else {
|
||||
break None;
|
||||
@@ -236,9 +286,10 @@ pub async fn load_environment_skills_from_root(
|
||||
}
|
||||
ancestor = current.parent();
|
||||
};
|
||||
let path = skill.path.clone();
|
||||
async move {
|
||||
let result =
|
||||
EnvironmentSkillMetadata::parse(file_system, &path, plugin_namespace).await;
|
||||
EnvironmentSkillMetadata::parse(file_system, &skill, plugin_namespace).await;
|
||||
(path, result)
|
||||
}
|
||||
})
|
||||
@@ -267,20 +318,48 @@ pub async fn load_environment_skills_from_root(
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn load_skill_metadata(
|
||||
fn discover_skill_metadata(
|
||||
skill_path: &PathUri,
|
||||
file_paths: &HashSet<PathUri>,
|
||||
directory_paths: &HashSet<PathUri>,
|
||||
inventory_complete: bool,
|
||||
) -> SkillMetadataDiscovery {
|
||||
let Some(skill_dir) = skill_path.parent() else {
|
||||
return SkillMetadataDiscovery::Absent;
|
||||
};
|
||||
let Ok(metadata_dir) = skill_dir.join(SKILLS_METADATA_DIR) else {
|
||||
return SkillMetadataDiscovery::Absent;
|
||||
};
|
||||
let Ok(metadata_path) = metadata_dir.join(SKILLS_METADATA_FILENAME) else {
|
||||
return SkillMetadataDiscovery::Absent;
|
||||
};
|
||||
if file_paths.contains(&metadata_path) {
|
||||
SkillMetadataDiscovery::Present(metadata_path)
|
||||
} else if inventory_complete && !directory_paths.contains(&metadata_dir) {
|
||||
SkillMetadataDiscovery::Absent
|
||||
} else {
|
||||
// The walk can omit entries after an error or traversal limit. It also omits file
|
||||
// symlinks, so keep the existing probe when the metadata directory itself was observed.
|
||||
SkillMetadataDiscovery::Probe(metadata_path)
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_skill_contents(
|
||||
file_system: &dyn ExecutorFileSystem,
|
||||
skill_path: &PathUri,
|
||||
) -> Result<String, String> {
|
||||
file_system
|
||||
.read_file_text(skill_path, /*sandbox*/ None)
|
||||
.await
|
||||
.map_err(|err| format!("failed to read file: {err}"))
|
||||
}
|
||||
|
||||
async fn probe_skill_metadata(
|
||||
file_system: &dyn ExecutorFileSystem,
|
||||
metadata_path: &PathUri,
|
||||
) -> (Option<SkillDependencies>, Option<SkillPolicy>) {
|
||||
let Some(skill_dir) = skill_path.parent() else {
|
||||
return (None, None);
|
||||
};
|
||||
let Ok(metadata_path) =
|
||||
skill_dir.join(&format!("{SKILLS_METADATA_DIR}/{SKILLS_METADATA_FILENAME}"))
|
||||
else {
|
||||
return (None, None);
|
||||
};
|
||||
match file_system
|
||||
.get_metadata(&metadata_path, /*sandbox*/ None)
|
||||
.get_metadata(metadata_path, /*sandbox*/ None)
|
||||
.await
|
||||
{
|
||||
Ok(metadata) if metadata.is_file => {}
|
||||
@@ -291,8 +370,15 @@ async fn load_skill_metadata(
|
||||
return (None, None);
|
||||
}
|
||||
}
|
||||
read_skill_metadata(file_system, metadata_path).await
|
||||
}
|
||||
|
||||
async fn read_skill_metadata(
|
||||
file_system: &dyn ExecutorFileSystem,
|
||||
metadata_path: &PathUri,
|
||||
) -> (Option<SkillDependencies>, Option<SkillPolicy>) {
|
||||
let contents = match file_system
|
||||
.read_file_text(&metadata_path, /*sandbox*/ None)
|
||||
.read_file_text(metadata_path, /*sandbox*/ None)
|
||||
.await
|
||||
{
|
||||
Ok(contents) => contents,
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use std::fs;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use codex_core_skills::loader::EnvironmentSkillMetadata;
|
||||
use codex_core_skills::loader::load_environment_skills_from_root;
|
||||
@@ -13,6 +15,8 @@ use codex_exec_server::FileSystemSandboxContext;
|
||||
use codex_exec_server::LOCAL_FS;
|
||||
use codex_exec_server::ReadDirectoryEntry;
|
||||
use codex_exec_server::RemoveOptions;
|
||||
use codex_exec_server::WalkOptions;
|
||||
use codex_exec_server::WalkOutcome;
|
||||
use codex_utils_path_uri::PathUri;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::tempdir;
|
||||
@@ -20,6 +24,15 @@ use tempfile::tempdir;
|
||||
struct RecordingFileSystem<'a> {
|
||||
inner: &'a dyn ExecutorFileSystem,
|
||||
read_files: Mutex<Vec<PathUri>>,
|
||||
metadata_files: Mutex<Vec<PathUri>>,
|
||||
walks: AtomicUsize,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
struct FileSystemCalls {
|
||||
walks: usize,
|
||||
read_files: Vec<PathUri>,
|
||||
metadata_files: Vec<PathUri>,
|
||||
}
|
||||
|
||||
impl<'a> RecordingFileSystem<'a> {
|
||||
@@ -27,14 +40,29 @@ impl<'a> RecordingFileSystem<'a> {
|
||||
Self {
|
||||
inner,
|
||||
read_files: Mutex::new(Vec::new()),
|
||||
metadata_files: Mutex::new(Vec::new()),
|
||||
walks: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
fn read_files(&self) -> Vec<PathUri> {
|
||||
self.read_files
|
||||
fn calls(&self) -> FileSystemCalls {
|
||||
let mut read_files = self
|
||||
.read_files
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner)
|
||||
.clone()
|
||||
.clone();
|
||||
read_files.sort_by_key(ToString::to_string);
|
||||
let mut metadata_files = self
|
||||
.metadata_files
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner)
|
||||
.clone();
|
||||
metadata_files.sort_by_key(ToString::to_string);
|
||||
FileSystemCalls {
|
||||
walks: self.walks.load(Ordering::Relaxed),
|
||||
read_files,
|
||||
metadata_files,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,6 +118,10 @@ impl ExecutorFileSystem for RecordingFileSystem<'_> {
|
||||
path: &'a PathUri,
|
||||
sandbox: Option<&'a FileSystemSandboxContext>,
|
||||
) -> ExecutorFileSystemFuture<'a, FileMetadata> {
|
||||
self.metadata_files
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner)
|
||||
.push(path.clone());
|
||||
self.inner.get_metadata(path, sandbox)
|
||||
}
|
||||
|
||||
@@ -101,6 +133,16 @@ impl ExecutorFileSystem for RecordingFileSystem<'_> {
|
||||
self.inner.read_directory(path, sandbox)
|
||||
}
|
||||
|
||||
fn walk<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
options: WalkOptions,
|
||||
sandbox: Option<&'a FileSystemSandboxContext>,
|
||||
) -> ExecutorFileSystemFuture<'a, WalkOutcome> {
|
||||
self.walks.fetch_add(1, Ordering::Relaxed);
|
||||
self.inner.walk(path, options, sandbox)
|
||||
}
|
||||
|
||||
fn remove<'a>(
|
||||
&'a self,
|
||||
path: &'a PathUri,
|
||||
@@ -200,7 +242,8 @@ async fn loads_nearest_plugin_namespaces_without_reading_unused_sibling_manifest
|
||||
);
|
||||
|
||||
let mut manifest_reads = file_system
|
||||
.read_files()
|
||||
.calls()
|
||||
.read_files
|
||||
.into_iter()
|
||||
.filter(|path| path.basename().as_deref() == Some("plugin.json"))
|
||||
.collect::<Vec<_>>();
|
||||
@@ -214,3 +257,73 @@ async fn loads_nearest_plugin_namespaces_without_reading_unused_sibling_manifest
|
||||
expected_manifest_reads.sort_by_key(ToString::to_string);
|
||||
assert_eq!(manifest_reads, expected_manifest_reads);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reuses_walk_inventory_for_missing_skill_metadata() {
|
||||
const SKILL_COUNT: usize = 66;
|
||||
|
||||
let root = tempdir().expect("tempdir");
|
||||
let manifest_path = root.path().join(".codex-plugin/plugin.json");
|
||||
fs::create_dir_all(manifest_path.parent().expect("manifest parent")).expect("manifest dir");
|
||||
fs::write(&manifest_path, r#"{"name":"inventory"}"#).expect("manifest");
|
||||
|
||||
let mut skill_paths = Vec::new();
|
||||
for index in 0..SKILL_COUNT {
|
||||
let name = format!("skill-{index}");
|
||||
let skill_path = root.path().join(&name).join("SKILL.md");
|
||||
fs::create_dir_all(skill_path.parent().expect("skill parent")).expect("skill dir");
|
||||
fs::write(
|
||||
&skill_path,
|
||||
format!("---\nname: {name}\ndescription: {name} skill.\n---\n"),
|
||||
)
|
||||
.expect("skill");
|
||||
skill_paths.push(skill_path);
|
||||
}
|
||||
|
||||
let file_system = RecordingFileSystem::new(LOCAL_FS.as_ref());
|
||||
let root_uri = PathUri::from_host_native_path(root.path()).expect("root URI");
|
||||
let outcome = load_environment_skills_from_root(
|
||||
&file_system,
|
||||
&root_uri,
|
||||
/*restriction_product*/ None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut expected_skills = skill_paths
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, skill_path)| EnvironmentSkillMetadata {
|
||||
path_to_skills_md: PathUri::from_host_native_path(skill_path).unwrap(),
|
||||
name: format!("inventory:skill-{index}"),
|
||||
description: format!("skill-{index} skill."),
|
||||
short_description: None,
|
||||
dependencies: None,
|
||||
policy: None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
expected_skills.sort_by(|left, right| {
|
||||
left.name.cmp(&right.name).then_with(|| {
|
||||
left.path_to_skills_md
|
||||
.to_string()
|
||||
.cmp(&right.path_to_skills_md.to_string())
|
||||
})
|
||||
});
|
||||
assert_eq!(outcome.skills, expected_skills);
|
||||
assert_eq!(outcome.warnings, Vec::<String>::new());
|
||||
|
||||
let mut expected_read_files = skill_paths
|
||||
.iter()
|
||||
.map(|path| PathUri::from_host_native_path(path).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
let manifest_uri = PathUri::from_host_native_path(manifest_path).unwrap();
|
||||
expected_read_files.push(manifest_uri.clone());
|
||||
expected_read_files.sort_by_key(ToString::to_string);
|
||||
assert_eq!(
|
||||
file_system.calls(),
|
||||
FileSystemCalls {
|
||||
walks: 1,
|
||||
read_files: expected_read_files,
|
||||
metadata_files: vec![manifest_uri],
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user