[codex] Load AGENTS.md from all bound environments (#27696)

## Why

We already have the machinery to support multiple environments on a
single thread, but we only show the model the contents of `AGENTS.md`
files in the primary environment.

We should show the model all of the relevant project instructions when
we know there's more than one environment.

## Known Gaps

As discussed in the RFC, this implementation:

1. doesn't handle environments being added to or removed from the thread
after its creation
2. doesn't enforce an aggregate context budget across environments,
and instead applies the configured project maximum independently to each
environment

## Implementation

- Discover project instructions in environment order with an independent
byte budget per environment and preserve source provenance/order.
- Keep the legacy fragment byte-for-byte when exactly one environment
contributes project instructions; use environment-labeled sections when
two or more environments contribute.
- Freeze the complete rendered fragment in `LoadedAgentsMd`, insert it
directly into requests, and recognize both layouts in contextual and
memory filtering.
- Add exact rendering, independent-budget, source-order,
creation-snapshot, and consumer coverage without changing app-server
schemas.
This commit is contained in:
Adam Perry @ OpenAI
2026-06-12 00:10:06 -07:00
committed by GitHub
Unverified
parent 7a19b14229
commit bf667c7003
15 changed files with 641 additions and 77 deletions
+131 -10
View File
@@ -16,6 +16,9 @@
//! 3. We do **not** walk past the project root.
use crate::config::Config;
use crate::context::ContextualUserFragment;
use crate::context::UserInstructions as ContextUserInstructions;
use crate::environment_selection::ResolvedTurnEnvironments;
use codex_app_server_protocol::ConfigLayerSource;
use codex_config::ConfigLayerStackOrdering;
use codex_config::default_project_root_markers;
@@ -45,15 +48,26 @@ const AGENTS_MD_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
pub(crate) async fn load_project_instructions(
config: &mut Config,
user_instructions: Option<UserInstructions>,
fs: Option<&dyn ExecutorFileSystem>,
environments: &ResolvedTurnEnvironments,
) -> Option<LoadedAgentsMd> {
let mut loaded = LoadedAgentsMd::from_user_instructions(user_instructions);
if let Some(fs) = fs {
match read_agents_md(config, fs).await {
for turn_environment in &environments.turn_environments {
let filesystem = turn_environment.environment.get_filesystem();
match read_agents_md(
config,
filesystem.as_ref(),
&turn_environment.environment_id,
&turn_environment.cwd,
)
.await
{
Ok(Some(docs)) => loaded.entries.extend(docs.entries),
Ok(None) => {}
Err(e) => {
error!("error trying to find AGENTS.md docs: {e:#}");
error!(
environment_id = turn_environment.environment_id,
"error trying to find AGENTS.md docs: {e:#}"
);
}
}
}
@@ -77,6 +91,8 @@ pub(crate) async fn load_project_instructions(
async fn read_agents_md(
config: &mut Config,
fs: &dyn ExecutorFileSystem,
environment_id: &str,
cwd: &AbsolutePathBuf,
) -> io::Result<Option<LoadedAgentsMd>> {
let max_total = config.project_doc_max_bytes;
@@ -84,7 +100,7 @@ async fn read_agents_md(
return Ok(None);
}
let paths = agents_md_paths(config, fs).await?;
let paths = agents_md_paths(config, cwd, fs).await?;
if paths.is_empty() {
return Ok(None);
}
@@ -129,7 +145,11 @@ async fn read_agents_md(
if !text.trim().is_empty() {
loaded.entries.push(InstructionEntry {
contents: text,
provenance: InstructionProvenance::Project(p),
provenance: InstructionProvenance::Project {
source_path: p,
environment_id: environment_id.to_string(),
cwd: cwd.clone(),
},
});
remaining = remaining.saturating_sub(data.len() as u64);
}
@@ -146,9 +166,10 @@ async fn read_agents_md(
/// directory, inclusive. Symlinks are allowed.
async fn agents_md_paths(
config: &Config,
cwd: &AbsolutePathBuf,
fs: &dyn ExecutorFileSystem,
) -> io::Result<Vec<AbsolutePathBuf>> {
let dir = config.cwd.clone();
let dir = cwd.clone();
let mut merged = TomlValue::Table(toml::map::Map::new());
for layer in config.config_layer_stack.get_layers(
@@ -309,6 +330,14 @@ impl LoadedAgentsMd {
/// Returns the concatenated model-visible instruction text.
///
/// The environment-labeled layout is used only when project instructions come from more
/// than one environment; every other case keeps the legacy layout.
pub fn text(&self) -> String {
    if !self.has_multiple_project_environments() {
        return self.legacy_text();
    }
    self.environment_labeled_text()
}
fn legacy_text(&self) -> String {
let mut output = String::new();
let mut has_previous = false;
let mut previous_was_project = false;
@@ -317,7 +346,7 @@ impl LoadedAgentsMd {
has_previous = true;
}
for entry in &self.entries {
let is_project = matches!(&entry.provenance, InstructionProvenance::Project(_));
let is_project = matches!(&entry.provenance, InstructionProvenance::Project { .. });
if has_previous {
// The project-doc marker tells the model where workspace-scoped
// instructions begin, so it is only needed on the transition
@@ -336,6 +365,68 @@ impl LoadedAgentsMd {
output
}
/// Renders the instruction text with a label in front of each environment's group of
/// project instructions.
///
/// Host-provided user instructions (when present) come first, followed by every entry in
/// order, with a blank line between parts. A run of project entries sharing the same
/// environment id and cwd is introduced by a single header line naming both.
fn environment_labeled_text(&self) -> String {
    let mut rendered = String::new();
    // Tracks "something was emitted before", independent of whether that text was empty.
    let mut needs_separator = false;
    // The (environment id, cwd) pair of the group currently being rendered. It is cleared
    // by internal entries, so a project entry following one is labeled again.
    let mut current_group: Option<(&str, &AbsolutePathBuf)> = None;

    if let Some(instructions) = &self.user_instructions {
        rendered.push_str(&instructions.text);
        needs_separator = true;
    }

    for entry in &self.entries {
        if needs_separator {
            rendered.push_str("\n\n");
        }
        current_group = match &entry.provenance {
            InstructionProvenance::Project {
                environment_id,
                cwd,
                ..
            } => {
                // One environment can contribute several hierarchical AGENTS.md files
                // between its project root and its cwd; the label is written once for the
                // whole group instead of once per file.
                let group = (environment_id.as_str(), cwd);
                if current_group != Some(group) {
                    rendered.push_str(&format!(
                        "for `{}` with root {}\n\n",
                        environment_id,
                        cwd.display()
                    ));
                }
                Some(group)
            }
            InstructionProvenance::Internal => None,
        };
        rendered.push_str(&entry.contents);
        needs_separator = true;
    }

    rendered
}
/// Returns the complete model-visible contextual user fragment.
///
/// The fragment wraps [`Self::text`] in the `ContextUserInstructions` layout. Its directory
/// header is only populated when at most one project environment contributes instructions.
pub(crate) fn render(&self) -> String {
    // A single contributing project environment keeps the legacy cwd wrapper. With two or
    // more, the body already labels each contributing environment, so no outer cwd is set.
    let directory = self
        .single_project_cwd()
        .filter(|_| !self.has_multiple_project_environments())
        .map(|cwd| cwd.to_string_lossy().into_owned());
    let text = self.text();
    ContextUserInstructions { directory, text }.render()
}
/// Returns the host-provided user instructions.
pub(crate) fn user_instructions(&self) -> Option<&UserInstructions> {
self.user_instructions.as_ref()
@@ -352,6 +443,31 @@ impl LoadedAgentsMd {
.filter_map(|entry| entry.provenance.path()),
)
}
/// Reports whether project-sourced entries carry at least two distinct environment ids.
///
/// Entries without project provenance are ignored.
fn has_multiple_project_environments(&self) -> bool {
    let mut environment_ids = self
        .entries
        .iter()
        .filter_map(|entry| match &entry.provenance {
            InstructionProvenance::Project { environment_id, .. } => Some(environment_id),
            InstructionProvenance::Internal => None,
        });
    // Compare every later project entry against the first one; a single mismatch is enough.
    match environment_ids.next() {
        Some(first) => environment_ids.any(|environment_id| environment_id != first),
        None => false,
    }
}
/// Returns the cwd recorded on the first project-sourced entry, or `None` when no entry
/// has project provenance.
fn single_project_cwd(&self) -> Option<&AbsolutePathBuf> {
    for entry in &self.entries {
        if let InstructionProvenance::Project { cwd, .. } = &entry.provenance {
            return Some(cwd);
        }
    }
    None
}
}
/// One model-visible instruction and its provenance.
@@ -367,7 +483,12 @@ struct InstructionEntry {
#[derive(Clone, Debug, PartialEq, Eq)]
enum InstructionProvenance {
/// Workspace instructions discovered from project AGENTS.md files.
Project(AbsolutePathBuf),
Project {
/// Exact AGENTS.md file, distinct from the environment's selected cwd.
source_path: AbsolutePathBuf,
environment_id: String,
cwd: AbsolutePathBuf,
},
/// Instructions without a file source, including internally defined guidance.
Internal,
@@ -376,7 +497,7 @@ enum InstructionProvenance {
impl InstructionProvenance {
fn path(&self) -> Option<&AbsolutePathBuf> {
match self {
Self::Project(path) => Some(path),
Self::Project { source_path, .. } => Some(source_path),
Self::Internal => None,
}
}
+283 -12
View File
@@ -1,11 +1,14 @@
use super::*;
use crate::config::ConfigBuilder;
use crate::environment_selection::ResolvedTurnEnvironments;
use crate::session::turn_context::TurnEnvironment;
use codex_config::ConfigLayerEntry;
use codex_config::ConfigLayerStack;
use codex_config::ConfigRequirements;
use codex_config::ConfigRequirementsToml;
use codex_exec_server::CopyOptions;
use codex_exec_server::CreateDirectoryOptions;
use codex_exec_server::Environment;
use codex_exec_server::ExecutorFileSystemFuture;
use codex_exec_server::FileMetadata;
use codex_exec_server::FileSystemSandboxContext;
@@ -26,6 +29,7 @@ use std::ops::Deref;
use std::ops::DerefMut;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use tempfile::TempDir;
#[derive(Clone, Copy)]
@@ -228,10 +232,11 @@ async fn get_user_instructions(config: &TestConfig) -> Option<String> {
async fn load_agents_md(config: &TestConfig, warnings: &mut Vec<String>) -> Option<LoadedAgentsMd> {
let mut core_config = config.config.clone();
let existing_warning_count = core_config.startup_warnings.len();
let environments = resolved_local_environments([("local", core_config.cwd.clone())]);
let loaded = load_project_instructions(
&mut core_config,
config.user_instructions.clone(),
Some(LOCAL_FS.as_ref()),
&environments,
)
.await;
warnings.extend(
@@ -244,7 +249,34 @@ async fn load_agents_md(config: &TestConfig, warnings: &mut Vec<String>) -> Opti
}
async fn agents_md_paths(config: &TestConfig) -> std::io::Result<Vec<AbsolutePathBuf>> {
super::agents_md_paths(&config.config, LOCAL_FS.as_ref()).await
super::agents_md_paths(&config.config, &config.cwd, LOCAL_FS.as_ref()).await
}
/// Builds a `ResolvedTurnEnvironments` from `(environment_id, cwd)` pairs, keeping their
/// order. Every entry gets its own `Environment::create_for_tests` instance (created
/// without an exec-server URL) and no shell.
fn resolved_local_environments<const N: usize>(
    environments: [(&str, AbsolutePathBuf); N],
) -> ResolvedTurnEnvironments {
    let to_turn_environment = |(environment_id, cwd): (&str, AbsolutePathBuf)| {
        let environment = Environment::create_for_tests(/*exec_server_url*/ None)
            .expect("local environment");
        TurnEnvironment {
            environment_id: environment_id.to_string(),
            environment: Arc::new(environment),
            cwd,
            shell: None,
        }
    };
    let turn_environments = environments.into_iter().map(to_turn_environment).collect();
    ResolvedTurnEnvironments { turn_environments }
}
/// Builds project provenance for the AGENTS.md file at `path`, attributed to the `"local"`
/// environment with the given `cwd`.
fn project_provenance(path: AbsolutePathBuf, cwd: AbsolutePathBuf) -> InstructionProvenance {
    let environment_id = String::from("local");
    InstructionProvenance::Project {
        source_path: path,
        environment_id,
        cwd,
    }
}
fn assert_invalid_utf8_warning(warnings: &[String], source: &str, path: &Path) {
@@ -466,11 +498,14 @@ async fn total_byte_limit_truncates_later_project_docs() {
entries: vec![
InstructionEntry {
contents: "root".to_string(),
provenance: InstructionProvenance::Project(repo.path().join("AGENTS.md").abs()),
provenance: project_provenance(
repo.path().join("AGENTS.md").abs(),
config.cwd.clone(),
),
},
InstructionEntry {
contents: "abc".to_string(),
provenance: InstructionProvenance::Project(config.cwd.join("AGENTS.md")),
provenance: project_provenance(config.cwd.join("AGENTS.md"), config.cwd.clone()),
},
],
};
@@ -490,7 +525,8 @@ async fn read_agents_md_propagates_metadata_errors() {
failure: InjectedFailure::Metadata(io::ErrorKind::PermissionDenied),
};
let err = read_agents_md(&mut config.config, &fs)
let cwd = config.cwd.clone();
let err = read_agents_md(&mut config.config, &fs, "local", &cwd)
.await
.expect_err("metadata error");
@@ -507,7 +543,8 @@ async fn read_agents_md_propagates_read_errors() {
failure: InjectedFailure::Read(io::ErrorKind::PermissionDenied),
};
let err = read_agents_md(&mut config.config, &fs)
let cwd = config.cwd.clone();
let err = read_agents_md(&mut config.config, &fs, "local", &cwd)
.await
.expect_err("read error");
@@ -524,7 +561,8 @@ async fn read_agents_md_ignores_files_removed_after_discovery() {
failure: InjectedFailure::Read(io::ErrorKind::NotFound),
};
let loaded = read_agents_md(&mut config.config, &fs)
let cwd = config.cwd.clone();
let loaded = read_agents_md(&mut config.config, &fs, "local", &cwd)
.await
.expect("removed file is recoverable");
@@ -591,6 +629,240 @@ async fn merges_existing_instructions_with_agents_md() {
assert_eq!(res, expected);
}
/// Two environments that both contribute AGENTS.md files are rendered with the
/// environment-labeled layout. The primary environment contributes two files (root and
/// nested cwd) under one label, the rendered fragment has no directory in its header, and
/// `sources()` lists the global source followed by the project files in discovery order.
#[tokio::test]
async fn multiple_environment_docs_use_labeled_layout_and_preserve_source_order() {
let primary = tempfile::tempdir().expect("primary tempdir");
let secondary = tempfile::tempdir().expect("secondary tempdir");
// NOTE(review): the `.git` directory presumably marks `primary` as the project root so the
// root AGENTS.md is discovered from the nested cwd — confirm against the root markers.
fs::create_dir(primary.path().join(".git")).unwrap();
fs::write(primary.path().join("AGENTS.md"), "primary root doc").unwrap();
let primary_nested = primary.path().join("nested");
fs::create_dir(&primary_nested).unwrap();
fs::write(primary_nested.join("AGENTS.md"), "primary nested doc").unwrap();
fs::write(secondary.path().join("AGENTS.md"), "secondary doc").unwrap();
let mut config = make_config(&primary, /*limit*/ 4096, Some("global instructions")).await;
config.cwd = primary_nested.abs();
// Environment order here is the order expected in the rendered text and in `sources()`.
let environments = resolved_local_environments([
("primary", config.cwd.clone()),
("secondary", secondary.abs()),
]);
let user_instructions = config.user_instructions.clone();
let loaded = load_project_instructions(&mut config.config, user_instructions, &environments)
.await
.expect("instructions expected");
let inner = format!(
r#"global instructions
for `primary` with root {}
primary root doc
primary nested doc
for `secondary` with root {}
secondary doc"#,
primary_nested.display(),
secondary.path().display(),
);
// `text()` must pick the labeled layout on its own when two environments contribute.
assert_eq!(loaded.environment_labeled_text(), inner);
assert_eq!(loaded.text(), inner);
let expected_fragment = format!(
r#"# AGENTS.md instructions
<INSTRUCTIONS>
{inner}
</INSTRUCTIONS>"#
);
assert_eq!(loaded.render(), expected_fragment);
assert_eq!(
loaded.sources().cloned().collect::<Vec<_>>(),
vec![
config
.user_instructions
.as_ref()
.expect("global instructions")
.source
.clone(),
primary.path().join("AGENTS.md").abs(),
primary_nested.join("AGENTS.md").abs(),
secondary.path().join("AGENTS.md").abs(),
]
);
}
/// When two environments are bound but only the secondary one has an AGENTS.md, the legacy
/// (unlabeled) layout is used and the fragment header names the secondary environment's cwd.
#[tokio::test]
async fn secondary_only_project_doc_uses_single_contributor_layout() {
let primary = tempfile::tempdir().expect("primary tempdir");
let secondary = tempfile::tempdir().expect("secondary tempdir");
// Only the secondary directory gets an AGENTS.md.
fs::write(secondary.path().join("AGENTS.md"), "secondary doc").unwrap();
let mut config = make_config(&primary, /*limit*/ 4096, Some("global instructions")).await;
let environments = resolved_local_environments([
("primary", config.cwd.clone()),
("secondary", secondary.abs()),
]);
let user_instructions = config.user_instructions.clone();
let loaded = load_project_instructions(&mut config.config, user_instructions, &environments)
.await
.expect("instructions expected");
let inner = format!("global instructions{AGENTS_MD_SEPARATOR}secondary doc");
assert_eq!(loaded.legacy_text(), inner);
assert_eq!(loaded.text(), inner);
// The header directory comes from the contributing environment, not the primary one.
let expected_fragment = format!(
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\n{inner}\n</INSTRUCTIONS>",
secondary.path().display()
);
assert_eq!(loaded.render(), expected_fragment);
}
/// When two environments are bound but only the primary one has an AGENTS.md, the output
/// matches the legacy layout: global instructions, the project-doc separator, the project
/// doc, and a fragment header naming the primary directory.
#[tokio::test]
async fn primary_only_project_doc_preserves_legacy_layout_with_multiple_bound_environments() {
let primary = tempfile::tempdir().expect("primary tempdir");
let secondary = tempfile::tempdir().expect("secondary tempdir");
// Only the primary directory gets an AGENTS.md.
fs::write(primary.path().join("AGENTS.md"), "primary doc").unwrap();
let mut config = make_config(&primary, /*limit*/ 4096, Some("global instructions")).await;
let environments = resolved_local_environments([
("primary", config.cwd.clone()),
("secondary", secondary.abs()),
]);
let user_instructions = config.user_instructions.clone();
let loaded = load_project_instructions(&mut config.config, user_instructions, &environments)
.await
.expect("instructions expected");
let inner = format!("global instructions{AGENTS_MD_SEPARATOR}primary doc");
assert_eq!(loaded.legacy_text(), inner);
assert_eq!(loaded.text(), inner);
let expected_fragment = format!(
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\n{inner}\n</INSTRUCTIONS>",
primary.path().display()
);
assert_eq!(loaded.render(), expected_fragment);
}
/// With a 3-byte project-doc limit and a 5-byte AGENTS.md in each of two environments, each
/// environment's doc is truncated to its own first 3 bytes rather than sharing one budget.
#[tokio::test]
async fn project_doc_byte_limit_is_applied_independently_per_environment() {
let primary = tempfile::tempdir().expect("primary tempdir");
let secondary = tempfile::tempdir().expect("secondary tempdir");
fs::write(primary.path().join("AGENTS.md"), "ABCDE").unwrap();
fs::write(secondary.path().join("AGENTS.md"), "VWXYZ").unwrap();
let mut config = make_config(&primary, /*limit*/ 3, /*instructions*/ None).await;
let environments = resolved_local_environments([
("primary", config.cwd.clone()),
("secondary", secondary.abs()),
]);
let user_instructions = config.user_instructions.clone();
let loaded = load_project_instructions(&mut config.config, user_instructions, &environments)
.await
.expect("instructions expected");
// Both environments contribute, so the labeled layout is expected; a shared budget would
// have left nothing for the secondary doc.
assert_eq!(
loaded.text(),
format!(
"for `primary` with root {}\n\nABC\n\nfor `secondary` with root {}\n\nVWX",
primary.path().display(),
secondary.path().display()
)
);
}
/// Pins the current (known-gap) behavior: two environments whose docs each fill the
/// configured limit together produce twice that many project-instruction bytes.
#[tokio::test]
async fn multiple_environments_can_exceed_single_environment_project_doc_limit() {
// TODO(anp): Add an aggregate cap across environments instead of allowing the combined
// project instructions to grow by one full per-environment budget for every binding.
const LIMIT: usize = 8;
let primary = tempfile::tempdir().expect("primary tempdir");
let secondary = tempfile::tempdir().expect("secondary tempdir");
// Each doc is exactly LIMIT bytes, so neither is truncated on its own.
let primary_doc = "P".repeat(LIMIT);
let secondary_doc = "S".repeat(LIMIT);
fs::write(primary.path().join("AGENTS.md"), &primary_doc).unwrap();
fs::write(secondary.path().join("AGENTS.md"), &secondary_doc).unwrap();
let mut config = make_config(&primary, LIMIT, /*instructions*/ None).await;
let environments = resolved_local_environments([
("primary", config.cwd.clone()),
("secondary", secondary.abs()),
]);
let loaded = load_project_instructions(
&mut config.config,
/*user_instructions*/ None,
&environments,
)
.await
.expect("instructions expected");
// Sum only project-sourced entries so internal guidance does not affect the count.
let project_bytes = loaded
.entries
.iter()
.filter(|entry| matches!(&entry.provenance, InstructionProvenance::Project { .. }))
.map(|entry| entry.contents.len())
.sum::<usize>();
assert_eq!(project_bytes, LIMIT * 2);
assert!(project_bytes > config.project_doc_max_bytes);
assert!(loaded.text().contains(&primary_doc));
assert!(loaded.text().contains(&secondary_doc));
}
/// An AGENTS.md with invalid UTF-8 in the secondary environment is still loaded (with the
/// bad byte replaced by U+FFFD), the primary doc is unaffected, and a startup warning is
/// recorded for the secondary file.
#[tokio::test]
async fn secondary_environment_invalid_utf8_warns_without_suppressing_other_docs() {
let primary = tempfile::tempdir().expect("primary tempdir");
let secondary = tempfile::tempdir().expect("secondary tempdir");
fs::write(primary.path().join("AGENTS.md"), "primary doc").unwrap();
// 0xFF is never valid in UTF-8.
fs::write(secondary.path().join("AGENTS.md"), b"secondary\xFFdoc").unwrap();
let mut config = make_config(&primary, /*limit*/ 4096, /*instructions*/ None).await;
let environments = resolved_local_environments([
("primary", config.cwd.clone()),
("secondary", secondary.abs()),
]);
let loaded = load_project_instructions(
&mut config.config,
/*user_instructions*/ None,
&environments,
)
.await
.expect("instructions expected");
assert!(loaded.text().contains("primary doc"));
assert!(loaded.text().contains("secondary\u{FFFD}doc"));
assert_invalid_utf8_warning(
&config.startup_warnings,
"Project",
secondary.path().join("AGENTS.md").as_path(),
);
}
/// With `Feature::ChildAgentsMd` enabled and two contributing environments, the
/// hierarchical-agents guidance appears exactly once and is the final part of the text.
#[tokio::test]
async fn child_agents_guidance_is_appended_once_after_environment_groups() {
let primary = tempfile::tempdir().expect("primary tempdir");
let secondary = tempfile::tempdir().expect("secondary tempdir");
fs::write(primary.path().join("AGENTS.md"), "primary doc").unwrap();
fs::write(secondary.path().join("AGENTS.md"), "secondary doc").unwrap();
let mut config = make_config(&primary, /*limit*/ 4096, /*instructions*/ None).await;
config.features.enable(Feature::ChildAgentsMd).unwrap();
let environments = resolved_local_environments([
("primary", config.cwd.clone()),
("secondary", secondary.abs()),
]);
let loaded = load_project_instructions(
&mut config.config,
/*user_instructions*/ None,
&environments,
)
.await
.expect("instructions expected");
let text = loaded.text();
// Not repeated per environment, and placed after both environment groups.
assert_eq!(text.matches(HIERARCHICAL_AGENTS_MESSAGE).count(), 1);
assert!(text.ends_with(HIERARCHICAL_AGENTS_MESSAGE));
}
/// If there are existing system instructions but AGENTS.md docs are
/// missing we expect the original instructions to be returned unchanged.
#[tokio::test]
@@ -598,7 +870,6 @@ async fn keeps_existing_instructions_when_doc_missing() {
let tmp = tempfile::tempdir().expect("tempdir");
const INSTRUCTIONS: &str = "some instructions";
let res =
get_user_instructions(&make_config(&tmp, /*limit*/ 4096, Some(INSTRUCTIONS)).await).await;
@@ -640,11 +911,11 @@ async fn concatenates_root_and_cwd_docs() {
entries: vec![
InstructionEntry {
contents: "root doc".to_string(),
provenance: InstructionProvenance::Project(root_agents.clone()),
provenance: project_provenance(root_agents.clone(), cfg.cwd.clone()),
},
InstructionEntry {
contents: "crate doc".to_string(),
provenance: InstructionProvenance::Project(crate_agents.clone()),
provenance: project_provenance(crate_agents.clone(), cfg.cwd.clone()),
},
],
};
@@ -804,7 +1075,7 @@ async fn instruction_sources_include_global_before_agents_md_docs() {
}),
entries: vec![InstructionEntry {
contents: "project doc".to_string(),
provenance: InstructionProvenance::Project(project_agents.clone()),
provenance: project_provenance(project_agents.clone(), cfg.cwd.clone()),
}],
};
assert_eq!(loaded, expected);
@@ -844,7 +1115,7 @@ async fn child_agents_message_after_project_docs_is_not_an_instruction_source()
entries: vec![
InstructionEntry {
contents: "project doc".to_string(),
provenance: InstructionProvenance::Project(project_agents.clone()),
provenance: project_provenance(project_agents.clone(), cfg.cwd.clone()),
},
InstructionEntry {
contents: HIERARCHICAL_AGENTS_MESSAGE.to_string(),
@@ -17,10 +17,38 @@ fn detects_environment_context_fragment() {
#[test]
fn detects_agents_instructions_fragment() {
assert!(is_contextual_user_fragment(&ContentItem::InputText {
text: "# AGENTS.md instructions for /tmp\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>"
.to_string(),
}));
for text in [
"# AGENTS.md instructions for /tmp\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>",
"# AGENTS.md instructions\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>",
] {
assert!(is_contextual_user_fragment(&ContentItem::InputText {
text: text.to_string(),
}));
}
}
/// A fragment with a directory renders the legacy `for <directory>` header.
#[test]
fn renders_agents_instructions_with_legacy_directory_header() {
    let fragment = UserInstructions {
        directory: Some("/tmp".to_string()),
        text: "body".to_string(),
    };
    let rendered = fragment.render();
    assert_eq!(
        rendered,
        "# AGENTS.md instructions for /tmp\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>"
    );
}
/// A fragment without a directory renders the bare header with no `for ...` suffix.
#[test]
fn renders_agents_instructions_without_directory_header() {
    let fragment = UserInstructions {
        directory: None,
        text: "body".to_string(),
    };
    let rendered = fragment.render();
    assert_eq!(
        rendered,
        "# AGENTS.md instructions\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>"
    );
}
#[test]
@@ -2,7 +2,7 @@ use super::ContextualUserFragment;
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct UserInstructions {
pub(crate) directory: String,
pub(crate) directory: Option<String>,
pub(crate) text: String,
}
@@ -16,10 +16,15 @@ impl ContextualUserFragment for UserInstructions {
}
fn type_markers() -> (&'static str, &'static str) {
("# AGENTS.md instructions for ", "</INSTRUCTIONS>")
("# AGENTS.md instructions", "</INSTRUCTIONS>")
}
fn body(&self) -> String {
format!("{}\n\n<INSTRUCTIONS>\n{}\n", self.directory, self.text)
let directory = self
.directory
.as_ref()
.map(|directory| format!(" for {directory}"))
.unwrap_or_default();
format!("{directory}\n\n<INSTRUCTIONS>\n{}\n", self.text)
}
}
@@ -42,6 +42,7 @@ impl ResolvedTurnEnvironments {
self.turn_environments.first()
}
#[cfg(test)]
pub(crate) fn primary_environment(&self) -> Option<Arc<codex_exec_server::Environment>> {
self.primary()
.map(|environment| Arc::clone(&environment.environment))
+3 -15
View File
@@ -294,7 +294,6 @@ use crate::SkillLoadOutcome;
use crate::SkillMetadata;
use crate::SkillsManager;
use crate::agents_md::load_project_instructions;
use crate::context::UserInstructions;
use crate::exec_policy::ExecPolicyUpdateError;
use crate::guardian::GuardianReviewSessionManager;
use crate::mcp::McpManager;
@@ -527,13 +526,9 @@ impl Codex {
config
.startup_warnings
.extend(user_instruction_provider_warnings);
// TODO(anp) assemble instructions from multiple environments
let primary_environment = environment_selections.primary_environment();
let primary_fs = primary_environment
.as_ref()
.map(|environment| environment.get_filesystem());
let loaded_agents_md =
load_project_instructions(&mut config, user_instructions, primary_fs.as_deref()).await;
load_project_instructions(&mut config, user_instructions, &environment_selections)
.await;
let exec_policy = if crate::guardian::is_guardian_reviewer_source(&session_source) {
// Guardian review should rely on the built-in shell safety checks,
@@ -2978,14 +2973,7 @@ impl Session {
}
}
if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
contextual_user_sections.push(
UserInstructions {
text: user_instructions.to_string(),
#[allow(deprecated)]
directory: turn_context.cwd.to_string_lossy().into_owned(),
}
.render(),
);
contextual_user_sections.push(user_instructions.to_string());
}
// This is full-context metadata. Steady-state context diffs should not re-emit it.
if turn_context.features.enabled(Feature::TokenBudget)
+1 -1
View File
@@ -553,7 +553,7 @@ impl Session {
user_instructions: session_configuration
.loaded_agents_md
.as_ref()
.map(LoadedAgentsMd::text),
.map(LoadedAgentsMd::render),
collaboration_mode: session_configuration.collaboration_mode.clone(),
multi_agent_version,
personality: session_configuration.personality,
+4 -1
View File
@@ -491,14 +491,17 @@ async fn resume_and_fork_do_not_restore_thread_environments_from_rollout() {
);
let selected_cwd =
AbsolutePathBuf::try_from(config.cwd.as_path().join("selected")).expect("absolute path");
std::fs::create_dir_all(&selected_cwd).expect("create selected cwd");
let environments = vec![TurnEnvironmentSelection {
environment_id: "local".to_string(),
cwd: selected_cwd.clone(),
}];
let default_cwd = config.cwd.clone();
let mut source_config = config.clone();
source_config.cwd = selected_cwd.clone();
let source = manager
.start_thread_with_options(StartThreadOptions {
config: config.clone(),
config: source_config,
initial_history: InitialHistory::New,
session_source: None,
thread_source: None,
@@ -99,7 +99,7 @@ pub fn format_response_items_snapshot(items: &[Value], options: &ContextSnapshot
}
if options.strip_agents_md_user_context
&& role == "user"
&& text.starts_with("# AGENTS.md instructions for ")
&& text.starts_with("# AGENTS.md instructions")
{
return None;
}
@@ -381,7 +381,7 @@ fn canonicalize_snapshot_text(text: &str) -> String {
if text.starts_with(PLUGINS_INSTRUCTIONS_OPEN_TAG) {
return "<PLUGINS_INSTRUCTIONS>".to_string();
}
if text.starts_with("# AGENTS.md instructions for ") {
if text.starts_with("# AGENTS.md instructions") {
return "<AGENTS_MD>".to_string();
}
if text.starts_with("<environment_context>") {
+153 -11
View File
@@ -3,16 +3,20 @@ use anyhow::anyhow;
use codex_core::ForkSnapshot;
use codex_core::StartThreadOptions;
use codex_exec_server::CreateDirectoryOptions;
use codex_exec_server::LOCAL_ENVIRONMENT_ID;
use codex_exec_server::REMOTE_ENVIRONMENT_ID;
use codex_features::Feature;
use codex_home::CodexHomeUserInstructionsProvider;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::InitialHistory;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::TurnEnvironmentSelection;
use codex_protocol::user_input::UserInput;
use codex_utils_absolute_path::AbsolutePathBuf;
use codex_utils_path_uri::PathUri;
use core_test_support::PathBufExt;
use core_test_support::create_directory_symlink;
use core_test_support::get_remote_test_env;
use core_test_support::load_default_config_for_test;
use core_test_support::responses;
use core_test_support::responses::ev_completed;
@@ -61,7 +65,7 @@ async fn agents_instructions(mut builder: TestCodexBuilder) -> Result<String> {
request
.message_input_texts("user")
.into_iter()
.find(|text| text.starts_with("# AGENTS.md instructions for "))
.find(|text| text.starts_with("# AGENTS.md instructions"))
.ok_or_else(|| anyhow::anyhow!("instructions message not found"))
}
@@ -79,7 +83,7 @@ fn instruction_fragments(request: &responses::ResponsesRequest) -> Vec<String> {
request
.message_input_texts("user")
.into_iter()
.filter(|text| text.starts_with("# AGENTS.md instructions for "))
.filter(|text| text.starts_with("# AGENTS.md instructions"))
.collect()
}
@@ -88,10 +92,31 @@ fn expected_instruction_fragment(cwd: &AbsolutePathBuf, contents: &str) -> Strin
format!("# AGENTS.md instructions for {cwd}\n\n<INSTRUCTIONS>\n{contents}\n</INSTRUCTIONS>")
}
/// Builds the expected instructions fragment whose header carries no directory, wrapping
/// `contents` in the `<INSTRUCTIONS>` block.
fn expected_provider_only_instruction_fragment(contents: &str) -> String {
    let mut fragment = String::from("# AGENTS.md instructions\n\n<INSTRUCTIONS>\n");
    fragment.push_str(contents);
    fragment.push_str("\n</INSTRUCTIONS>");
    fragment
}
/// Asserts that the instruction fragments extracted from `request` are exactly one
/// fragment equal to `expected`.
fn assert_single_instruction_fragment(request: &responses::ResponsesRequest, expected: &str) {
    let fragments = instruction_fragments(request);
    let wanted = vec![expected.to_string()];
    assert_eq!(fragments, wanted);
}
/// Submits `prompt` to `thread` as a single text user input, then waits until an
/// `EventMsg::TurnComplete` event is observed.
///
/// Returns an error if the submission itself fails.
async fn submit_thread_turn(thread: &Arc<codex_core::CodexThread>, prompt: &str) -> Result<()> {
    let text_item = UserInput::Text {
        text: prompt.to_string(),
        text_elements: Vec::new(),
    };
    let op = Op::UserInput {
        items: vec![text_item],
        final_output_json_schema: None,
        responsesapi_client_metadata: None,
        additional_context: Default::default(),
        thread_settings: Default::default(),
    };
    thread.submit(op).await?;
    wait_for_event(thread, |event| matches!(event, EventMsg::TurnComplete(_))).await;
    Ok(())
}
fn request_body_contains(request: &wiremock::Request, text: &str) -> bool {
let is_zstd = request
.headers
@@ -315,7 +340,7 @@ async fn symlinked_cwd_uses_logical_parent_for_agents_discovery() -> Result<()>
.single_request()
.message_input_texts("user")
.into_iter()
.find(|text| text.starts_with("# AGENTS.md instructions for "))
.find(|text| text.starts_with("# AGENTS.md instructions"))
.expect("instructions message");
assert!(instructions.contains("logical parent doc"));
assert!(instructions.contains("workspace doc"));
@@ -362,7 +387,7 @@ async fn selected_environment_sources_match_model_visible_instructions() -> Resu
.single_request()
.message_input_texts("user")
.into_iter()
.find(|text| text.starts_with("# AGENTS.md instructions for "))
.find(|text| text.starts_with("# AGENTS.md instructions"))
.expect("instructions message");
assert!(instructions.contains("global doc\n\n--- project-doc ---\n\nproject doc"));
@@ -561,6 +586,126 @@ async fn fresh_thread_composes_global_before_project_and_reports_sources() -> Re
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn multi_environment_thread_loads_every_project_and_keeps_creation_snapshot() -> Result<()> {
// Bail out early through the network-skip macro, and return Ok without running
// anything when no remote test environment is available.
skip_if_no_network!(Ok(()));
let Some(_remote_env) = get_remote_test_env() else {
return Ok(());
};
// Mount two mock SSE responses, one for each of the two turns submitted below.
let server = responses::start_mock_server().await;
let response_mock = responses::mount_sse_sequence(
&server,
vec![
responses::sse(vec![
responses::ev_response_created("multi-env-response-1"),
responses::ev_completed("multi-env-response-1"),
]),
responses::sse(vec![
responses::ev_response_created("multi-env-response-2"),
responses::ev_completed("multi-env-response-2"),
]),
],
)
.await;
// Write the global instructions into a temporary home and wrap the home-backed
// provider in a recording provider so its load count can be asserted later.
let home = Arc::new(TempDir::new()?);
let global_source =
write_global_file(home.as_ref(), GLOBAL_AGENTS_FILENAME, GLOBAL_INSTRUCTIONS)?;
let provider = Arc::new(RecordingUserInstructionsProvider::new(Arc::new(
CodexHomeUserInstructionsProvider::new(AbsolutePathBuf::try_from(
home.path().to_path_buf(),
)?),
)));
// Give the local environment its own temporary root with a project instructions file.
let local_root = TempDir::new()?;
let local_source = local_root.path().join(GLOBAL_AGENTS_FILENAME);
std::fs::write(&local_source, "local project instructions")?;
// Workspace setup writes a project instructions file into the builder-provided cwd;
// that cwd is later bound to the remote environment (presumably it lives in the
// remote workspace -- confirm against `build_with_remote_and_local_env`).
let mut builder = test_codex()
.with_home(Arc::clone(&home))
.with_user_instructions_provider(provider.clone())
.with_workspace_setup(|cwd, fs| async move {
fs.write_file(
&PathUri::from_path(cwd.join(GLOBAL_AGENTS_FILENAME))?,
b"remote project instructions".to_vec(),
/*sandbox*/ None,
)
.await?;
Ok(())
});
let test = builder.build_with_remote_and_local_env(&server).await?;
let remote_source = test.config.cwd.join(GLOBAL_AGENTS_FILENAME);
// Start a thread bound to both environments, remote first and local second; the
// source-order and rendered-section assertions below rely on this ordering.
let thread = test
.thread_manager
.start_thread_with_options(StartThreadOptions {
config: test.config.clone(),
initial_history: InitialHistory::New,
session_source: None,
thread_source: None,
dynamic_tools: Vec::new(),
metrics_service_name: None,
parent_trace: None,
environments: vec![
TurnEnvironmentSelection {
environment_id: REMOTE_ENVIRONMENT_ID.to_string(),
cwd: test.config.cwd.clone(),
},
TurnEnvironmentSelection {
environment_id: LOCAL_ENVIRONMENT_ID.to_string(),
cwd: local_root.path().to_path_buf().try_into()?,
},
],
thread_extension_init: Default::default(),
})
.await?;
// Record the provider load count right after creation and check that the reported
// sources are the global file followed by the remote and local project files.
assert_eq!(provider.load_count(), 2);
assert_eq!(
thread.thread.instruction_sources().await,
vec![
global_source.clone(),
remote_source.clone(),
local_source.clone().try_into()?,
]
);
submit_thread_turn(&thread.thread, "first multi-environment turn").await?;
// Between the two turns, write override files with new contents into the home, the
// remote cwd, and the local root. The assertions below expect none of this new text
// to appear in either request.
write_global_file(
home.as_ref(),
GLOBAL_AGENTS_OVERRIDE_FILENAME,
NEW_GLOBAL_INSTRUCTIONS,
)?;
test.fs()
.write_file(
&PathUri::from_path(test.config.cwd.join(GLOBAL_AGENTS_OVERRIDE_FILENAME))?,
b"new remote project instructions".to_vec(),
/*sandbox*/ None,
)
.await?;
std::fs::write(
local_root.path().join(GLOBAL_AGENTS_OVERRIDE_FILENAME),
"new local project instructions",
)?;
submit_thread_turn(&thread.thread, "second multi-environment turn").await?;
// Expected fragment: the original global text, then one section per environment
// labelled with its id and root, all wrapped under the "# AGENTS.md instructions"
// header with no path suffix.
let contents = format!(
"{GLOBAL_INSTRUCTIONS}\n\nfor `{REMOTE_ENVIRONMENT_ID}` with root {}\n\nremote project instructions\n\nfor `{LOCAL_ENVIRONMENT_ID}` with root {}\n\nlocal project instructions",
test.config.cwd.display(),
local_root.path().display(),
);
let expected =
format!("# AGENTS.md instructions\n\n<INSTRUCTIONS>\n{contents}\n</INSTRUCTIONS>");
// Both requests must carry the identical creation-time fragment.
let requests = response_mock.requests();
assert_eq!(requests.len(), 2);
assert_single_instruction_fragment(&requests[0], &expected);
assert_single_instruction_fragment(&requests[1], &expected);
// The load count and the reported sources are unchanged after both turns.
assert_eq!(provider.load_count(), 2);
assert_eq!(
thread.thread.instruction_sources().await,
vec![global_source, remote_source, local_source.try_into()?]
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn global_loading_warning_surfaces_during_thread_creation() -> Result<()> {
// Set up a malformed global instruction file and one model response.
@@ -604,7 +749,7 @@ async fn global_loading_warning_surfaces_during_thread_creation() -> Result<()>
"expected warning to contain \"invalid UTF-8\"; observed: {warning}"
);
let expected_fragment =
expected_instruction_fragment(&test.config.cwd, "global\u{FFFD}instructions");
expected_provider_only_instruction_fragment("global\u{FFFD}instructions");
assert_single_instruction_fragment(&response_mock.single_request(), &expected_fragment);
Ok(())
@@ -690,8 +835,7 @@ async fn cold_resume_replays_rendered_instructions_but_reports_current_config_so
Some(initial_input.as_slice()),
"cold resume should replay the original structured input prefix"
);
let expected_fragment =
expected_instruction_fragment(&initial.config.cwd, OLD_GLOBAL_INSTRUCTIONS);
let expected_fragment = expected_provider_only_instruction_fragment(OLD_GLOBAL_INSTRUCTIONS);
assert_single_instruction_fragment(&requests[0], &expected_fragment);
assert_single_instruction_fragment(&requests[1], &expected_fragment);
@@ -797,8 +941,7 @@ async fn fork_replays_rendered_instructions_from_shared_history() -> Result<()>
Some(parent_input.as_slice()),
"fork should replay the parent's original structured input prefix"
);
let expected_fragment =
expected_instruction_fragment(&parent.config.cwd, OLD_GLOBAL_INSTRUCTIONS);
let expected_fragment = expected_provider_only_instruction_fragment(OLD_GLOBAL_INSTRUCTIONS);
assert_single_instruction_fragment(&requests[0], &expected_fragment);
assert_single_instruction_fragment(&requests[1], &expected_fragment);
@@ -933,8 +1076,7 @@ async fn run_subagent_global_instruction_case(fork_context: bool) -> Result<()>
.map_err(|_| anyhow!("timed out waiting for the subagent request"))?;
// Assert parent and child report and render the parent's creation-time snapshot exactly once.
let expected_fragment =
expected_instruction_fragment(&test.config.cwd, OLD_GLOBAL_INSTRUCTIONS);
let expected_fragment = expected_provider_only_instruction_fragment(OLD_GLOBAL_INSTRUCTIONS);
assert_single_instruction_fragment(&seed_request, &expected_fragment);
assert_single_instruction_fragment(&spawn_request, &expected_fragment);
assert_single_instruction_fragment(&child_request, &expected_fragment);
+3 -3
View File
@@ -502,7 +502,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
.position(|(role, text)| {
role == "user"
&& text.contains("be nice")
&& (text.starts_with("# AGENTS.md instructions for "))
&& text.starts_with("# AGENTS.md instructions")
})
.expect("user instructions");
let pos_environment = messages
@@ -1284,7 +1284,7 @@ async fn includes_user_instructions_message_in_request() {
assert!(
user_context_texts
.iter()
.any(|text| text.starts_with("# AGENTS.md instructions for ")),
.any(|text| text.starts_with("# AGENTS.md instructions")),
"expected AGENTS text in contextual user message, got {user_context_texts:?}"
);
let ui_text = user_context_texts
@@ -2370,7 +2370,7 @@ async fn includes_developer_instructions_message_in_request() {
assert!(
user_context_texts
.iter()
.any(|text| text.starts_with("# AGENTS.md instructions for ")),
.any(|text| text.starts_with("# AGENTS.md instructions")),
"expected AGENTS text in contextual user message, got {user_context_texts:?}"
);
let ui_text = user_context_texts
+8 -11
View File
@@ -292,7 +292,7 @@ fn instruction_fragments(request: &responses::ResponsesRequest) -> Vec<String> {
request
.message_input_texts("user")
.into_iter()
.filter(|text| text.starts_with("# AGENTS.md instructions for "))
.filter(|text| text.starts_with("# AGENTS.md instructions"))
.collect()
}
@@ -306,14 +306,13 @@ fn instruction_fragments_in_items(items: &[Value]) -> Vec<String> {
.filter_map(|item| item.get("content").and_then(Value::as_array))
.flatten()
.filter_map(|span| span.get("text").and_then(Value::as_str))
.filter(|text| text.starts_with("# AGENTS.md instructions for "))
.filter(|text| text.starts_with("# AGENTS.md instructions"))
.map(str::to_string)
.collect()
}
fn expected_instruction_fragment(cwd: &AbsolutePathBuf, contents: &str) -> String {
let cwd = cwd.as_path().display();
format!("# AGENTS.md instructions for {cwd}\n\n<INSTRUCTIONS>\n{contents}\n</INSTRUCTIONS>")
fn expected_instruction_fragment(contents: &str) -> String {
format!("# AGENTS.md instructions\n\n<INSTRUCTIONS>\n{contents}\n</INSTRUCTIONS>")
}
fn assert_single_instruction_fragment(request: &responses::ResponsesRequest, expected: &str) {
@@ -1193,7 +1192,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
!item
.get("text")
.and_then(|text| text.as_str())
.is_some_and(|text| text.starts_with("# AGENTS.md instructions for "))
.is_some_and(|text| text.starts_with("# AGENTS.md instructions"))
})
.cloned()
.collect::<Vec<_>>();
@@ -4629,8 +4628,7 @@ async fn manual_compaction_keeps_the_creation_time_global_instructions() -> Resu
// path now contains new text.
let requests = response_mock.requests();
assert_eq!(requests.len(), 3);
let expected_fragment =
expected_instruction_fragment(&test.config.cwd, OLD_GLOBAL_INSTRUCTIONS);
let expected_fragment = expected_instruction_fragment(OLD_GLOBAL_INSTRUCTIONS);
assert_single_instruction_fragment(&requests[0], &expected_fragment);
assert_single_instruction_fragment(&requests[1], &expected_fragment);
assert_single_instruction_fragment(&requests[2], &expected_fragment);
@@ -4702,8 +4700,7 @@ async fn mid_turn_compaction_keeps_the_creation_time_global_instructions() -> Re
// Assert the initial, compact, and resumed requests all keep the old snapshot and source.
let requests = response_mock.requests();
assert_eq!(requests.len(), 3);
let expected_fragment =
expected_instruction_fragment(&test.config.cwd, OLD_GLOBAL_INSTRUCTIONS);
let expected_fragment = expected_instruction_fragment(OLD_GLOBAL_INSTRUCTIONS);
assert_single_instruction_fragment(&requests[0], &expected_fragment);
assert_single_instruction_fragment(&requests[1], &expected_fragment);
assert_single_instruction_fragment(&requests[2], &expected_fragment);
@@ -4776,7 +4773,7 @@ async fn remote_v2_compaction_keeps_creation_time_instructions_after_same_path_m
// creation-time item despite the file-backed source now containing new text.
let requests = response_mock.requests();
assert_eq!(requests.len(), 3);
let old_fragment = expected_instruction_fragment(&test.config.cwd, OLD_GLOBAL_INSTRUCTIONS);
let old_fragment = expected_instruction_fragment(OLD_GLOBAL_INSTRUCTIONS);
assert_single_instruction_fragment(&requests[0], &old_fragment);
assert_single_instruction_fragment(&requests[1], &old_fragment);
assert_single_instruction_fragment(&requests[2], &old_fragment);
@@ -44,7 +44,7 @@ async fn hierarchical_agents_appends_to_project_doc_in_user_instructions() {
let user_messages = request.message_input_texts("user");
let instructions = user_messages
.iter()
.find(|text| text.starts_with("# AGENTS.md instructions for "))
.find(|text| text.starts_with("# AGENTS.md instructions"))
.expect("instructions message");
assert!(
instructions.contains("be nice"),
@@ -88,7 +88,7 @@ async fn hierarchical_agents_emits_when_no_project_doc() {
let user_messages = request.message_input_texts("user");
let instructions = user_messages
.iter()
.find(|text| text.starts_with("# AGENTS.md instructions for "))
.find(|text| text.starts_with("# AGENTS.md instructions"))
.expect("instructions message");
assert!(
instructions.contains(HIERARCHICAL_AGENTS_SNIPPET),
@@ -52,7 +52,7 @@ fn user_instructions_wrapper_count(request: &ResponsesRequest) -> usize {
request
.message_input_texts("user")
.iter()
.filter(|text| text.starts_with("# AGENTS.md instructions for "))
.filter(|text| text.starts_with("# AGENTS.md instructions"))
.count()
}
+9 -1
View File
@@ -460,7 +460,7 @@ mod job {
return false;
};
matches_marked_fragment(text, "# AGENTS.md instructions for ", "</INSTRUCTIONS>")
matches_marked_fragment(text, "# AGENTS.md instructions", "</INSTRUCTIONS>")
|| matches_marked_fragment(text, "<skill>", "</skill>")
}
@@ -487,6 +487,10 @@ mod job {
"# AGENTS.md instructions for /tmp\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>",
true,
),
(
"# AGENTS.md instructions\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>",
true,
),
(
"<skill>\n<name>demo</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>",
true,
@@ -665,6 +669,10 @@ mod tests {
"# AGENTS.md instructions for /tmp\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>"
.to_string(),
},
ContentItem::InputText {
text: "# AGENTS.md instructions\n\n<INSTRUCTIONS>\nbody\n</INSTRUCTIONS>"
.to_string(),
},
ContentItem::InputText {
text: "<environment_context>\n<cwd>/tmp</cwd>\n</environment_context>"
.to_string(),