[codex] Exclude external tool output from memories (#26821)

## Summary

- add contains_external_context() to tool output so other tools can be
opted out of influencing memory when disable_on_external_context=true
- Classify standalone web-search output as external context (to match
behavior as hosted web search)
- Verify with integration test
This commit is contained in:
rka-oai
2026-06-08 09:53:04 -07:00
committed by GitHub
Unverified
parent 0526cb56ac
commit 26d9329833
4 changed files with 113 additions and 2 deletions
+11
View File
@@ -28,6 +28,7 @@ use codex_extension_api::ToolCallOutcome;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::protocol::EventMsg;
use codex_rollout::state_db;
use codex_tools::ToolName;
use codex_tools::ToolSearchInfo;
use codex_tools::ToolSpec;
@@ -696,6 +697,16 @@ async fn handle_any_tool(
let call_id = invocation.call_id.clone();
let payload = invocation.payload.clone();
let output = tool.handle(invocation.clone()).await?;
if output.contains_external_context()
&& invocation.turn.config.memories.disable_on_external_context
{
state_db::mark_thread_memory_mode_polluted(
invocation.session.services.state_db.as_deref(),
invocation.session.thread_id,
"tool_output",
)
.await;
}
let post_tool_use_payload =
CoreToolRuntime::post_tool_use_payload(tool, &invocation, output.as_ref());
Ok(AnyToolResult {
+88
View File
@@ -1,8 +1,12 @@
use anyhow::Result;
use codex_config::types::McpServerConfig;
use codex_config::types::McpServerTransportConfig;
use codex_core::config::Config;
use codex_extension_api::ExtensionRegistryBuilder;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_protocol::ThreadId;
use codex_protocol::config_types::WebSearchMode;
use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::models::PermissionProfile;
use codex_protocol::protocol::AskForApproval;
@@ -15,6 +19,7 @@ use codex_protocol::protocol::SessionMetaLine;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::UserMessageEvent;
use codex_protocol::user_input::UserInput;
use codex_web_search_extension::install as install_web_search_extension;
use core_test_support::responses;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
@@ -34,9 +39,14 @@ use pretty_assertions::assert_eq;
use serde_json::json;
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use tokio::time::Duration;
use tracing_subscriber::prelude::*;
use uuid::Uuid;
use wiremock::Mock;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn new_thread_is_recorded_in_state_db() -> Result<()> {
@@ -384,6 +394,84 @@ async fn web_search_marks_thread_memory_mode_polluted_when_configured() -> Resul
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn standalone_web_search_marks_thread_memory_mode_polluted_when_configured() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
Mock::given(method("POST"))
.and(path("/v1/alpha/search"))
.respond_with(ResponseTemplate::new(200).set_body_json(json!({
"output": "Search result",
})))
.expect(1)
.mount(&server)
.await;
mount_sse_sequence(
&server,
vec![
responses::sse(vec![
ev_response_created("resp-1"),
responses::ev_function_call_with_namespace(
"web-run-1",
"web",
"run",
&json!({
"search_query": [{"q": "standalone web search"}],
})
.to_string(),
),
ev_completed("resp-1"),
]),
responses::sse(vec![
responses::ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
],
)
.await;
let auth = CodexAuth::from_api_key("dummy");
let auth_manager = codex_core::test_support::auth_manager_from_auth(auth.clone());
let mut extension_builder = ExtensionRegistryBuilder::<Config>::new();
install_web_search_extension(&mut extension_builder, auth_manager);
let mut builder = test_codex()
.with_auth(auth)
.with_extensions(Arc::new(extension_builder.build()))
.with_config(|config| {
config
.features
.enable(Feature::Sqlite)
.expect("test config should allow feature update");
config
.features
.enable(Feature::StandaloneWebSearch)
.expect("standalone web search should be enabled");
config.memories.disable_on_external_context = true;
config
.web_search_mode
.set(WebSearchMode::Live)
.expect("web search mode should be accepted");
});
let test = builder.build(&server).await?;
let db = test.codex.state_db().expect("state db enabled");
let thread_id = test.session_configured.thread_id;
test.submit_turn("search the web").await?;
let mut memory_mode = None;
for _ in 0..100 {
memory_mode = db.get_thread_memory_mode(thread_id).await?;
if memory_mode.as_deref() == Some("polluted") {
break;
}
tokio::time::sleep(Duration::from_millis(25)).await;
}
assert_eq!(memory_mode.as_deref(), Some("polluted"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn mcp_call_marks_thread_memory_mode_polluted_when_configured() -> Result<()> {
skip_if_no_network!(Ok(()));
+4 -2
View File
@@ -23,9 +23,11 @@ impl ToolOutput for SearchOutput {
true
}
fn contains_external_context(&self) -> bool {
true
}
fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
// TODO: Make standalone search honor memories.disable_on_external_context,
// as hosted web search does.
ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
+10
View File
@@ -18,6 +18,12 @@ pub trait ToolOutput: Send {
fn success_for_logging(&self) -> bool;
/// Whether this output contains external context that should disable memory generation when
/// `memories.disable_on_external_context` is enabled.
fn contains_external_context(&self) -> bool {
false
}
fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem;
/// Returns the tool call id exposed to `PostToolUse` hooks for this output.
@@ -58,6 +64,10 @@ where
(**self).success_for_logging()
}
fn contains_external_context(&self) -> bool {
(**self).contains_external_context()
}
fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem {
(**self).to_response_item(call_id, payload)
}