From 26d932983398147e4443bd655ce24a6ce6833a1c Mon Sep 17 00:00:00 2001 From: rka-oai Date: Mon, 8 Jun 2026 09:53:04 -0700 Subject: [PATCH] [codex] Exclude external tool output from memories (#26821) ## Summary - add contains_external_context() to tool output so other tools can be opted out of influencing memory when disable_on_external_context=true - Classify standalone web-search output as external context (to match behavior as hosted web search) - Verify with integration test --- codex-rs/core/src/tools/registry.rs | 11 +++ codex-rs/core/tests/suite/sqlite_state.rs | 88 +++++++++++++++++++++++ codex-rs/ext/web-search/src/output.rs | 6 +- codex-rs/tools/src/tool_output.rs | 10 +++ 4 files changed, 113 insertions(+), 2 deletions(-) diff --git a/codex-rs/core/src/tools/registry.rs b/codex-rs/core/src/tools/registry.rs index 1cc5e67eb..20e08e9f7 100644 --- a/codex-rs/core/src/tools/registry.rs +++ b/codex-rs/core/src/tools/registry.rs @@ -28,6 +28,7 @@ use codex_extension_api::ToolCallOutcome; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; use codex_protocol::protocol::EventMsg; +use codex_rollout::state_db; use codex_tools::ToolName; use codex_tools::ToolSearchInfo; use codex_tools::ToolSpec; @@ -696,6 +697,16 @@ async fn handle_any_tool( let call_id = invocation.call_id.clone(); let payload = invocation.payload.clone(); let output = tool.handle(invocation.clone()).await?; + if output.contains_external_context() + && invocation.turn.config.memories.disable_on_external_context + { + state_db::mark_thread_memory_mode_polluted( + invocation.session.services.state_db.as_deref(), + invocation.session.thread_id, + "tool_output", + ) + .await; + } let post_tool_use_payload = CoreToolRuntime::post_tool_use_payload(tool, &invocation, output.as_ref()); Ok(AnyToolResult { diff --git a/codex-rs/core/tests/suite/sqlite_state.rs b/codex-rs/core/tests/suite/sqlite_state.rs index 9f8c940a0..eb9f11a86 100644 --- a/codex-rs/core/tests/suite/sqlite_state.rs +++ b/codex-rs/core/tests/suite/sqlite_state.rs @@ -1,8 +1,12 @@ use anyhow::Result; use codex_config::types::McpServerConfig; use codex_config::types::McpServerTransportConfig; +use codex_core::config::Config; +use codex_extension_api::ExtensionRegistryBuilder; use codex_features::Feature; +use codex_login::CodexAuth; use codex_protocol::ThreadId; +use codex_protocol::config_types::WebSearchMode; use codex_protocol::dynamic_tools::DynamicToolSpec; use codex_protocol::models::PermissionProfile; use codex_protocol::protocol::AskForApproval; @@ -15,6 +19,7 @@ use codex_protocol::protocol::SessionMetaLine; use codex_protocol::protocol::SessionSource; use codex_protocol::protocol::UserMessageEvent; use codex_protocol::user_input::UserInput; +use codex_web_search_extension::install as install_web_search_extension; use core_test_support::responses; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call; @@ -34,9 +39,14 @@ use pretty_assertions::assert_eq; use serde_json::json; use std::collections::HashMap; use std::fs; +use std::sync::Arc; use tokio::time::Duration; use tracing_subscriber::prelude::*; use uuid::Uuid; +use wiremock::Mock; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn new_thread_is_recorded_in_state_db() -> Result<()> { @@ -384,6 +394,84 @@ async fn web_search_marks_thread_memory_mode_polluted_when_configured() -> Resul Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn standalone_web_search_marks_thread_memory_mode_polluted_when_configured() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + Mock::given(method("POST")) + .and(path("/v1/alpha/search")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "output": "Search result", + }))) + .expect(1) + .mount(&server) + .await; + mount_sse_sequence( + &server, + vec![ + responses::sse(vec![ + ev_response_created("resp-1"), + responses::ev_function_call_with_namespace( + "web-run-1", + "web", + "run", + &json!({ + "search_query": [{"q": "standalone web search"}], + }) + .to_string(), + ), + ev_completed("resp-1"), + ]), + responses::sse(vec![ + responses::ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ], + ) + .await; + + let auth = CodexAuth::from_api_key("dummy"); + let auth_manager = codex_core::test_support::auth_manager_from_auth(auth.clone()); + let mut extension_builder = ExtensionRegistryBuilder::::new(); + install_web_search_extension(&mut extension_builder, auth_manager); + let mut builder = test_codex() + .with_auth(auth) + .with_extensions(Arc::new(extension_builder.build())) + .with_config(|config| { + config + .features + .enable(Feature::Sqlite) + .expect("test config should allow feature update"); + config + .features + .enable(Feature::StandaloneWebSearch) + .expect("standalone web search should be enabled"); + config.memories.disable_on_external_context = true; + config + .web_search_mode + .set(WebSearchMode::Live) + .expect("web search mode should be accepted"); + }); + let test = builder.build(&server).await?; + let db = test.codex.state_db().expect("state db enabled"); + let thread_id = test.session_configured.thread_id; + + test.submit_turn("search the web").await?; + + let mut memory_mode = None; + for _ in 0..100 { + memory_mode = db.get_thread_memory_mode(thread_id).await?; + if memory_mode.as_deref() == Some("polluted") { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + + assert_eq!(memory_mode.as_deref(), Some("polluted")); + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn mcp_call_marks_thread_memory_mode_polluted_when_configured() -> Result<()> { skip_if_no_network!(Ok(())); diff --git a/codex-rs/ext/web-search/src/output.rs b/codex-rs/ext/web-search/src/output.rs index 799897b62..543afdf7a 100644 --- a/codex-rs/ext/web-search/src/output.rs +++ b/codex-rs/ext/web-search/src/output.rs @@ -23,9 +23,11 @@ impl ToolOutput for SearchOutput { true } + fn contains_external_context(&self) -> bool { + true + } + fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem { - // TODO: Make standalone search honor memories.disable_on_external_context, - // as hosted web search does. ResponseInputItem::FunctionCallOutput { call_id: call_id.to_string(), output: FunctionCallOutputPayload::from_content_items(vec![ diff --git a/codex-rs/tools/src/tool_output.rs b/codex-rs/tools/src/tool_output.rs index b61bb80de..fe57f23ce 100644 --- a/codex-rs/tools/src/tool_output.rs +++ b/codex-rs/tools/src/tool_output.rs @@ -18,6 +18,12 @@ pub trait ToolOutput: Send { fn success_for_logging(&self) -> bool; + /// Whether this output contains external context that should disable memory generation when + /// `memories.disable_on_external_context` is enabled. + fn contains_external_context(&self) -> bool { + false + } + fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem; /// Returns the tool call id exposed to `PostToolUse` hooks for this output. @@ -58,6 +64,10 @@ where (**self).success_for_logging() } + fn contains_external_context(&self) -> bool { + (**self).contains_external_context() + } + fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { (**self).to_response_item(call_id, payload) }