mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
[codex] Exclude external tool output from memories (#26821)
## Summary - add contains_external_context() to tool output so other tools can be opted out of influencing memory when disable_on_external_context=true - Classify standalone web-search output as external context (to match behavior as hosted web search) - Verify with integration test
This commit is contained in:
committed by
GitHub
Unverified
parent
0526cb56ac
commit
26d9329833
@@ -28,6 +28,7 @@ use codex_extension_api::ToolCallOutcome;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_rollout::state_db;
|
||||
use codex_tools::ToolName;
|
||||
use codex_tools::ToolSearchInfo;
|
||||
use codex_tools::ToolSpec;
|
||||
@@ -696,6 +697,16 @@ async fn handle_any_tool(
|
||||
let call_id = invocation.call_id.clone();
|
||||
let payload = invocation.payload.clone();
|
||||
let output = tool.handle(invocation.clone()).await?;
|
||||
if output.contains_external_context()
|
||||
&& invocation.turn.config.memories.disable_on_external_context
|
||||
{
|
||||
state_db::mark_thread_memory_mode_polluted(
|
||||
invocation.session.services.state_db.as_deref(),
|
||||
invocation.session.thread_id,
|
||||
"tool_output",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let post_tool_use_payload =
|
||||
CoreToolRuntime::post_tool_use_payload(tool, &invocation, output.as_ref());
|
||||
Ok(AnyToolResult {
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
use anyhow::Result;
|
||||
use codex_config::types::McpServerConfig;
|
||||
use codex_config::types::McpServerTransportConfig;
|
||||
use codex_core::config::Config;
|
||||
use codex_extension_api::ExtensionRegistryBuilder;
|
||||
use codex_features::Feature;
|
||||
use codex_login::CodexAuth;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_protocol::config_types::WebSearchMode;
|
||||
use codex_protocol::dynamic_tools::DynamicToolSpec;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
@@ -15,6 +19,7 @@ use codex_protocol::protocol::SessionMetaLine;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::UserMessageEvent;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use codex_web_search_extension::install as install_web_search_extension;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
@@ -34,9 +39,14 @@ use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::sync::Arc;
|
||||
use tokio::time::Duration;
|
||||
use tracing_subscriber::prelude::*;
|
||||
use uuid::Uuid;
|
||||
use wiremock::Mock;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn new_thread_is_recorded_in_state_db() -> Result<()> {
|
||||
@@ -384,6 +394,84 @@ async fn web_search_marks_thread_memory_mode_polluted_when_configured() -> Resul
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn standalone_web_search_marks_thread_memory_mode_polluted_when_configured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/alpha/search"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(json!({
|
||||
"output": "Search result",
|
||||
})))
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
responses::sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
responses::ev_function_call_with_namespace(
|
||||
"web-run-1",
|
||||
"web",
|
||||
"run",
|
||||
&json!({
|
||||
"search_query": [{"q": "standalone web search"}],
|
||||
})
|
||||
.to_string(),
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let auth = CodexAuth::from_api_key("dummy");
|
||||
let auth_manager = codex_core::test_support::auth_manager_from_auth(auth.clone());
|
||||
let mut extension_builder = ExtensionRegistryBuilder::<Config>::new();
|
||||
install_web_search_extension(&mut extension_builder, auth_manager);
|
||||
let mut builder = test_codex()
|
||||
.with_auth(auth)
|
||||
.with_extensions(Arc::new(extension_builder.build()))
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::Sqlite)
|
||||
.expect("test config should allow feature update");
|
||||
config
|
||||
.features
|
||||
.enable(Feature::StandaloneWebSearch)
|
||||
.expect("standalone web search should be enabled");
|
||||
config.memories.disable_on_external_context = true;
|
||||
config
|
||||
.web_search_mode
|
||||
.set(WebSearchMode::Live)
|
||||
.expect("web search mode should be accepted");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let db = test.codex.state_db().expect("state db enabled");
|
||||
let thread_id = test.session_configured.thread_id;
|
||||
|
||||
test.submit_turn("search the web").await?;
|
||||
|
||||
let mut memory_mode = None;
|
||||
for _ in 0..100 {
|
||||
memory_mode = db.get_thread_memory_mode(thread_id).await?;
|
||||
if memory_mode.as_deref() == Some("polluted") {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(25)).await;
|
||||
}
|
||||
|
||||
assert_eq!(memory_mode.as_deref(), Some("polluted"));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn mcp_call_marks_thread_memory_mode_polluted_when_configured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
@@ -23,9 +23,11 @@ impl ToolOutput for SearchOutput {
|
||||
true
|
||||
}
|
||||
|
||||
fn contains_external_context(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
|
||||
// TODO: Make standalone search honor memories.disable_on_external_context,
|
||||
// as hosted web search does.
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
|
||||
@@ -18,6 +18,12 @@ pub trait ToolOutput: Send {
|
||||
|
||||
fn success_for_logging(&self) -> bool;
|
||||
|
||||
/// Whether this output contains external context that should disable memory generation when
|
||||
/// `memories.disable_on_external_context` is enabled.
|
||||
fn contains_external_context(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem;
|
||||
|
||||
/// Returns the tool call id exposed to `PostToolUse` hooks for this output.
|
||||
@@ -58,6 +64,10 @@ where
|
||||
(**self).success_for_logging()
|
||||
}
|
||||
|
||||
fn contains_external_context(&self) -> bool {
|
||||
(**self).contains_external_context()
|
||||
}
|
||||
|
||||
fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem {
|
||||
(**self).to_response_item(call_id, payload)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user