[codex] Use tool search for MCP tools by default (#29486)

## Why

MCP tools were only placed behind `tool_search` when a feature flag was
enabled or when there were at least 100 tools. That made the model's
tool flow depend on both rollout configuration and the number of
installed tools.

The searched-tool flow is now the intended behavior. Making it
unconditional when the model and provider support it gives every
supported setup the same behavior and lets us retire the feature flag
safely.

## What changed

- Defer all effective MCP tools when `tool_search` and namespaced tools
are supported.
- Keep exposing MCP tools directly when search cannot be used, so older
or unsupported model/provider combinations still work.
- Mark `tool_search_always_defer_mcp_tools` as removed and ignore old
configured values.
- Keep plugin filtering, app-only filtering, file handling, and MCP
calls working through the searched-tool flow.

## Why many tests changed

Many tests used to act as if the model could see MCP tools in its first
request and call them immediately. That is no longer the real flow: the
model first receives `tool_search`, searches for a tool, receives the
matching MCP tool, and then calls it in the next request.

The tests therefore needed an extra search step, and checks for tool
names, descriptions, and input fields had to move from the first request
to the search result. These are not separate product changes; they make
the tests follow what the model will actually see after this change. The
plugin tests still check which tools are allowed and where they came
from, the file tests still check upload fields and behavior, and the MCP
round-trip test still checks a successful call from start to finish.

## Tests

- `just test -p codex-features`
- Focused `codex-core` tests for MCP exposure and tool planning
- `just test -p codex-core explicit_plugin_mentions`
- `just test -p codex-core stdio_server_round_trip`
- Focused `codex-core` tests for tool search, app-only tools, and MCP
file uploads
This commit is contained in:
sayan-oai
2026-06-22 16:45:23 -07:00
committed by GitHub
Unverified
parent 4a82ecc3c9
commit c53b1dae09
10 changed files with 207 additions and 186 deletions
+1 -10
View File
@@ -2,7 +2,6 @@ use std::collections::HashSet;
use codex_connectors::AppToolPolicyEvaluator;
use codex_connectors::AppToolPolicyInput;
use codex_features::Feature;
use codex_mcp::CODEX_APPS_MCP_SERVER_NAME;
use codex_mcp::ToolInfo as McpToolInfo;
use codex_mcp::tool_is_model_visible;
@@ -11,8 +10,6 @@ use tracing::instrument;
use crate::config::Config;
use crate::connectors;
pub(crate) const DIRECT_MCP_TOOL_EXPOSURE_THRESHOLD: usize = 100;
pub(crate) struct McpToolExposure {
pub(crate) direct_tools: Vec<McpToolInfo>,
pub(crate) deferred_tools: Option<Vec<McpToolInfo>>,
@@ -34,13 +31,7 @@ pub(crate) fn build_mcp_tool_exposure(
));
}
let should_defer = search_tool_enabled
&& (config
.features
.enabled(Feature::ToolSearchAlwaysDeferMcpTools)
|| deferred_tools.len() >= DIRECT_MCP_TOOL_EXPOSURE_THRESHOLD);
if !should_defer {
if !search_tool_enabled {
return McpToolExposure {
direct_tools: deferred_tools,
deferred_tools: None,
+8 -13
View File
@@ -1,7 +1,6 @@
use std::collections::HashSet;
use std::sync::Arc;
use codex_features::Feature;
use codex_mcp::CODEX_APPS_MCP_SERVER_NAME;
use codex_mcp::ToolInfo;
use codex_tools::ToolName;
@@ -95,12 +94,12 @@ fn with_visibility(mut tool: ToolInfo, visibility: &[&str]) -> ToolInfo {
}
#[tokio::test]
async fn directly_exposes_small_effective_tool_sets() {
async fn directly_exposes_effective_tool_sets_when_search_is_unavailable() {
let config = test_config().await;
let mcp_tools = numbered_mcp_tools(DIRECT_MCP_TOOL_EXPOSURE_THRESHOLD - 1);
let mcp_tools = numbered_mcp_tools(/*count*/ 2);
let exposure = build_mcp_tool_exposure(
&mcp_tools, /*connectors*/ None, &config, /*search_tool_enabled*/ true,
&mcp_tools, /*connectors*/ None, &config, /*search_tool_enabled*/ false,
);
assert_eq!(tool_names(&exposure.direct_tools), tool_names(&mcp_tools));
@@ -237,9 +236,9 @@ enabled = true
}
#[tokio::test]
async fn searches_large_effective_tool_sets() {
async fn defers_effective_tool_sets_when_search_is_available() {
let config = test_config().await;
let mcp_tools = numbered_mcp_tools(DIRECT_MCP_TOOL_EXPOSURE_THRESHOLD);
let mcp_tools = numbered_mcp_tools(/*count*/ 2);
let exposure = build_mcp_tool_exposure(
&mcp_tools, /*connectors*/ None, &config, /*search_tool_enabled*/ true,
@@ -249,17 +248,13 @@ async fn searches_large_effective_tool_sets() {
let deferred_tools = exposure
.deferred_tools
.as_ref()
.expect("large tool sets should be discoverable through tool_search");
.expect("MCP tools should be discoverable through tool_search");
assert_eq!(tool_names(deferred_tools), tool_names(&mcp_tools));
}
#[tokio::test]
async fn always_defer_feature_defers_apps_too() {
let mut config = test_config().await;
config
.features
.enable(Feature::ToolSearchAlwaysDeferMcpTools)
.expect("test config should allow feature update");
async fn defers_apps_and_non_app_mcp_tools() {
let config = test_config().await;
let mcp_tools = vec![
make_mcp_tool(
"rmcp",
+7 -9
View File
@@ -326,7 +326,7 @@ fn hosted_model_tool_specs(context: &CoreToolPlanContext<'_>) -> Vec<ToolSpec> {
}
pub(crate) fn search_tool_enabled(turn_context: &TurnContext) -> bool {
turn_context.model_info.supports_search_tool
turn_context.model_info.supports_search_tool && namespace_tools_enabled(turn_context)
}
pub(crate) fn tool_suggest_enabled(turn_context: &TurnContext) -> bool {
@@ -820,12 +820,11 @@ fn add_collaboration_tools(context: &CoreToolPlanContext<'_>, planned_tools: &mu
} else {
let agent_type_description =
agent_type_description(turn_context, context.default_agent_type_description);
let exposure =
if search_tool_enabled(turn_context) && namespace_tools_enabled(turn_context) {
ToolExposure::Deferred
} else {
ToolExposure::Direct
};
let exposure = if search_tool_enabled(turn_context) {
ToolExposure::Deferred
} else {
ToolExposure::Direct
};
planned_tools.add_with_exposure(
SpawnAgentHandler::new(SpawnAgentToolOptions {
available_models: turn_context.available_models.clone(),
@@ -943,7 +942,7 @@ fn append_tool_search_executor(
planned_tools: &mut PlannedTools,
) {
let turn_context = context.turn_context;
if !(search_tool_enabled(turn_context) && namespace_tools_enabled(turn_context)) {
if !search_tool_enabled(turn_context) {
return;
}
@@ -990,7 +989,6 @@ fn append_extension_tool_executors(
reserved_tool_names.insert(ToolName::plain(codex_code_mode::WAIT_TOOL_NAME));
}
if search_tool_enabled(turn_context)
&& namespace_tools_enabled(turn_context)
&& planned_tools
.runtimes()
.iter()
@@ -35,10 +35,6 @@ async fn code_mode_only_exposes_direct_model_only_mcp_namespaces() -> Result<()>
.features
.enable(Feature::CodeModeOnly)
.expect("test config should allow feature update");
config
.features
.enable(Feature::ToolSearchAlwaysDeferMcpTools)
.expect("test config should allow feature update");
config.code_mode.direct_only_tool_namespaces =
vec![SEARCH_CALENDAR_NAMESPACE.to_string()];
});
+24 -8
View File
@@ -22,7 +22,9 @@ use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call_with_namespace;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::ev_tool_search_call;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::namespace_child_tool;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::TestCodex;
@@ -142,18 +144,29 @@ async fn run_extract_turn(test: &TestCodex, server: &MockServer) -> Result<Respo
vec![
sse(vec![
ev_response_created("resp-1"),
ev_tool_search_call(
"extract-search-1",
&json!({
"query": "extract text from uploaded document",
"limit": 1,
}),
),
ev_completed("resp-1"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_function_call_with_namespace(
"extract-call-1",
DOCUMENT_EXTRACT_NAMESPACE,
DOCUMENT_EXTRACT_TOOL,
&json!({"file": "report.txt"}).to_string(),
),
ev_completed("resp-1"),
ev_completed("resp-2"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_response_created("resp-3"),
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
ev_completed("resp-3"),
]),
],
)
@@ -190,13 +203,16 @@ async fn codex_apps_file_params_upload_environment_files_before_mcp_tool_call()
let mock = run_extract_turn(&test, &server).await?;
let requests = mock.requests();
let body = requests[0].body_json();
let search_output = requests[1].tool_search_output("extract-search-1");
let missing_tool_message = format!(
"missing tool {DOCUMENT_EXTRACT_NAMESPACE}{DOCUMENT_EXTRACT_TOOL} in /v1/responses request: {body:?}"
"missing tool {DOCUMENT_EXTRACT_NAMESPACE}{DOCUMENT_EXTRACT_TOOL} in tool_search output: {search_output:?}"
);
let extract_tool = requests[0]
.tool_by_name(DOCUMENT_EXTRACT_NAMESPACE, DOCUMENT_EXTRACT_TOOL)
.expect(&missing_tool_message);
let extract_tool = namespace_child_tool(
&search_output,
DOCUMENT_EXTRACT_NAMESPACE,
DOCUMENT_EXTRACT_TOOL,
)
.expect(&missing_tool_message);
assert_eq!(
extract_tool.pointer("/parameters/properties/file"),
Some(&json!({
+89 -84
View File
@@ -12,9 +12,15 @@ use codex_mcp::CODEX_APPS_MCP_SERVER_NAME;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
use core_test_support::apps_test_server::AppsTestServer;
use core_test_support::apps_test_server::SEARCH_CALENDAR_CREATE_TOOL;
use core_test_support::responses::ResponseMock;
use core_test_support::responses::ResponsesRequest;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::ev_tool_search_call;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::namespace_child_tool;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
@@ -29,6 +35,10 @@ use wiremock::MockServer;
const SAMPLE_PLUGIN_CONFIG_NAME: &str = "sample@test";
const SAMPLE_PLUGIN_DISPLAY_NAME: &str = "sample";
const SAMPLE_PLUGIN_DESCRIPTION: &str = "inspect sample data";
const SAMPLE_PLUGIN_APP_NAMESPACE: &str = "mcp__codex_apps__google_calendar";
const SAMPLE_PLUGIN_MCP_NAMESPACE: &str = "mcp__sample";
const PLUGIN_APP_SEARCH_CALL_ID: &str = "plugin-app-search";
const PLUGIN_MCP_SEARCH_CALL_ID: &str = "plugin-mcp-search";
fn sample_plugin_root(home: &TempDir) -> std::path::PathBuf {
home.path().join("plugins/cache/test/sample/local")
@@ -145,21 +155,53 @@ async fn build_apps_enabled_plugin_test_codex(
.expect("create new conversation"))
}
fn tool_names(body: &serde_json::Value) -> Vec<String> {
body.get("tools")
.and_then(serde_json::Value::as_array)
.map(|tools| {
tools
.iter()
.filter_map(|tool| {
tool.get("name")
.or_else(|| tool.get("type"))
.and_then(serde_json::Value::as_str)
.map(str::to_string)
})
.collect()
})
.unwrap_or_default()
async fn mount_plugin_tool_search_turn(server: &MockServer) -> ResponseMock {
mount_sse_sequence(
server,
vec![
sse(vec![
ev_response_created("resp-1"),
ev_tool_search_call(
PLUGIN_APP_SEARCH_CALL_ID,
&serde_json::json!({"query": "create calendar event"}),
),
ev_tool_search_call(
PLUGIN_MCP_SEARCH_CALL_ID,
&serde_json::json!({"query": "echo"}),
),
ev_completed("resp-1"),
]),
sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]),
],
)
.await
}
fn assert_plugin_provenance(tool: &serde_json::Value) {
let description = tool
.get("description")
.and_then(serde_json::Value::as_str)
.expect("plugin tool description should be present");
assert!(
description.contains("This tool is part of plugin `sample`."),
"expected plugin provenance in tool description: {description:?}"
);
}
fn searched_plugin_tools(
request: &ResponsesRequest,
) -> (Option<serde_json::Value>, Option<serde_json::Value>) {
let app_output = request.tool_search_output(PLUGIN_APP_SEARCH_CALL_ID);
let mcp_output = request.tool_search_output(PLUGIN_MCP_SEARCH_CALL_ID);
(
namespace_child_tool(
&app_output,
SAMPLE_PLUGIN_APP_NAMESPACE,
SEARCH_CALENDAR_CREATE_TOOL,
)
.cloned(),
namespace_child_tool(&mcp_output, SAMPLE_PLUGIN_MCP_NAMESPACE, "echo").cloned(),
)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -237,11 +279,7 @@ async fn explicit_plugin_mentions_use_apps_for_chatgpt_dual_surface_plugins() ->
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let apps_server = AppsTestServer::mount_with_connector_name(&server, "Google Calendar").await?;
let mock = mount_sse_once(
&server,
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
)
.await;
let mock = mount_plugin_tool_search_turn(&server).await;
let codex_home = Arc::new(TempDir::new()?);
let rmcp_test_server_bin = match stdio_server_bin() {
@@ -275,7 +313,8 @@ async fn explicit_plugin_mentions_use_apps_for_chatgpt_dual_surface_plugins() ->
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let request = mock.single_request();
let requests = mock.requests();
let request = &requests[0];
let developer_messages = request.message_input_texts("developer");
assert!(
developer_messages
@@ -295,28 +334,18 @@ async fn explicit_plugin_mentions_use_apps_for_chatgpt_dual_surface_plugins() ->
.any(|text| text.contains("Apps from this plugin")),
"expected visible plugin app guidance: {developer_messages:?}"
);
let request_body = request.body_json();
let request_tools = tool_names(&request_body);
assert!(
request_tools
.iter()
.any(|name| name == "mcp__codex_apps__google_calendar"),
"expected plugin app tools to become visible for this turn: {request_tools:?}"
request
.tool_by_name(SAMPLE_PLUGIN_MCP_NAMESPACE, "echo")
.is_none(),
"plugin MCP tool should not leak into the request for ChatGPT auth"
);
let (calendar_tool, echo_tool) = searched_plugin_tools(&requests[1]);
let calendar_tool = calendar_tool.expect("plugin app tool should be searchable");
assert_plugin_provenance(&calendar_tool);
assert!(
request.tool_by_name("mcp__sample", "echo").is_none(),
"expected plugin MCP tool to be suppressed for ChatGPT auth"
);
let calendar_tool = request
.tool_by_name("mcp__codex_apps__google_calendar", "_create_event")
.expect("plugin app tool should be present");
let calendar_description = calendar_tool
.get("description")
.and_then(serde_json::Value::as_str)
.expect("plugin app tool description should be present");
assert!(
calendar_description.contains("This tool is part of plugin `sample`."),
"expected plugin app provenance in tool description: {calendar_description:?}"
echo_tool.is_none(),
"plugin MCP tool should be suppressed for ChatGPT auth"
);
Ok(())
@@ -327,11 +356,7 @@ async fn explicit_plugin_mentions_keep_non_conflicting_mcp_for_chatgpt_auth() ->
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let apps_server = AppsTestServer::mount_with_connector_name(&server, "Google Calendar").await?;
let mock = mount_sse_once(
&server,
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
)
.await;
let mock = mount_plugin_tool_search_turn(&server).await;
let codex_home = Arc::new(TempDir::new()?);
let rmcp_test_server_bin = match stdio_server_bin() {
@@ -365,7 +390,8 @@ async fn explicit_plugin_mentions_keep_non_conflicting_mcp_for_chatgpt_auth() ->
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let request = mock.single_request();
let requests = mock.requests();
let request = &requests[0];
let developer_messages = request.message_input_texts("developer");
assert!(
developer_messages
@@ -379,25 +405,13 @@ async fn explicit_plugin_mentions_keep_non_conflicting_mcp_for_chatgpt_auth() ->
.any(|text| text.contains("Apps from this plugin")),
"expected plugin app guidance: {developer_messages:?}"
);
let request_body = request.body_json();
let request_tools = tool_names(&request_body);
let (calendar_tool, echo_tool) = searched_plugin_tools(&requests[1]);
assert!(
request_tools
.iter()
.any(|name| name == "mcp__codex_apps__google_calendar"),
"expected plugin app tools to become visible for this turn: {request_tools:?}"
);
let echo_tool = request
.tool_by_name("mcp__sample", "echo")
.expect("plugin MCP tool should remain present");
let echo_description = echo_tool
.get("description")
.and_then(serde_json::Value::as_str)
.expect("plugin MCP tool description should be present");
assert!(
echo_description.contains("This tool is part of plugin `sample`."),
"expected plugin MCP provenance in tool description: {echo_description:?}"
calendar_tool.is_some(),
"plugin app tool should be searchable"
);
let echo_tool = echo_tool.expect("plugin MCP tool should remain searchable");
assert_plugin_provenance(&echo_tool);
Ok(())
}
@@ -406,11 +420,7 @@ async fn explicit_plugin_mentions_keep_non_conflicting_mcp_for_chatgpt_auth() ->
async fn explicit_plugin_mentions_use_mcp_for_api_key_dual_surface_plugins() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mock = mount_sse_once(
&server,
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
)
.await;
let mock = mount_plugin_tool_search_turn(&server).await;
let codex_home = Arc::new(TempDir::new()?);
let rmcp_test_server_bin = match stdio_server_bin() {
@@ -454,7 +464,8 @@ async fn explicit_plugin_mentions_use_mcp_for_api_key_dual_surface_plugins() ->
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let request = mock.single_request();
let requests = mock.requests();
let request = &requests[0];
let developer_messages = request.message_input_texts("developer");
assert!(
developer_messages
@@ -474,25 +485,19 @@ async fn explicit_plugin_mentions_use_mcp_for_api_key_dual_surface_plugins() ->
.any(|text| text.contains("Apps from this plugin")),
"expected plugin app guidance to be suppressed for API-key auth: {developer_messages:?}"
);
let request_body = request.body_json();
let request_tools = tool_names(&request_body);
assert!(
!request_tools
.iter()
.any(|name| name == "mcp__codex_apps__google_calendar"),
"expected plugin app tools to be hidden for API-key auth: {request_tools:?}"
request
.tool_by_name(SAMPLE_PLUGIN_APP_NAMESPACE, SEARCH_CALENDAR_CREATE_TOOL)
.is_none(),
"plugin app tool should not leak into the request for API-key auth"
);
let echo_tool = request
.tool_by_name("mcp__sample", "echo")
.expect("plugin MCP tool should be present");
let echo_description = echo_tool
.get("description")
.and_then(serde_json::Value::as_str)
.expect("plugin MCP tool description should be present");
let (calendar_tool, echo_tool) = searched_plugin_tools(&requests[1]);
assert!(
echo_description.contains("This tool is part of plugin `sample`."),
"expected plugin MCP provenance in tool description: {echo_description:?}"
calendar_tool.is_none(),
"plugin app tool should be hidden for API-key auth"
);
let echo_tool = echo_tool.expect("plugin MCP tool should be searchable");
assert_plugin_provenance(&echo_tool);
Ok(())
}
+23 -16
View File
@@ -539,20 +539,33 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
let server = responses::start_mock_server().await;
let call_id = "call-123";
let search_call_id = "search-rmcp-echo";
let server_name = "rmcp";
let namespace = format!("mcp__{server_name}");
let call_mock = mount_sse_once(
mount_sse_once(
&server,
responses::sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_tool_search_call(
search_call_id,
&json!({"query": "echo message and environment data"}),
),
responses::ev_completed("resp-1"),
]),
)
.await;
let call_mock = mount_sse_once(
&server,
responses::sse(vec![
responses::ev_response_created("resp-2"),
responses::ev_function_call_with_namespace(
call_id,
&namespace,
"echo",
"{\"message\":\"ping\"}",
),
responses::ev_completed("resp-1"),
responses::ev_completed("resp-2"),
]),
)
.await;
@@ -560,7 +573,7 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
&server,
responses::sse(vec![
responses::ev_assistant_message("msg-1", "rmcp echo tool completed successfully."),
responses::ev_completed("resp-2"),
responses::ev_completed("resp-3"),
]),
)
.await;
@@ -645,13 +658,14 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let output_item = final_mock.single_request().function_call_output(call_id);
let request = call_mock.single_request();
let search_output = call_mock
.single_request()
.tool_search_output(search_call_id);
assert!(
request.tool_by_name(&namespace, "echo").is_some(),
"direct MCP tool should be sent as a namespace child tool: {:?}",
request.body_json()
responses::namespace_child_tool(&search_output, &namespace, "echo").is_some(),
"tool_search should surface the RMCP echo tool: {search_output:?}"
);
let output_item = final_mock.single_request().function_call_output(call_id);
let output_text = output_item
.get("output")
@@ -860,7 +874,7 @@ async fn stdio_mcp_tool_call_includes_sandbox_state_meta() -> anyhow::Result<()>
let server_name = "rmcp";
let namespace = format!("mcp__{server_name}");
let call_mock = mount_sse_once(
mount_sse_once(
&server,
responses::sse(vec![
responses::ev_response_created("resp-1"),
@@ -903,13 +917,6 @@ async fn stdio_mcp_tool_call_includes_sandbox_state_meta() -> anyhow::Result<()>
)
.await?;
let request = call_mock.single_request();
assert!(
request.tool_by_name(&namespace, "sandbox_meta").is_some(),
"direct MCP tool should be sent as a namespace child tool: {:?}",
request.body_json()
);
let output_item = final_mock.single_request().function_call_output(call_id);
let output_text = output_item
.get("output")
+32 -38
View File
@@ -4,7 +4,6 @@
use anyhow::Result;
use codex_config::types::McpServerConfig;
use codex_config::types::McpServerTransportConfig;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
use codex_protocol::dynamic_tools::DynamicToolFunctionSpec;
@@ -30,7 +29,6 @@ use core_test_support::apps_test_server::SEARCH_CALENDAR_APP_ONLY_TOOL;
use core_test_support::apps_test_server::SEARCH_CALENDAR_CREATE_TOOL;
use core_test_support::apps_test_server::SEARCH_CALENDAR_LIST_TOOL;
use core_test_support::apps_test_server::SEARCH_CALENDAR_NAMESPACE;
use core_test_support::apps_test_server::apps_enabled_builder;
use core_test_support::apps_test_server::configure_search_capable_apps;
use core_test_support::apps_test_server::configure_search_capable_model;
use core_test_support::apps_test_server::recorded_apps_tool_call_by_call_id;
@@ -180,7 +178,7 @@ async fn search_tool_enabled_by_default_adds_tool_search() -> Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn always_defer_feature_hides_small_app_tool_sets() -> Result<()> {
async fn small_app_tool_sets_are_deferred_by_default() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -195,13 +193,7 @@ async fn always_defer_feature_hides_small_app_tool_sets() -> Result<()> {
)
.await;
let mut builder =
configured_builder(apps_server.chatgpt_base_url.clone()).with_config(|config| {
config
.features
.enable(Feature::ToolSearchAlwaysDeferMcpTools)
.expect("test config should allow feature update");
});
let mut builder = configured_builder(apps_server.chatgpt_base_url.clone());
let test = builder.build(&server).await?;
test.submit_turn_with_approval_and_permission_profile(
@@ -232,30 +224,42 @@ async fn app_only_tools_are_not_visible_or_runnable_by_direct_model_calls() -> R
let server = start_mock_server().await;
let apps_server =
AppsTestServer::mount_with_app_only_tool(&server, AppsTestToolLoading::Direct).await?;
let search_call_id = "app-only-search";
let call_id = "app-only-direct-call";
let mock = mount_sse_sequence(
&server,
vec![
sse(vec![
ev_response_created("resp-1"),
ev_tool_search_call(
search_call_id,
&json!({
"query": "create calendar event",
"limit": 8,
}),
),
ev_completed("resp-1"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_function_call_with_namespace(
call_id,
SEARCH_CALENDAR_NAMESPACE,
SEARCH_CALENDAR_APP_ONLY_TOOL,
"{}",
),
ev_completed("resp-1"),
ev_completed("resp-2"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_response_created("resp-3"),
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
ev_completed("resp-3"),
]),
],
)
.await;
let mut builder = apps_enabled_builder(apps_server.chatgpt_base_url.clone());
let mut builder = configured_builder(apps_server.chatgpt_base_url.clone());
let test = builder.build(&server).await?;
test.submit_turn_with_approval_and_permission_profile(
"Try to call the app-only calendar tool.",
@@ -266,25 +270,25 @@ async fn app_only_tools_are_not_visible_or_runnable_by_direct_model_calls() -> R
let requests = mock.requests();
assert!(
namespace_child_tool(
&requests[0].body_json(),
tool_search_output_has_namespace_child(
&requests[1],
search_call_id,
SEARCH_CALENDAR_NAMESPACE,
SEARCH_CALENDAR_CREATE_TOOL
)
.is_some(),
"visible tool from the app-only tool's connector should be declared"
),
"visible tool from the app-only tool's connector should be searchable"
);
assert!(
namespace_child_tool(
&requests[0].body_json(),
!tool_search_output_has_namespace_child(
&requests[1],
search_call_id,
SEARCH_CALENDAR_NAMESPACE,
SEARCH_CALENDAR_APP_ONLY_TOOL
)
.is_none(),
"app-only tool should not be declared to a direct model"
),
"app-only tool should not be returned to the model by tool_search"
);
assert!(
requests[1]
requests[2]
.function_call_output(call_id)
.get("output")
.and_then(Value::as_str)
@@ -423,7 +427,7 @@ async fn search_tool_hides_apps_tools_without_search() -> Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn explicit_app_mentions_respect_always_defer() -> Result<()> {
async fn explicit_app_mentions_leave_app_tools_deferred() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -438,13 +442,7 @@ async fn explicit_app_mentions_respect_always_defer() -> Result<()> {
)
.await;
let mut builder =
configured_builder(apps_server.chatgpt_base_url.clone()).with_config(|config| {
config
.features
.enable(Feature::ToolSearchAlwaysDeferMcpTools)
.expect("test config should allow feature update");
});
let mut builder = configured_builder(apps_server.chatgpt_base_url.clone());
let test = builder.build(&server).await?;
test.submit_turn_with_approval_and_permission_profile(
@@ -458,7 +456,7 @@ async fn explicit_app_mentions_respect_always_defer() -> Result<()> {
let tools = tool_names(&body);
assert!(
tools.iter().any(|name| name == TOOL_SEARCH_TOOL_NAME),
"explicit app mentions should leave app tools deferred when always-defer is active: {tools:?}"
"explicit app mentions should leave app tools deferred: {tools:?}"
);
assert!(
namespace_child_tool(
@@ -1214,10 +1212,6 @@ async fn tool_search_surfaced_mcp_tool_errors_are_returned_to_model() -> Result<
let rmcp_test_server_bin = stdio_server_bin()?;
let mut builder =
configured_builder(apps_server.chatgpt_base_url.clone()).with_config(move |config| {
config
.features
.enable(Feature::ToolSearchAlwaysDeferMcpTools)
.expect("test config should allow feature update");
let mut servers = config.mcp_servers.get().clone();
servers.insert(
"rmcp".to_string(),
+4 -4
View File
@@ -157,7 +157,7 @@ pub enum Feature {
AppsMcpPathOverride,
/// Removed compatibility flag retained as a no-op now that tool_search is always enabled.
ToolSearch,
/// Always defer MCP tools behind tool_search instead of exposing small sets directly.
/// Removed compatibility flag. MCP tools are always deferred when tool_search is available.
ToolSearchAlwaysDeferMcpTools,
/// Expose MCP model-visible namespaces without the legacy `mcp__` prefix.
NonPrefixedMcpToolNames,
@@ -473,7 +473,7 @@ impl Features {
"apply_patch_freeform" => {
continue;
}
"tool_search" | "apps_mcp_path_override" => {
"tool_search" | "tool_search_always_defer_mcp_tools" | "apps_mcp_path_override" => {
continue;
}
"image_detail_original" | "resize_all_images" => {
@@ -1074,8 +1074,8 @@ pub const FEATURES: &[FeatureSpec] = &[
FeatureSpec {
id: Feature::ToolSearchAlwaysDeferMcpTools,
key: "tool_search_always_defer_mcp_tools",
stage: Stage::UnderDevelopment,
default_enabled: false,
stage: Stage::Removed,
default_enabled: true,
},
FeatureSpec {
id: Feature::NonPrefixedMcpToolNames,
+19
View File
@@ -561,6 +561,25 @@ fn from_sources_ignores_removed_plugin_hooks_feature_key() {
assert_eq!(features, Features::with_defaults());
}
#[test]
fn from_sources_ignores_removed_tool_search_always_defer_mcp_tools_feature_key() {
let features_toml = FeaturesToml::from(BTreeMap::from([(
"tool_search_always_defer_mcp_tools".to_string(),
false,
)]));
let features = Features::from_sources(
FeatureConfigSource {
features: Some(&features_toml),
..Default::default()
},
FeatureConfigSource::default(),
FeatureOverrides::default(),
);
assert_eq!(features, Features::with_defaults());
}
#[test]
fn multi_agent_v2_feature_config_deserializes_boolean_toggle() {
let features: FeaturesToml = toml::from_str(