From c53b1dae09db40902c59f6a0d57d0dcc334926db Mon Sep 17 00:00:00 2001 From: sayan-oai Date: Mon, 22 Jun 2026 16:45:23 -0700 Subject: [PATCH] [codex] Use tool search for MCP tools by default (#29486) ## Why MCP tools were only placed behind `tool_search` when a feature flag was enabled or when there were at least 100 tools. That made the model's tool flow depend on both rollout configuration and the number of installed tools. The searched-tool flow is now the intended behavior. Making it unconditional when the model and provider support it gives every supported setup the same behavior and lets us retire the feature flag safely. ## What changed - Defer all effective MCP tools when `tool_search` and namespaced tools are supported. - Keep exposing MCP tools directly when search cannot be used, so older or unsupported model/provider combinations still work. - Mark `tool_search_always_defer_mcp_tools` as removed and ignore old configured values. - Keep plugin filtering, app-only filtering, file handling, and MCP calls working through the searched-tool flow. ## Why many tests changed Many tests used to act as if the model could see MCP tools in its first request and call them immediately. That is no longer the real flow: the model first receives `tool_search`, searches for a tool, receives the matching MCP tool, and then calls it in the next request. The tests therefore needed an extra search step, and checks for tool names, descriptions, and input fields had to move from the first request to the search result. These are not separate product changes; they make the tests follow what the model will actually see after this change. The plugin tests still check which tools are allowed and where they came from, the file tests still check upload fields and behavior, and the MCP round-trip test still checks a successful call from start to finish. ## Tests - `just test -p codex-features` - Focused `codex-core` tests for MCP exposure and tool planning - `just test -p codex-core explicit_plugin_mentions` - `just test -p codex-core stdio_server_round_trip` - Focused `codex-core` tests for tool search, app-only tools, and MCP file uploads --- codex-rs/core/src/mcp_tool_exposure.rs | 11 +- codex-rs/core/src/mcp_tool_exposure_test.rs | 21 +-- codex-rs/core/src/tools/spec_plan.rs | 16 +- .../core/tests/suite/mcp_tool_exposure.rs | 4 - codex-rs/core/tests/suite/openai_file_mcp.rs | 32 +++- codex-rs/core/tests/suite/plugins.rs | 173 +++++++++--------- codex-rs/core/tests/suite/rmcp_client.rs | 39 ++-- codex-rs/core/tests/suite/search_tool.rs | 70 ++++--- codex-rs/features/src/lib.rs | 8 +- codex-rs/features/src/tests.rs | 19 ++ 10 files changed, 207 insertions(+), 186 deletions(-) diff --git a/codex-rs/core/src/mcp_tool_exposure.rs b/codex-rs/core/src/mcp_tool_exposure.rs index d62880615..fda2b04c9 100644 --- a/codex-rs/core/src/mcp_tool_exposure.rs +++ b/codex-rs/core/src/mcp_tool_exposure.rs @@ -2,7 +2,6 @@ use std::collections::HashSet; use codex_connectors::AppToolPolicyEvaluator; use codex_connectors::AppToolPolicyInput; -use codex_features::Feature; use codex_mcp::CODEX_APPS_MCP_SERVER_NAME; use codex_mcp::ToolInfo as McpToolInfo; use codex_mcp::tool_is_model_visible; @@ -11,8 +10,6 @@ use tracing::instrument; use crate::config::Config; use crate::connectors; -pub(crate) const DIRECT_MCP_TOOL_EXPOSURE_THRESHOLD: usize = 100; - pub(crate) struct McpToolExposure { pub(crate) direct_tools: Vec, pub(crate) deferred_tools: Option>, @@ -34,13 +31,7 @@ pub(crate) fn build_mcp_tool_exposure( )); } - let should_defer = search_tool_enabled - && (config - .features - .enabled(Feature::ToolSearchAlwaysDeferMcpTools) - || deferred_tools.len() >= DIRECT_MCP_TOOL_EXPOSURE_THRESHOLD); - - if !should_defer { + if !search_tool_enabled { return McpToolExposure { direct_tools: deferred_tools, deferred_tools: None, diff --git a/codex-rs/core/src/mcp_tool_exposure_test.rs b/codex-rs/core/src/mcp_tool_exposure_test.rs index 63a2e33f3..71c92b558 100644 --- a/codex-rs/core/src/mcp_tool_exposure_test.rs +++ b/codex-rs/core/src/mcp_tool_exposure_test.rs @@ -1,7 +1,6 @@ use std::collections::HashSet; use std::sync::Arc; -use codex_features::Feature; use codex_mcp::CODEX_APPS_MCP_SERVER_NAME; use codex_mcp::ToolInfo; use codex_tools::ToolName; @@ -95,12 +94,12 @@ fn with_visibility(mut tool: ToolInfo, visibility: &[&str]) -> ToolInfo { } #[tokio::test] -async fn directly_exposes_small_effective_tool_sets() { +async fn directly_exposes_effective_tool_sets_when_search_is_unavailable() { let config = test_config().await; - let mcp_tools = numbered_mcp_tools(DIRECT_MCP_TOOL_EXPOSURE_THRESHOLD - 1); + let mcp_tools = numbered_mcp_tools(/*count*/ 2); let exposure = build_mcp_tool_exposure( - &mcp_tools, /*connectors*/ None, &config, /*search_tool_enabled*/ true, + &mcp_tools, /*connectors*/ None, &config, /*search_tool_enabled*/ false, ); assert_eq!(tool_names(&exposure.direct_tools), tool_names(&mcp_tools)); @@ -237,9 +236,9 @@ enabled = true } #[tokio::test] -async fn searches_large_effective_tool_sets() { +async fn defers_effective_tool_sets_when_search_is_available() { let config = test_config().await; - let mcp_tools = numbered_mcp_tools(DIRECT_MCP_TOOL_EXPOSURE_THRESHOLD); + let mcp_tools = numbered_mcp_tools(/*count*/ 2); let exposure = build_mcp_tool_exposure( &mcp_tools, /*connectors*/ None, &config, /*search_tool_enabled*/ true, @@ -249,17 +248,13 @@ async fn searches_large_effective_tool_sets() { let deferred_tools = exposure .deferred_tools .as_ref() - .expect("large tool sets should be discoverable through tool_search"); + .expect("MCP tools should be discoverable through tool_search"); assert_eq!(tool_names(deferred_tools), tool_names(&mcp_tools)); } #[tokio::test] -async fn always_defer_feature_defers_apps_too() { - let mut config = test_config().await; - config - .features - .enable(Feature::ToolSearchAlwaysDeferMcpTools) - .expect("test config should allow feature update"); +async fn defers_apps_and_non_app_mcp_tools() { + let config = test_config().await; let mcp_tools = vec![ make_mcp_tool( "rmcp", diff --git a/codex-rs/core/src/tools/spec_plan.rs b/codex-rs/core/src/tools/spec_plan.rs index c83422f1a..51cd43650 100644 --- a/codex-rs/core/src/tools/spec_plan.rs +++ b/codex-rs/core/src/tools/spec_plan.rs @@ -326,7 +326,7 @@ fn hosted_model_tool_specs(context: &CoreToolPlanContext<'_>) -> Vec { } pub(crate) fn search_tool_enabled(turn_context: &TurnContext) -> bool { - turn_context.model_info.supports_search_tool + turn_context.model_info.supports_search_tool && namespace_tools_enabled(turn_context) } pub(crate) fn tool_suggest_enabled(turn_context: &TurnContext) -> bool { @@ -820,12 +820,11 @@ fn add_collaboration_tools(context: &CoreToolPlanContext<'_>, planned_tools: &mu } else { let agent_type_description = agent_type_description(turn_context, context.default_agent_type_description); - let exposure = - if search_tool_enabled(turn_context) && namespace_tools_enabled(turn_context) { - ToolExposure::Deferred - } else { - ToolExposure::Direct - }; + let exposure = if search_tool_enabled(turn_context) { + ToolExposure::Deferred + } else { + ToolExposure::Direct + }; planned_tools.add_with_exposure( SpawnAgentHandler::new(SpawnAgentToolOptions { available_models: turn_context.available_models.clone(), @@ -943,7 +942,7 @@ fn append_tool_search_executor( planned_tools: &mut PlannedTools, ) { let turn_context = context.turn_context; - if !(search_tool_enabled(turn_context) && namespace_tools_enabled(turn_context)) { + if !search_tool_enabled(turn_context) { return; } @@ -990,7 +989,6 @@ fn append_extension_tool_executors( reserved_tool_names.insert(ToolName::plain(codex_code_mode::WAIT_TOOL_NAME)); } if search_tool_enabled(turn_context) - && namespace_tools_enabled(turn_context) && planned_tools .runtimes() .iter() diff --git a/codex-rs/core/tests/suite/mcp_tool_exposure.rs b/codex-rs/core/tests/suite/mcp_tool_exposure.rs index 44f2f87c3..b86862edc 100644 --- a/codex-rs/core/tests/suite/mcp_tool_exposure.rs +++ b/codex-rs/core/tests/suite/mcp_tool_exposure.rs @@ -35,10 +35,6 @@ async fn code_mode_only_exposes_direct_model_only_mcp_namespaces() -> Result<()> .features .enable(Feature::CodeModeOnly) .expect("test config should allow feature update"); - config - .features - .enable(Feature::ToolSearchAlwaysDeferMcpTools) - .expect("test config should allow feature update"); config.code_mode.direct_only_tool_namespaces = vec![SEARCH_CALENDAR_NAMESPACE.to_string()]; }); diff --git a/codex-rs/core/tests/suite/openai_file_mcp.rs b/codex-rs/core/tests/suite/openai_file_mcp.rs index edc14cead..9068f5a56 100644 --- a/codex-rs/core/tests/suite/openai_file_mcp.rs +++ b/codex-rs/core/tests/suite/openai_file_mcp.rs @@ -22,7 +22,9 @@ use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call_with_namespace; use core_test_support::responses::ev_response_created; +use core_test_support::responses::ev_tool_search_call; use core_test_support::responses::mount_sse_sequence; +use core_test_support::responses::namespace_child_tool; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::test_codex::TestCodex; @@ -142,18 +144,29 @@ async fn run_extract_turn(test: &TestCodex, server: &MockServer) -> Result std::path::PathBuf { home.path().join("plugins/cache/test/sample/local") @@ -145,21 +155,53 @@ async fn build_apps_enabled_plugin_test_codex( .expect("create new conversation")) } -fn tool_names(body: &serde_json::Value) -> Vec { - body.get("tools") - .and_then(serde_json::Value::as_array) - .map(|tools| { - tools - .iter() - .filter_map(|tool| { - tool.get("name") - .or_else(|| tool.get("type")) - .and_then(serde_json::Value::as_str) - .map(str::to_string) - }) - .collect() - }) - .unwrap_or_default() +async fn mount_plugin_tool_search_turn(server: &MockServer) -> ResponseMock { + mount_sse_sequence( + server, + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_tool_search_call( + PLUGIN_APP_SEARCH_CALL_ID, + &serde_json::json!({"query": "create calendar event"}), + ), + ev_tool_search_call( + PLUGIN_MCP_SEARCH_CALL_ID, + &serde_json::json!({"query": "echo"}), + ), + ev_completed("resp-1"), + ]), + sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]), + ], + ) + .await +} + +fn assert_plugin_provenance(tool: &serde_json::Value) { + let description = tool + .get("description") + .and_then(serde_json::Value::as_str) + .expect("plugin tool description should be present"); + assert!( + description.contains("This tool is part of plugin `sample`."), + "expected plugin provenance in tool description: {description:?}" + ); +} + +fn searched_plugin_tools( + request: &ResponsesRequest, +) -> (Option, Option) { + let app_output = request.tool_search_output(PLUGIN_APP_SEARCH_CALL_ID); + let mcp_output = request.tool_search_output(PLUGIN_MCP_SEARCH_CALL_ID); + ( + namespace_child_tool( + &app_output, + SAMPLE_PLUGIN_APP_NAMESPACE, + SEARCH_CALENDAR_CREATE_TOOL, + ) + .cloned(), + namespace_child_tool(&mcp_output, SAMPLE_PLUGIN_MCP_NAMESPACE, "echo").cloned(), + ) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -237,11 +279,7 @@ async fn explicit_plugin_mentions_use_apps_for_chatgpt_dual_surface_plugins() -> skip_if_no_network!(Ok(())); let server = start_mock_server().await; let apps_server = AppsTestServer::mount_with_connector_name(&server, "Google Calendar").await?; - let mock = mount_sse_once( - &server, - sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), - ) - .await; + let mock = mount_plugin_tool_search_turn(&server).await; let codex_home = Arc::new(TempDir::new()?); let rmcp_test_server_bin = match stdio_server_bin() { @@ -275,7 +313,8 @@ async fn explicit_plugin_mentions_use_apps_for_chatgpt_dual_surface_plugins() -> .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; - let request = mock.single_request(); + let requests = mock.requests(); + let request = &requests[0]; let developer_messages = request.message_input_texts("developer"); assert!( developer_messages @@ -295,28 +334,18 @@ async fn explicit_plugin_mentions_use_apps_for_chatgpt_dual_surface_plugins() -> .any(|text| text.contains("Apps from this plugin")), "expected visible plugin app guidance: {developer_messages:?}" ); - let request_body = request.body_json(); - let request_tools = tool_names(&request_body); assert!( - request_tools - .iter() - .any(|name| name == "mcp__codex_apps__google_calendar"), - "expected plugin app tools to become visible for this turn: {request_tools:?}" + request + .tool_by_name(SAMPLE_PLUGIN_MCP_NAMESPACE, "echo") + .is_none(), + "plugin MCP tool should not leak into the request for ChatGPT auth" ); + let (calendar_tool, echo_tool) = searched_plugin_tools(&requests[1]); + let calendar_tool = calendar_tool.expect("plugin app tool should be searchable"); + assert_plugin_provenance(&calendar_tool); assert!( - request.tool_by_name("mcp__sample", "echo").is_none(), - "expected plugin MCP tool to be suppressed for ChatGPT auth" - ); - let calendar_tool = request - .tool_by_name("mcp__codex_apps__google_calendar", "_create_event") - .expect("plugin app tool should be present"); - let calendar_description = calendar_tool - .get("description") - .and_then(serde_json::Value::as_str) - .expect("plugin app tool description should be present"); - assert!( - calendar_description.contains("This tool is part of plugin `sample`."), - "expected plugin app provenance in tool description: {calendar_description:?}" + echo_tool.is_none(), + "plugin MCP tool should be suppressed for ChatGPT auth" ); Ok(()) @@ -327,11 +356,7 @@ async fn explicit_plugin_mentions_keep_non_conflicting_mcp_for_chatgpt_auth() -> skip_if_no_network!(Ok(())); let server = start_mock_server().await; let apps_server = AppsTestServer::mount_with_connector_name(&server, "Google Calendar").await?; - let mock = mount_sse_once( - &server, - sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), - ) - .await; + let mock = mount_plugin_tool_search_turn(&server).await; let codex_home = Arc::new(TempDir::new()?); let rmcp_test_server_bin = match stdio_server_bin() { @@ -365,7 +390,8 @@ async fn explicit_plugin_mentions_keep_non_conflicting_mcp_for_chatgpt_auth() -> .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; - let request = mock.single_request(); + let requests = mock.requests(); + let request = &requests[0]; let developer_messages = request.message_input_texts("developer"); assert!( developer_messages @@ -379,25 +405,13 @@ async fn explicit_plugin_mentions_keep_non_conflicting_mcp_for_chatgpt_auth() -> .any(|text| text.contains("Apps from this plugin")), "expected plugin app guidance: {developer_messages:?}" ); - let request_body = request.body_json(); - let request_tools = tool_names(&request_body); + let (calendar_tool, echo_tool) = searched_plugin_tools(&requests[1]); assert!( - request_tools - .iter() - .any(|name| name == "mcp__codex_apps__google_calendar"), - "expected plugin app tools to become visible for this turn: {request_tools:?}" - ); - let echo_tool = request - .tool_by_name("mcp__sample", "echo") - .expect("plugin MCP tool should remain present"); - let echo_description = echo_tool - .get("description") - .and_then(serde_json::Value::as_str) - .expect("plugin MCP tool description should be present"); - assert!( - echo_description.contains("This tool is part of plugin `sample`."), - "expected plugin MCP provenance in tool description: {echo_description:?}" + calendar_tool.is_some(), + "plugin app tool should be searchable" ); + let echo_tool = echo_tool.expect("plugin MCP tool should remain searchable"); + assert_plugin_provenance(&echo_tool); Ok(()) } @@ -406,11 +420,7 @@ async fn explicit_plugin_mentions_keep_non_conflicting_mcp_for_chatgpt_auth() -> async fn explicit_plugin_mentions_use_mcp_for_api_key_dual_surface_plugins() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let mock = mount_sse_once( - &server, - sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), - ) - .await; + let mock = mount_plugin_tool_search_turn(&server).await; let codex_home = Arc::new(TempDir::new()?); let rmcp_test_server_bin = match stdio_server_bin() { @@ -454,7 +464,8 @@ async fn explicit_plugin_mentions_use_mcp_for_api_key_dual_surface_plugins() -> .await?; wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; - let request = mock.single_request(); + let requests = mock.requests(); + let request = &requests[0]; let developer_messages = request.message_input_texts("developer"); assert!( developer_messages @@ -474,25 +485,19 @@ async fn explicit_plugin_mentions_use_mcp_for_api_key_dual_surface_plugins() -> .any(|text| text.contains("Apps from this plugin")), "expected plugin app guidance to be suppressed for API-key auth: {developer_messages:?}" ); - let request_body = request.body_json(); - let request_tools = tool_names(&request_body); assert!( - !request_tools - .iter() - .any(|name| name == "mcp__codex_apps__google_calendar"), - "expected plugin app tools to be hidden for API-key auth: {request_tools:?}" + request + .tool_by_name(SAMPLE_PLUGIN_APP_NAMESPACE, SEARCH_CALENDAR_CREATE_TOOL) + .is_none(), + "plugin app tool should not leak into the request for API-key auth" ); - let echo_tool = request - .tool_by_name("mcp__sample", "echo") - .expect("plugin MCP tool should be present"); - let echo_description = echo_tool - .get("description") - .and_then(serde_json::Value::as_str) - .expect("plugin MCP tool description should be present"); + let (calendar_tool, echo_tool) = searched_plugin_tools(&requests[1]); assert!( - echo_description.contains("This tool is part of plugin `sample`."), - "expected plugin MCP provenance in tool description: {echo_description:?}" + calendar_tool.is_none(), + "plugin app tool should be hidden for API-key auth" ); + let echo_tool = echo_tool.expect("plugin MCP tool should be searchable"); + assert_plugin_provenance(&echo_tool); Ok(()) } diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index 2b4e09bd1..236fbee4e 100644 --- a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -539,20 +539,33 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> { let server = responses::start_mock_server().await; let call_id = "call-123"; + let search_call_id = "search-rmcp-echo"; let server_name = "rmcp"; let namespace = format!("mcp__{server_name}"); - let call_mock = mount_sse_once( + mount_sse_once( &server, responses::sse(vec![ responses::ev_response_created("resp-1"), + responses::ev_tool_search_call( + search_call_id, + &json!({"query": "echo message and environment data"}), + ), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let call_mock = mount_sse_once( + &server, + responses::sse(vec![ + responses::ev_response_created("resp-2"), responses::ev_function_call_with_namespace( call_id, &namespace, "echo", "{\"message\":\"ping\"}", ), - responses::ev_completed("resp-1"), + responses::ev_completed("resp-2"), ]), ) .await; @@ -560,7 +573,7 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> { &server, responses::sse(vec![ responses::ev_assistant_message("msg-1", "rmcp echo tool completed successfully."), - responses::ev_completed("resp-2"), + responses::ev_completed("resp-3"), ]), ) .await; @@ -645,13 +658,14 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> { wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; - let output_item = final_mock.single_request().function_call_output(call_id); - let request = call_mock.single_request(); + let search_output = call_mock + .single_request() + .tool_search_output(search_call_id); assert!( - request.tool_by_name(&namespace, "echo").is_some(), - "direct MCP tool should be sent as a namespace child tool: {:?}", - request.body_json() + responses::namespace_child_tool(&search_output, &namespace, "echo").is_some(), + "tool_search should surface the RMCP echo tool: {search_output:?}" ); + let output_item = final_mock.single_request().function_call_output(call_id); let output_text = output_item .get("output") @@ -860,7 +874,7 @@ async fn stdio_mcp_tool_call_includes_sandbox_state_meta() -> anyhow::Result<()> let server_name = "rmcp"; let namespace = format!("mcp__{server_name}"); - let call_mock = mount_sse_once( + mount_sse_once( &server, responses::sse(vec![ responses::ev_response_created("resp-1"), @@ -903,13 +917,6 @@ async fn stdio_mcp_tool_call_includes_sandbox_state_meta() -> anyhow::Result<()> ) .await?; - let request = call_mock.single_request(); - assert!( - request.tool_by_name(&namespace, "sandbox_meta").is_some(), - "direct MCP tool should be sent as a namespace child tool: {:?}", - request.body_json() - ); - let output_item = final_mock.single_request().function_call_output(call_id); let output_text = output_item .get("output") diff --git a/codex-rs/core/tests/suite/search_tool.rs b/codex-rs/core/tests/suite/search_tool.rs index 722854772..af6782792 100644 --- a/codex-rs/core/tests/suite/search_tool.rs +++ b/codex-rs/core/tests/suite/search_tool.rs @@ -4,7 +4,6 @@ use anyhow::Result; use codex_config::types::McpServerConfig; use codex_config::types::McpServerTransportConfig; -use codex_features::Feature; use codex_login::CodexAuth; use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem; use codex_protocol::dynamic_tools::DynamicToolFunctionSpec; @@ -30,7 +29,6 @@ use core_test_support::apps_test_server::SEARCH_CALENDAR_APP_ONLY_TOOL; use core_test_support::apps_test_server::SEARCH_CALENDAR_CREATE_TOOL; use core_test_support::apps_test_server::SEARCH_CALENDAR_LIST_TOOL; use core_test_support::apps_test_server::SEARCH_CALENDAR_NAMESPACE; -use core_test_support::apps_test_server::apps_enabled_builder; use core_test_support::apps_test_server::configure_search_capable_apps; use core_test_support::apps_test_server::configure_search_capable_model; use core_test_support::apps_test_server::recorded_apps_tool_call_by_call_id; @@ -180,7 +178,7 @@ async fn search_tool_enabled_by_default_adds_tool_search() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn always_defer_feature_hides_small_app_tool_sets() -> Result<()> { +async fn small_app_tool_sets_are_deferred_by_default() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -195,13 +193,7 @@ async fn always_defer_feature_hides_small_app_tool_sets() -> Result<()> { ) .await; - let mut builder = - configured_builder(apps_server.chatgpt_base_url.clone()).with_config(|config| { - config - .features - .enable(Feature::ToolSearchAlwaysDeferMcpTools) - .expect("test config should allow feature update"); - }); + let mut builder = configured_builder(apps_server.chatgpt_base_url.clone()); let test = builder.build(&server).await?; test.submit_turn_with_approval_and_permission_profile( @@ -232,30 +224,42 @@ async fn app_only_tools_are_not_visible_or_runnable_by_direct_model_calls() -> R let server = start_mock_server().await; let apps_server = AppsTestServer::mount_with_app_only_tool(&server, AppsTestToolLoading::Direct).await?; + let search_call_id = "app-only-search"; let call_id = "app-only-direct-call"; let mock = mount_sse_sequence( &server, vec![ sse(vec![ ev_response_created("resp-1"), + ev_tool_search_call( + search_call_id, + &json!({ + "query": "create calendar event", + "limit": 8, + }), + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), ev_function_call_with_namespace( call_id, SEARCH_CALENDAR_NAMESPACE, SEARCH_CALENDAR_APP_ONLY_TOOL, "{}", ), - ev_completed("resp-1"), + ev_completed("resp-2"), ]), sse(vec![ - ev_response_created("resp-2"), + ev_response_created("resp-3"), ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), + ev_completed("resp-3"), ]), ], ) .await; - let mut builder = apps_enabled_builder(apps_server.chatgpt_base_url.clone()); + let mut builder = configured_builder(apps_server.chatgpt_base_url.clone()); let test = builder.build(&server).await?; test.submit_turn_with_approval_and_permission_profile( "Try to call the app-only calendar tool.", @@ -266,25 +270,25 @@ async fn app_only_tools_are_not_visible_or_runnable_by_direct_model_calls() -> R let requests = mock.requests(); assert!( - namespace_child_tool( - &requests[0].body_json(), + tool_search_output_has_namespace_child( + &requests[1], + search_call_id, SEARCH_CALENDAR_NAMESPACE, SEARCH_CALENDAR_CREATE_TOOL - ) - .is_some(), - "visible tool from the app-only tool's connector should be declared" + ), + "visible tool from the app-only tool's connector should be searchable" ); assert!( - namespace_child_tool( - &requests[0].body_json(), + !tool_search_output_has_namespace_child( + &requests[1], + search_call_id, SEARCH_CALENDAR_NAMESPACE, SEARCH_CALENDAR_APP_ONLY_TOOL - ) - .is_none(), - "app-only tool should not be declared to a direct model" + ), + "app-only tool should not be returned to the model by tool_search" ); assert!( - requests[1] + requests[2] .function_call_output(call_id) .get("output") .and_then(Value::as_str) @@ -423,7 +427,7 @@ async fn search_tool_hides_apps_tools_without_search() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn explicit_app_mentions_respect_always_defer() -> Result<()> { +async fn explicit_app_mentions_leave_app_tools_deferred() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -438,13 +442,7 @@ async fn explicit_app_mentions_respect_always_defer() -> Result<()> { ) .await; - let mut builder = - configured_builder(apps_server.chatgpt_base_url.clone()).with_config(|config| { - config - .features - .enable(Feature::ToolSearchAlwaysDeferMcpTools) - .expect("test config should allow feature update"); - }); + let mut builder = configured_builder(apps_server.chatgpt_base_url.clone()); let test = builder.build(&server).await?; test.submit_turn_with_approval_and_permission_profile( @@ -458,7 +456,7 @@ async fn explicit_app_mentions_respect_always_defer() -> Result<()> { let tools = tool_names(&body); assert!( tools.iter().any(|name| name == TOOL_SEARCH_TOOL_NAME), - "explicit app mentions should leave app tools deferred when always-defer is active: {tools:?}" + "explicit app mentions should leave app tools deferred: {tools:?}" ); assert!( namespace_child_tool( @@ -1214,10 +1212,6 @@ async fn tool_search_surfaced_mcp_tool_errors_are_returned_to_model() -> Result< let rmcp_test_server_bin = stdio_server_bin()?; let mut builder = configured_builder(apps_server.chatgpt_base_url.clone()).with_config(move |config| { - config - .features - .enable(Feature::ToolSearchAlwaysDeferMcpTools) - .expect("test config should allow feature update"); let mut servers = config.mcp_servers.get().clone(); servers.insert( "rmcp".to_string(), diff --git a/codex-rs/features/src/lib.rs b/codex-rs/features/src/lib.rs index 210b8689a..86fdd2e29 100644 --- a/codex-rs/features/src/lib.rs +++ b/codex-rs/features/src/lib.rs @@ -157,7 +157,7 @@ pub enum Feature { AppsMcpPathOverride, /// Removed compatibility flag retained as a no-op now that tool_search is always enabled. ToolSearch, - /// Always defer MCP tools behind tool_search instead of exposing small sets directly. + /// Removed compatibility flag. MCP tools are always deferred when tool_search is available. ToolSearchAlwaysDeferMcpTools, /// Expose MCP model-visible namespaces without the legacy `mcp__` prefix. NonPrefixedMcpToolNames, @@ -473,7 +473,7 @@ impl Features { "apply_patch_freeform" => { continue; } - "tool_search" | "apps_mcp_path_override" => { + "tool_search" | "tool_search_always_defer_mcp_tools" | "apps_mcp_path_override" => { continue; } "image_detail_original" | "resize_all_images" => { @@ -1074,8 +1074,8 @@ pub const FEATURES: &[FeatureSpec] = &[ FeatureSpec { id: Feature::ToolSearchAlwaysDeferMcpTools, key: "tool_search_always_defer_mcp_tools", - stage: Stage::UnderDevelopment, - default_enabled: false, + stage: Stage::Removed, + default_enabled: true, }, FeatureSpec { id: Feature::NonPrefixedMcpToolNames, diff --git a/codex-rs/features/src/tests.rs b/codex-rs/features/src/tests.rs index b9902320b..69fd59e13 100644 --- a/codex-rs/features/src/tests.rs +++ b/codex-rs/features/src/tests.rs @@ -561,6 +561,25 @@ fn from_sources_ignores_removed_plugin_hooks_feature_key() { assert_eq!(features, Features::with_defaults()); } +#[test] +fn from_sources_ignores_removed_tool_search_always_defer_mcp_tools_feature_key() { + let features_toml = FeaturesToml::from(BTreeMap::from([( + "tool_search_always_defer_mcp_tools".to_string(), + false, + )])); + + let features = Features::from_sources( + FeatureConfigSource { + features: Some(&features_toml), + ..Default::default() + }, + FeatureConfigSource::default(), + FeatureOverrides::default(), + ); + + assert_eq!(features, Features::with_defaults()); +} + #[test] fn multi_agent_v2_feature_config_deserializes_boolean_toggle() { let features: FeaturesToml = toml::from_str(