[codex] Add model tool mode selector (#25031)

## Why
Some models need to select their code-execution behavior through model
catalog metadata. Models without that metadata must continue to follow
the existing `CodeMode` and `CodeModeOnly` feature flags, including when
a newer server sends an enum value this client does not recognize.

## What changed
- add optional `ModelInfo.tool_mode` metadata with `direct`,
`code_mode`, and `code_mode_only`
- treat omitted and unknown wire values as `None`
- resolve `None` from the existing feature flags
- carry the resolved `ToolMode` directly on `TurnContext`, outside
`Config`
- use the resolved value for turn creation, model switches, review
turns, tool planning, and code execution

## Coverage
- add protocol coverage for omitted, known, and unknown enum values
- add focused coverage for flag fallback and explicit metadata
overriding feature flags
- add core integration coverage that fetches remote model metadata
through `/v1/models` and verifies the outbound `/responses` tools for
explicit `direct` and `code_mode_only` selectors

## Stack
- followed by #25032
This commit is contained in:
Ahmed Ibrahim
2026-05-29 09:05:05 -07:00
committed by GitHub
Unverified
parent 251b2412b2
commit 5577a9e148
19 changed files with 329 additions and 16 deletions
@@ -52,6 +52,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
}
}
@@ -98,6 +98,7 @@ async fn models_client_hits_models_endpoint() {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
}],
};
+11
View File
@@ -1,4 +1,5 @@
use super::*;
use codex_protocol::openai_models::ToolMode;
use std::sync::atomic::AtomicBool;
/// Spawn a review thread using the given prompt.
@@ -47,6 +48,15 @@ pub(super) async fn spawn_review_thread(
let mut per_turn_config = (*config).clone();
per_turn_config.model = Some(model.clone());
per_turn_config.features = review_features.clone();
let tool_mode = model_info.tool_mode.unwrap_or_else(|| {
if per_turn_config.features.enabled(Feature::CodeModeOnly) {
ToolMode::CodeModeOnly
} else if per_turn_config.features.enabled(Feature::CodeMode) {
ToolMode::CodeMode
} else {
ToolMode::Direct
}
});
if let Err(err) = per_turn_config.web_search_mode.set(review_web_search_mode) {
let fallback_value = per_turn_config.web_search_mode.value();
tracing::warn!(
@@ -96,6 +106,7 @@ pub(super) async fn spawn_review_thread(
config: per_turn_config,
auth_manager: auth_manager_for_context,
model_info: model_info.clone(),
tool_mode,
session_telemetry: session_telemetry_for_context,
provider: provider_for_context,
reasoning_effort,
+22
View File
@@ -6,6 +6,7 @@ use codex_model_provider::SharedModelProvider;
use codex_model_provider::create_model_provider;
use codex_protocol::SessionId;
use codex_protocol::models::AdditionalPermissionProfile;
use codex_protocol::openai_models::ToolMode;
use codex_protocol::protocol::ThreadSource;
use codex_protocol::protocol::TurnEnvironmentSelection;
use codex_sandboxing::compatibility_sandbox_policy_for_permission_profile;
@@ -55,6 +56,7 @@ pub struct TurnContext {
pub config: Arc<Config>,
pub(crate) auth_manager: Option<Arc<AuthManager>>,
pub(crate) model_info: ModelInfo,
pub(crate) tool_mode: ToolMode,
pub(crate) session_telemetry: SessionTelemetry,
pub(crate) provider: SharedModelProvider,
pub(crate) reasoning_effort: Option<ReasoningEffortConfig>,
@@ -172,6 +174,15 @@ impl TurnContext {
let model_info = models_manager
.get_model_info(model.as_str(), &config.to_models_manager_config())
.await;
let tool_mode = model_info.tool_mode.unwrap_or_else(|| {
if config.features.enabled(Feature::CodeModeOnly) {
ToolMode::CodeModeOnly
} else if config.features.enabled(Feature::CodeMode) {
ToolMode::CodeMode
} else {
ToolMode::Direct
}
});
let truncation_policy = model_info.truncation_policy.into();
let supported_reasoning_levels = model_info
.supported_reasoning_levels
@@ -212,6 +223,7 @@ impl TurnContext {
config: Arc::new(config),
auth_manager: self.auth_manager.clone(),
model_info: model_info.clone(),
tool_mode,
session_telemetry: self
.session_telemetry
.clone()
@@ -475,6 +487,15 @@ impl Session {
);
let mut per_turn_config = per_turn_config;
let tool_mode = model_info.tool_mode.unwrap_or_else(|| {
if per_turn_config.features.enabled(Feature::CodeModeOnly) {
ToolMode::CodeModeOnly
} else if per_turn_config.features.enabled(Feature::CodeMode) {
ToolMode::CodeMode
} else {
ToolMode::Direct
}
});
per_turn_config.service_tier = get_service_tier(
per_turn_config.service_tier,
per_turn_config.features.enabled(Feature::FastMode),
@@ -501,6 +522,7 @@ impl Session {
config: per_turn_config.clone(),
auth_manager: auth_manager_for_context,
model_info: model_info.clone(),
tool_mode,
session_telemetry: session_telemetry_for_context,
provider: provider_for_context,
reasoning_effort,
+2 -2
View File
@@ -30,7 +30,7 @@ use crate::tools::parallel::ToolCallRuntime;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
use crate::unified_exec::resolve_max_tokens;
use codex_features::Feature;
use codex_protocol::openai_models::ToolMode;
use codex_tools::ToolName;
use codex_utils_output_truncation::TruncationPolicy;
use codex_utils_output_truncation::formatted_truncate_text_content_items_with_policy;
@@ -91,7 +91,7 @@ impl CodeModeService {
router: Arc<ToolRouter>,
tracker: SharedTurnDiffTracker,
) -> Option<codex_code_mode::CodeModeTurnWorker> {
if !turn.features.enabled(Feature::CodeMode) {
if !matches!(turn.tool_mode, ToolMode::CodeMode | ToolMode::CodeModeOnly) {
return None;
}
+15 -14
View File
@@ -60,6 +60,7 @@ use codex_mcp::ToolInfo;
use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::InputModality;
use codex_protocol::openai_models::ToolMode;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_tools::DiscoverableTool;
@@ -230,8 +231,10 @@ fn spec_for_model_request(
exposure: ToolExposure,
spec: ToolSpec,
) -> ToolSpec {
if code_mode_enabled(turn_context)
&& exposure != ToolExposure::DirectModelOnly
if matches!(
turn_context.tool_mode,
ToolMode::CodeMode | ToolMode::CodeModeOnly
) && exposure != ToolExposure::DirectModelOnly
&& codex_code_mode::is_code_mode_nested_tool(spec.name())
{
codex_tools::augment_tool_spec_for_code_mode(spec)
@@ -282,14 +285,6 @@ fn namespace_tools_enabled(turn_context: &TurnContext) -> bool {
turn_context.provider.capabilities().namespace_tools
}
/// Reports whether the `CodeMode` feature flag is enabled for this turn.
fn code_mode_enabled(turn_context: &TurnContext) -> bool {
    let features = turn_context.features.get();
    features.enabled(Feature::CodeMode)
}
/// Reports whether the turn is code-mode-only: `CodeMode` must be enabled and
/// the `CodeModeOnly` feature flag must be enabled as well.
fn code_mode_only_enabled(turn_context: &TurnContext) -> bool {
    // `CodeModeOnly` is only consulted once `CodeMode` itself is on.
    if !code_mode_enabled(turn_context) {
        return false;
    }
    turn_context.features.get().enabled(Feature::CodeModeOnly)
}
/// Reports whether the `MultiAgentV2` feature flag is enabled for this turn.
fn multi_agent_v2_enabled(turn_context: &TurnContext) -> bool {
    let features = turn_context.features.get();
    features.enabled(Feature::MultiAgentV2)
}
@@ -398,7 +393,7 @@ fn is_hidden_by_code_mode_only(
tool_name: &ToolName,
exposure: ToolExposure,
) -> bool {
code_mode_only_enabled(turn_context)
turn_context.tool_mode == ToolMode::CodeModeOnly
&& exposure != ToolExposure::DirectModelOnly
&& codex_code_mode::is_code_mode_nested_tool(&codex_tools::code_mode_name_for_tool_name(
tool_name,
@@ -410,7 +405,10 @@ fn build_code_mode_executors(
executors: &[Arc<dyn CoreToolRuntime>],
deferred_tools_available: bool,
) -> Vec<Arc<dyn CoreToolRuntime>> {
if !code_mode_enabled(turn_context) {
if !matches!(
turn_context.tool_mode,
ToolMode::CodeMode | ToolMode::CodeModeOnly
) {
return vec![];
}
@@ -444,7 +442,7 @@ fn build_code_mode_executors(
create_code_mode_tool(
&enabled_tools,
&namespace_descriptions,
code_mode_only_enabled(turn_context),
turn_context.tool_mode == ToolMode::CodeModeOnly,
deferred_tools_available,
),
code_mode_nested_tool_specs,
@@ -847,7 +845,10 @@ fn append_extension_tool_executors(
.iter()
.map(|executor| executor.tool_name())
.collect::<HashSet<_>>();
if code_mode_enabled(turn_context) {
if matches!(
turn_context.tool_mode,
ToolMode::CodeMode | ToolMode::CodeModeOnly
) {
reserved_tool_names.insert(ToolName::plain(codex_code_mode::PUBLIC_TOOL_NAME));
reserved_tool_names.insert(ToolName::plain(codex_code_mode::WAIT_TOOL_NAME));
}
@@ -13,6 +13,7 @@ use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::openai_models::ApplyPatchToolType;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::InputModality;
use codex_protocol::openai_models::ToolMode;
use codex_protocol::openai_models::WebSearchToolType;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
@@ -215,6 +216,15 @@ fn set_feature(turn: &mut TurnContext, feature: Feature, enabled: bool) {
.expect("test feature should be disableable in config");
}
turn.config = Arc::new(config);
turn.tool_mode = turn.model_info.tool_mode.unwrap_or_else(|| {
if turn.config.features.enabled(Feature::CodeModeOnly) {
ToolMode::CodeModeOnly
} else if turn.config.features.enabled(Feature::CodeMode) {
ToolMode::CodeMode
} else {
ToolMode::Direct
}
});
}
fn set_features(turn: &mut TurnContext, features: &[Feature]) {
@@ -797,6 +807,20 @@ async fn multi_agent_feature_selects_one_agent_tool_family() {
);
}
/// An explicit `Direct` selector must keep the code-mode tools out of the
/// visible tool set even when both code-mode feature flags are enabled.
#[tokio::test]
async fn tool_mode_selector_overrides_feature_flags() {
    let plan = probe(|turn| {
        // Turn the flags on first, then pin the explicit selector on both the
        // model metadata and the resolved turn value.
        set_features(turn, &[Feature::CodeMode, Feature::CodeModeOnly]);
        turn.model_info.tool_mode = Some(ToolMode::Direct);
        turn.tool_mode = ToolMode::Direct;
    })
    .await;

    let code_mode_tools = [
        codex_code_mode::PUBLIC_TOOL_NAME,
        codex_code_mode::WAIT_TOOL_NAME,
    ];
    plan.assert_visible_lacks(&code_mode_tools);
}
#[tokio::test]
async fn v1_multi_agent_tools_defer_when_tool_search_available() {
let plan = probe(|turn| {
+1
View File
@@ -63,6 +63,7 @@ mod json_result;
mod live_cli;
mod mcp_turn_metadata;
mod model_overrides;
mod model_runtime_selectors;
mod model_switching;
mod model_visible_layout;
mod models_cache_ttl;
@@ -0,0 +1,173 @@
use anyhow::Result;
use codex_core::config::Config;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_models_manager::manager::RefreshStrategy;
use codex_models_manager::manager::SharedModelsManager;
use codex_models_manager::model_info::model_info_from_slug;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::openai_models::ToolMode;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::ThreadSettingsOverrides;
use codex_protocol::user_input::UserInput;
use core_test_support::responses;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_models_once;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::submit_thread_settings;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use serde_json::Value;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio::time::sleep;
/// Builds model metadata for `slug` from the slug-derived defaults, marked as
/// listable and as not being fallback metadata.
fn remote_model(slug: &str) -> ModelInfo {
    let mut info = model_info_from_slug(slug);
    info.visibility = ModelVisibility::List;
    info.used_fallback_model_metadata = false;
    info
}
/// Collects one label per entry of the request body's `tools` array.
///
/// Each entry is labelled by its `name` field, or by its `type` field when
/// `name` is absent. Entries whose chosen field is not a string are skipped.
/// Returns an empty list when `tools` is missing or is not an array.
fn tool_names(body: &Value) -> Vec<String> {
    let Some(tools) = body.get("tools").and_then(Value::as_array) else {
        return Vec::new();
    };

    let mut names = Vec::new();
    for tool in tools {
        // A present-but-non-string `name` does not fall back to `type`.
        let label_field = match tool.get("name") {
            Some(name) => Some(name),
            None => tool.get("type"),
        };
        if let Some(label) = label_field.and_then(Value::as_str) {
            names.push(label.to_string());
        }
    }
    names
}
/// Polls the models manager until a preset whose `model` equals `slug` shows up.
///
/// Every attempt lists models with `RefreshStrategy::Online`. Attempts are
/// spaced 25 ms apart.
///
/// # Panics
/// Panics when the model is still missing on an attempt made two seconds or
/// more after polling started.
async fn wait_for_model_available(manager: &SharedModelsManager, slug: &str) -> ModelPreset {
    let give_up_at = Instant::now() + Duration::from_secs(2);
    loop {
        let hit = manager
            .list_models(RefreshStrategy::Online)
            .await
            .iter()
            .find(|candidate| candidate.model == slug)
            .cloned();
        if let Some(preset) = hit {
            return preset;
        }

        // The deadline is only checked after a failed lookup, so at least one
        // attempt is always made.
        if Instant::now() >= give_up_at {
            panic!("timed out waiting for the remote model {slug} to appear");
        }
        sleep(Duration::from_millis(25)).await;
    }
}
/// Runs one user turn against a mock server that advertises `remote_model`
/// and returns the JSON body of the single request captured by the SSE mock.
///
/// Steps: mount the models response and one SSE response, build a test Codex
/// with dummy ChatGPT auth and the caller's `configure` hook, wait until the
/// model is listed, switch the thread to it, submit a text input, and wait for
/// `TurnComplete`.
///
/// # Errors
/// Propagates failures from building the test harness or submitting operations.
async fn response_body_for_remote_model(
    remote_model: ModelInfo,
    configure: impl FnOnce(&mut Config) + Send + 'static,
) -> Result<Value> {
    let server = responses::start_mock_server().await;
    let slug = remote_model.slug.clone();

    // Mount the model catalog, then the single turn response.
    let catalog = ModelsResponse {
        models: vec![remote_model],
    };
    let models_mock = mount_models_once(&server, catalog).await;

    let turn_events = sse(vec![
        ev_response_created("resp-1"),
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-1"),
    ]);
    let response_mock = mount_sse_once(&server, turn_events).await;

    let mut builder = test_codex()
        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
        .with_config(configure);
    let test = builder.build(&server).await?;

    // The remote model must be listed, and the catalog fetched exactly once.
    let models_manager = test.thread_manager.get_models_manager();
    let listed = wait_for_model_available(&models_manager, &slug).await;
    assert_eq!(listed.model, slug);
    assert_eq!(models_mock.requests().len(), 1);

    let overrides = ThreadSettingsOverrides {
        model: Some(slug),
        ..Default::default()
    };
    submit_thread_settings(&test.codex, overrides).await?;

    let prompt = UserInput::Text {
        text: "list tools".into(),
        text_elements: Vec::new(),
    };
    test.codex
        .submit(Op::UserInput {
            items: vec![prompt],
            environments: None,
            final_output_json_schema: None,
            responsesapi_client_metadata: None,
            additional_context: Default::default(),
            thread_settings: Default::default(),
        })
        .await?;

    wait_for_event(&test.codex, |event| {
        matches!(event, EventMsg::TurnComplete(_))
    })
    .await;

    let body = response_mock.single_request().body_json();
    Ok(body)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_tool_mode_selector_overrides_feature_flags() -> Result<()> {
skip_if_no_network!(Ok(()));
let mut direct_model = remote_model("test-tool-mode-direct");
direct_model.tool_mode = Some(ToolMode::Direct);
let direct_body = response_body_for_remote_model(direct_model, |config| {
config
.features
.enable(Feature::CodeModeOnly)
.expect("test config should allow feature update");
})
.await?;
let direct_tools = tool_names(&direct_body);
assert!(
direct_tools
.iter()
.all(|name| name != codex_code_mode::PUBLIC_TOOL_NAME
&& name != codex_code_mode::WAIT_TOOL_NAME),
"direct mode should override enabled code mode flags: {direct_tools:?}"
);
let mut code_mode_only_model = remote_model("test-tool-mode-code-mode-only");
code_mode_only_model.tool_mode = Some(ToolMode::CodeModeOnly);
let code_mode_only_body = response_body_for_remote_model(code_mode_only_model, |_| {}).await?;
assert_eq!(
tool_names(&code_mode_only_body),
vec![
codex_code_mode::PUBLIC_TOOL_NAME.to_string(),
codex_code_mode::WAIT_TOOL_NAME.to_string(),
]
);
Ok(())
}
@@ -112,6 +112,7 @@ fn test_model_info(
input_modalities,
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
@@ -929,6 +930,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result<
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
@@ -370,5 +370,6 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
}
}
+2
View File
@@ -592,6 +592,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
};
let _models_mock = mount_models_once(
@@ -702,6 +703,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() -
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
};
let _models_mock = mount_models_once(
@@ -477,6 +477,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
@@ -726,6 +727,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
@@ -1209,6 +1211,7 @@ fn test_remote_model_with_policy(
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
priority,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
+1
View File
@@ -1377,6 +1377,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re
input_modalities: vec![InputModality::Text],
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
}],
},
)
@@ -60,6 +60,7 @@ fn test_model_info(
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers,
+1
View File
@@ -1355,6 +1355,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an
input_modalities: vec![InputModality::Text],
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
@@ -99,6 +99,7 @@ pub fn model_info_from_slug(slug: &str) -> ModelInfo {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: true, // this is the fallback model metadata
supports_search_tool: false,
tool_mode: None,
}
}
+66
View File
@@ -8,7 +8,9 @@ use std::str::FromStr;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Deserializer;
use serde::Serialize;
use serde::de::DeserializeOwned;
use strum::IntoEnumIterator;
use strum_macros::Display;
use strum_macros::EnumIter;
@@ -227,6 +229,25 @@ pub enum TruncationMode {
Tokens,
}
/// Tool-mode selector that a model can declare through `ModelInfo.tool_mode`.
///
/// Variants are (de)serialized in snake_case: `direct`, `code_mode`, and
/// `code_mode_only`.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, TS, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ToolMode {
/// Wire value `direct`; the visible callers treat this as code mode being off.
Direct,
/// Wire value `code_mode`; the visible callers treat this as code mode being on.
CodeMode,
/// Wire value `code_mode_only`; code mode is on and the visible tool-planning
/// callers additionally treat the turn as code-mode-only.
CodeModeOnly,
}
/// Leniently deserializes an optional, string-encoded model selector.
///
/// Returns `Ok(Some(value))` only when the field holds a string that `T`
/// recognizes. Everything else — `null`, an unrecognized string, or a
/// non-string value such as a number, bool, array, or object — becomes
/// `Ok(None)`, so an unexpected selector value cannot make the enclosing
/// struct fail to parse.
///
/// # Errors
/// Returns an error only when the underlying deserializer cannot produce a
/// value at all (for example, malformed input).
fn deserialize_optional_model_selector<'de, D, T>(deserializer: D) -> Result<Option<T>, D::Error>
where
    D: Deserializer<'de>,
    T: DeserializeOwned,
{
    // Read the raw value rather than `Option<String>`: a non-string value would
    // otherwise surface as a hard type error for the whole containing struct.
    let value = serde_json::Value::deserialize(deserializer)?;
    if !value.is_string() {
        return Ok(None);
    }
    // An unrecognized string is deliberately treated the same as an omitted field.
    Ok(serde_json::from_value(value).ok())
}
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, TS, JsonSchema)]
pub struct TruncationPolicyConfig {
pub mode: TruncationMode,
@@ -318,6 +339,12 @@ pub struct ModelInfo {
pub used_fallback_model_metadata: bool,
#[serde(default)]
pub supports_search_tool: bool,
#[serde(
default,
skip_serializing_if = "Option::is_none",
deserialize_with = "deserialize_optional_model_selector"
)]
pub tool_mode: Option<ToolMode>,
}
impl ModelInfo {
@@ -612,6 +639,7 @@ mod tests {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
}
}
@@ -829,6 +857,44 @@ mod tests {
assert!(!model.supports_image_detail_original);
assert_eq!(model.web_search_tool_type, WebSearchToolType::Text);
assert!(!model.supports_search_tool);
assert_eq!(model.tool_mode, None);
}
/// A recognized `tool_mode` wire string deserializes to the matching variant.
#[test]
fn model_info_deserializes_known_tool_mode() {
    let mut value =
        serde_json::to_value(test_model(/*spec*/ None)).expect("serialize test model");
    value
        .as_object_mut()
        .expect("model info should be an object")
        .insert(
            String::from("tool_mode"),
            serde_json::Value::from("code_mode_only"),
        );

    let model: ModelInfo = serde_json::from_value(value).expect("deserialize model info");

    assert_eq!(model.tool_mode, Some(ToolMode::CodeModeOnly));
}
/// An unrecognized `tool_mode` wire string deserializes to `None`, and the
/// field is then left out when the model is serialized again.
#[test]
fn model_info_treats_unknown_tool_mode_as_omitted() {
    let mut value =
        serde_json::to_value(test_model(/*spec*/ None)).expect("serialize test model");
    value
        .as_object_mut()
        .expect("model info should be an object")
        .insert(
            String::from("tool_mode"),
            serde_json::Value::from("future_tool_mode"),
        );

    let model: ModelInfo = serde_json::from_value(value).expect("deserialize model info");
    assert_eq!(model.tool_mode, None);

    // Serializing again must not emit the field at all.
    let serialized = serde_json::to_value(model).expect("serialize model info");
    let object = serialized
        .as_object()
        .expect("model info should be an object");
    assert!(!object.contains_key("tool_mode"));
}
#[test]
+1
View File
@@ -44,6 +44,7 @@ fn model_with_shell_type(shell_type: ConfigShellToolType) -> ModelInfo {
input_modalities: codex_protocol::openai_models::default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
tool_mode: None,
}
}