diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index aaf9f6947..56a937866 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -52,6 +52,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, } } diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index 38d8dc986..742b39d5f 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -98,6 +98,7 @@ async fn models_client_hits_models_endpoint() { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, }], }; diff --git a/codex-rs/core/src/guardian/review.rs b/codex-rs/core/src/guardian/review.rs index 8f53e8f8f..244d6e09e 100644 --- a/codex-rs/core/src/guardian/review.rs +++ b/codex-rs/core/src/guardian/review.rs @@ -682,11 +682,13 @@ pub(super) async fn run_guardian_review_session( fallback } }; - let preferred_model_id = turn.provider.approval_review_preferred_model(); - let preferred_model = available_models + let model_override = turn.model_info.auto_review_model_override.as_deref(); + let review_model_id = + model_override.unwrap_or_else(|| turn.provider.approval_review_preferred_model()); + let review_model = available_models .iter() - .find(|preset| preset.model == preferred_model_id); - let (guardian_model, guardian_reasoning_effort) = if let Some(preset) = preferred_model { + .find(|preset| preset.model == review_model_id); + let (guardian_model, guardian_reasoning_effort) = if let Some(preset) = review_model { let reasoning_effort = preferred_reasoning_effort( preset .supported_reasoning_efforts @@ -694,7 +696,7 @@ pub(super) async fn run_guardian_review_session( .any(|effort| effort.effort == codex_protocol::openai_models::ReasoningEffort::Low), Some(preset.default_reasoning_effort), ); - (preferred_model_id.to_string(), reasoning_effort) + (review_model_id.to_string(), reasoning_effort) } else { let reasoning_effort = preferred_reasoning_effort( turn.model_info @@ -704,7 +706,12 @@ pub(super) async fn run_guardian_review_session( turn.reasoning_effort .or(turn.model_info.default_reasoning_level), ); - (turn.model_info.slug.clone(), reasoning_effort) + ( + model_override + .unwrap_or(turn.model_info.slug.as_str()) + .to_string(), + reasoning_effort, + ) }; let guardian_config = build_guardian_review_session_config( turn.config.as_ref(), diff --git a/codex-rs/core/src/guardian/tests.rs b/codex-rs/core/src/guardian/tests.rs index 3d44fa41d..40a07d380 100644 --- a/codex-rs/core/src/guardian/tests.rs +++ b/codex-rs/core/src/guardian/tests.rs @@ -1288,6 +1288,95 @@ fn guardian_output_schema_requires_only_outcome_and_allows_optional_details() { ); } +async fn guardian_request_model_for_auto_review_override( + auto_review_model_override: Option, +) -> anyhow::Result<(String, String, String)> { + let server = start_mock_server().await; + let guardian_assessment = serde_json::json!({ + "outcome": "allow", + }) + .to_string(); + let request_log = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-guardian"), + ev_assistant_message("msg-guardian", &guardian_assessment), + ev_completed("resp-guardian"), + ]), + ) + .await; + + let (session, mut turn) = guardian_test_session_and_turn(&server).await; + Arc::get_mut(&mut turn) + .expect("turn should be unique") + .model_info + .auto_review_model_override = auto_review_model_override; + let parent_model = turn.model_info.slug.clone(); + let preferred_model = turn.provider.approval_review_preferred_model().to_string(); + seed_guardian_parent_history(&session, &turn).await; + + let outcome = run_guardian_review_session_for_test( + Arc::clone(&session), + turn, + GuardianApprovalRequest::Shell { + id: "shell-1".to_string(), + command: vec!["git".to_string(), "push".to_string()], + cwd: test_path_buf("/repo/codex-rs/core").abs(), + sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault, + additional_permissions: None, + justification: None, + }, + Some("Sandbox denied outbound git push to github.com.".to_string()), + guardian_output_schema(), + /*external_cancel*/ None, + ) + .await; + let (GuardianReviewOutcome::Completed(_), _) = outcome else { + panic!("expected guardian assessment"); + }; + + let request_model = request_log + .single_request() + .body_json() + .get("model") + .and_then(|value| value.as_str()) + .expect("guardian request should include a model") + .to_string(); + + Ok((request_model, parent_model, preferred_model)) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn guardian_review_uses_model_catalog_override_when_preferred_review_model_exists() +-> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let override_model = "guardian-review-model-override".to_string(); + let (request_model, parent_model, preferred_model) = + guardian_request_model_for_auto_review_override(Some(override_model.clone())).await?; + + assert_eq!(request_model, override_model); + assert_ne!(request_model, parent_model); + assert_ne!(request_model, preferred_model); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn guardian_review_uses_preferred_review_model_without_model_catalog_override() +-> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let (request_model, parent_model, preferred_model) = + guardian_request_model_for_auto_review_override(/*auto_review_model_override*/ None) + .await?; + + assert_eq!(request_model, preferred_model); + assert_ne!(request_model, parent_model); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn guardian_review_request_layout_matches_model_visible_request_snapshot() -> anyhow::Result<()> { diff --git a/codex-rs/core/tests/suite/auto_review.rs b/codex-rs/core/tests/suite/auto_review.rs new file mode 100644 index 000000000..b3574cf5e --- /dev/null +++ b/codex-rs/core/tests/suite/auto_review.rs @@ -0,0 +1,259 @@ +#![allow(clippy::expect_used)] + +use anyhow::Result; +use codex_features::Feature; +use codex_login::CodexAuth; +use codex_models_manager::manager::RefreshStrategy; +use codex_protocol::config_types::ApprovalsReviewer; +use codex_protocol::config_types::ReasoningSummary; +use codex_protocol::models::PermissionProfile; +use codex_protocol::openai_models::ApplyPatchToolType; +use codex_protocol::openai_models::ConfigShellToolType; +use codex_protocol::openai_models::ModelInfo; +use codex_protocol::openai_models::ModelVisibility; +use codex_protocol::openai_models::ModelsResponse; +use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; +use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::Op; +use codex_protocol::request_permissions::PermissionGrantScope; +use codex_protocol::request_permissions::RequestPermissionsResponse; +use codex_protocol::user_input::UserInput; +use core_test_support::responses::ev_apply_patch_custom_tool_call; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_function_call; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_models_once; +use core_test_support::responses::mount_sse_sequence; +use core_test_support::responses::sse; +use core_test_support::skip_if_no_network; +use core_test_support::skip_if_sandbox; +use core_test_support::test_codex::TestCodex; +use core_test_support::test_codex::test_codex; +use core_test_support::test_codex::turn_permission_fields; +use core_test_support::wait_for_event; +use pretty_assertions::assert_eq; +use serde_json::json; +use wiremock::MockServer; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn remote_model_override_uses_catalog_model_for_strict_auto_review() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = MockServer::start().await; + let model = "remote-auto-review-parent"; + let review_model = "remote-auto-review-reviewer"; + mount_models_once( + &server, + ModelsResponse { + models: vec![remote_model_with_auto_review_override(model, review_model)], + }, + ) + .await; + + let permissions_call_id = "auto-review-permissions-call"; + let permissions_args = json!({ + "reason": "exercise strict Guardian model selection", + "permissions": { + "network": { + "enabled": true, + }, + }, + }); + let patch_call_id = "auto-review-patch-call"; + let patch = "*** Begin Patch\n*** Add File: auto-review-model-override.txt\n+exercise Guardian model selection\n*** End Patch\n"; + let responses = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-parent-1"), + ev_function_call( + permissions_call_id, + "request_permissions", + &serde_json::to_string(&permissions_args)?, + ), + ev_completed("resp-parent-1"), + ]), + sse(vec![ + ev_response_created("resp-parent-2"), + ev_apply_patch_custom_tool_call(patch_call_id, patch), + ev_completed("resp-parent-2"), + ]), + sse(vec![ + ev_response_created("resp-guardian"), + ev_assistant_message( + "msg-guardian", + &json!({ + "risk_level": "low", + "user_authorization": "high", + "outcome": "allow", + "rationale": "The patch only exercises Guardian model selection.", + }) + .to_string(), + ), + ev_completed("resp-guardian"), + ]), + sse(vec![ + ev_response_created("resp-parent-3"), + ev_assistant_message("msg-parent", "done"), + ev_completed("resp-parent-3"), + ]), + ], + ) + .await; + + let mut builder = test_codex() + .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) + .with_config(|config| { + config.model = Some("gpt-5.4".to_string()); + config.approvals_reviewer = ApprovalsReviewer::User; + config + .features + .enable(Feature::ExecPermissionApprovals) + .expect("test config should allow feature update"); + config + .features + .enable(Feature::RequestPermissionsTool) + .expect("test config should allow feature update"); + }); + let TestCodex { + codex, + cwd, + config, + thread_manager, + .. + } = builder.build(&server).await?; + + let models_manager = thread_manager.get_models_manager(); + models_manager + .list_models(RefreshStrategy::OnlineIfUncached) + .await; + let model_info = models_manager + .get_model_info(model, &config.to_models_manager_config()) + .await; + assert_eq!( + model_info.auto_review_model_override, + Some(review_model.to_string()) + ); + + core_test_support::submit_thread_settings( + &codex, + codex_protocol::protocol::ThreadSettingsOverrides { + model: Some(model.to_string()), + ..Default::default() + }, + ) + .await?; + + let cwd_path = cwd.path().to_path_buf(); + let (sandbox_policy, permission_profile) = + turn_permission_fields(PermissionProfile::read_only(), cwd_path.as_path()); + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "run the Guardian model override check".into(), + text_elements: Vec::new(), + }], + environments: None, + final_output_json_schema: None, + responsesapi_client_metadata: None, + additional_context: Default::default(), + thread_settings: codex_protocol::protocol::ThreadSettingsOverrides { + cwd: Some(cwd_path), + approval_policy: Some(AskForApproval::OnRequest), + sandbox_policy: Some(sandbox_policy), + permission_profile, + ..Default::default() + }, + }) + .await?; + + let permissions_request = wait_for_event(&codex, |event| { + matches!( + event, + EventMsg::RequestPermissions(_) | EventMsg::TurnComplete(_) + ) + }) + .await; + let EventMsg::RequestPermissions(permissions_request) = permissions_request else { + panic!("expected request_permissions before completion"); + }; + assert_eq!(permissions_request.call_id, permissions_call_id); + codex + .submit(Op::RequestPermissionsResponse { + id: permissions_request.call_id, + response: RequestPermissionsResponse { + permissions: permissions_request.permissions, + scope: PermissionGrantScope::Turn, + strict_auto_review: true, + }, + }) + .await?; + + wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + + let guardian_request = responses + .requests() + .into_iter() + .find(|request| { + request.body_contains_text("auto-review-model-override.txt") + && request + .instructions_text() + .starts_with("You are judging one planned coding-agent action.") + }) + .expect("expected Guardian request for apply_patch"); + assert_eq!( + guardian_request.body_json()["model"].as_str(), + Some(review_model) + ); + + Ok(()) +} + +fn remote_model_with_auto_review_override(slug: &str, review_model: &str) -> ModelInfo { + ModelInfo { + slug: slug.to_string(), + display_name: format!("{slug} display"), + description: Some(format!("{slug} description")), + default_reasoning_level: Some(ReasoningEffort::Medium), + supported_reasoning_levels: vec![ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: ReasoningEffort::Medium.to_string(), + }], + shell_type: ConfigShellToolType::ShellCommand, + visibility: ModelVisibility::List, + supported_in_api: true, + input_modalities: default_input_modalities(), + used_fallback_model_metadata: false, + supports_search_tool: false, + auto_review_model_override: Some(review_model.to_string()), + tool_mode: None, + priority: 1, + additional_speed_tiers: Vec::new(), + service_tiers: Vec::new(), + default_service_tier: None, + upgrade: None, + base_instructions: "base instructions".to_string(), + model_messages: None, + supports_reasoning_summaries: false, + default_reasoning_summary: ReasoningSummary::Auto, + support_verbosity: false, + default_verbosity: None, + availability_nux: None, + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + web_search_tool_type: Default::default(), + truncation_policy: TruncationPolicyConfig::bytes(/*limit*/ 10_000), + supports_parallel_tool_calls: false, + supports_image_detail_original: false, + context_window: Some(272_000), + max_context_window: None, + auto_compact_token_limit: None, + effective_context_window_percent: 95, + experimental_supported_tools: Vec::new(), + } +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index 51a5bea57..1e4a5f950 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -36,6 +36,7 @@ mod agents_md; mod apply_patch_cli; #[cfg(not(target_os = "windows"))] mod approvals; +mod auto_review; mod cli_stream; mod client; mod client_websockets; diff --git a/codex-rs/core/tests/suite/model_switching.rs b/codex-rs/core/tests/suite/model_switching.rs index 90bb741f0..f7d2bf578 100644 --- a/codex-rs/core/tests/suite/model_switching.rs +++ b/codex-rs/core/tests/suite/model_switching.rs @@ -112,6 +112,7 @@ fn test_model_info( input_modalities, used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, priority: 1, additional_speed_tiers: Vec::new(), @@ -930,6 +931,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result< input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, priority: 1, additional_speed_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/models_cache_ttl.rs b/codex-rs/core/tests/suite/models_cache_ttl.rs index 76ebb4529..5244a178a 100644 --- a/codex-rs/core/tests/suite/models_cache_ttl.rs +++ b/codex-rs/core/tests/suite/models_cache_ttl.rs @@ -370,6 +370,7 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, } } diff --git a/codex-rs/core/tests/suite/personality.rs b/codex-rs/core/tests/suite/personality.rs index 4cb8b63a0..4219576d4 100644 --- a/codex-rs/core/tests/suite/personality.rs +++ b/codex-rs/core/tests/suite/personality.rs @@ -592,6 +592,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, }; @@ -703,6 +704,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() - input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, }; diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index 82db097f3..00222a3a3 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -477,6 +477,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, priority: 1, additional_speed_tiers: Vec::new(), @@ -727,6 +728,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, priority: 1, additional_speed_tiers: Vec::new(), @@ -1211,6 +1213,7 @@ fn test_remote_model_with_policy( input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, priority, additional_speed_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index 0d669e5dd..c3d598c7b 100644 --- a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -1349,6 +1349,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re input_modalities: vec![InputModality::Text], used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, }], }, diff --git a/codex-rs/core/tests/suite/spawn_agent_description.rs b/codex-rs/core/tests/suite/spawn_agent_description.rs index 895ef8545..787ba1050 100644 --- a/codex-rs/core/tests/suite/spawn_agent_description.rs +++ b/codex-rs/core/tests/suite/spawn_agent_description.rs @@ -60,6 +60,7 @@ fn test_model_info( input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, priority: 1, additional_speed_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 6062228de..0c6cdf6d8 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -1355,6 +1355,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an input_modalities: vec![InputModality::Text], used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, priority: 1, additional_speed_tiers: Vec::new(), diff --git a/codex-rs/models-manager/src/model_info.rs b/codex-rs/models-manager/src/model_info.rs index e57b4e186..379d173e4 100644 --- a/codex-rs/models-manager/src/model_info.rs +++ b/codex-rs/models-manager/src/model_info.rs @@ -99,6 +99,7 @@ pub fn model_info_from_slug(slug: &str) -> ModelInfo { input_modalities: default_input_modalities(), used_fallback_model_metadata: true, // this is the fallback model metadata supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, } } diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index b9fa9c933..0214f450f 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -339,6 +339,8 @@ pub struct ModelInfo { pub used_fallback_model_metadata: bool, #[serde(default)] pub supports_search_tool: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub auto_review_model_override: Option, #[serde( default, skip_serializing_if = "Option::is_none", @@ -639,6 +641,7 @@ mod tests { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, } } @@ -857,6 +860,7 @@ mod tests { assert!(!model.supports_image_detail_original); assert_eq!(model.web_search_tool_type, WebSearchToolType::Text); assert!(!model.supports_search_tool); + assert_eq!(model.auto_review_model_override, None); assert_eq!(model.tool_mode, None); } diff --git a/codex-rs/tools/src/tool_config_tests.rs b/codex-rs/tools/src/tool_config_tests.rs index 2e49c1558..e7fc4d8c1 100644 --- a/codex-rs/tools/src/tool_config_tests.rs +++ b/codex-rs/tools/src/tool_config_tests.rs @@ -44,6 +44,7 @@ fn model_with_shell_type(shell_type: ConfigShellToolType) -> ModelInfo { input_modalities: codex_protocol::openai_models::default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, tool_mode: None, } }