[codex-rs] auto-review model override (#23767)

## Why

Guardian auto-review normally uses the provider-preferred review model
when one is available. Some parent models need model-catalog metadata to
select a different review model while keeping older `/models` payloads
compatible when that metadata is absent.

## What changed

- Added optional `ModelInfo::auto_review_model_override` metadata to the
public model payload as a review-model slug.
- Updated Guardian review model selection to prefer the catalog override
when present, while preserving the existing provider preferred-model
path and parent-model fallback when it is omitted.
- Added focused Guardian coverage for override and no-override model
selection.
- Added an `auto_review` core integration suite test that loads override
metadata from a remote model catalog path and asserts the strict
auto-review `/responses` request uses the catalog-selected review model.
- Updated existing `ModelInfo` fixtures and local catalog constructors
for the new optional field.

## Validation

- `cargo test -p codex-protocol
model_info_defaults_availability_nux_to_none_when_omitted`
- `cargo test -p codex-core guardian_review_uses_`
- `cargo test -p codex-core
remote_model_override_uses_catalog_model_for_strict_auto_review --test
all`
- `just fix -p codex-protocol`
- `just fix -p codex-core`
- `just fmt`
- `git diff --check`
This commit is contained in:
Won Park
2026-06-01 11:51:15 -07:00
committed by GitHub
Unverified
parent 281b416c44
commit f1609d9fb6
16 changed files with 381 additions and 6 deletions
@@ -52,6 +52,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
}
}
@@ -98,6 +98,7 @@ async fn models_client_hits_models_endpoint() {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
}],
};
+13 -6
View File
@@ -682,11 +682,13 @@ pub(super) async fn run_guardian_review_session(
fallback
}
};
let preferred_model_id = turn.provider.approval_review_preferred_model();
let preferred_model = available_models
let model_override = turn.model_info.auto_review_model_override.as_deref();
let review_model_id =
model_override.unwrap_or_else(|| turn.provider.approval_review_preferred_model());
let review_model = available_models
.iter()
.find(|preset| preset.model == preferred_model_id);
let (guardian_model, guardian_reasoning_effort) = if let Some(preset) = preferred_model {
.find(|preset| preset.model == review_model_id);
let (guardian_model, guardian_reasoning_effort) = if let Some(preset) = review_model {
let reasoning_effort = preferred_reasoning_effort(
preset
.supported_reasoning_efforts
@@ -694,7 +696,7 @@ pub(super) async fn run_guardian_review_session(
.any(|effort| effort.effort == codex_protocol::openai_models::ReasoningEffort::Low),
Some(preset.default_reasoning_effort),
);
(preferred_model_id.to_string(), reasoning_effort)
(review_model_id.to_string(), reasoning_effort)
} else {
let reasoning_effort = preferred_reasoning_effort(
turn.model_info
@@ -704,7 +706,12 @@ pub(super) async fn run_guardian_review_session(
turn.reasoning_effort
.or(turn.model_info.default_reasoning_level),
);
(turn.model_info.slug.clone(), reasoning_effort)
(
model_override
.unwrap_or(turn.model_info.slug.as_str())
.to_string(),
reasoning_effort,
)
};
let guardian_config = build_guardian_review_session_config(
turn.config.as_ref(),
+89
View File
@@ -1288,6 +1288,95 @@ fn guardian_output_schema_requires_only_outcome_and_allows_optional_details() {
);
}
async fn guardian_request_model_for_auto_review_override(
auto_review_model_override: Option<String>,
) -> anyhow::Result<(String, String, String)> {
let server = start_mock_server().await;
let guardian_assessment = serde_json::json!({
"outcome": "allow",
})
.to_string();
let request_log = mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-guardian"),
ev_assistant_message("msg-guardian", &guardian_assessment),
ev_completed("resp-guardian"),
]),
)
.await;
let (session, mut turn) = guardian_test_session_and_turn(&server).await;
Arc::get_mut(&mut turn)
.expect("turn should be unique")
.model_info
.auto_review_model_override = auto_review_model_override;
let parent_model = turn.model_info.slug.clone();
let preferred_model = turn.provider.approval_review_preferred_model().to_string();
seed_guardian_parent_history(&session, &turn).await;
let outcome = run_guardian_review_session_for_test(
Arc::clone(&session),
turn,
GuardianApprovalRequest::Shell {
id: "shell-1".to_string(),
command: vec!["git".to_string(), "push".to_string()],
cwd: test_path_buf("/repo/codex-rs/core").abs(),
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
additional_permissions: None,
justification: None,
},
Some("Sandbox denied outbound git push to github.com.".to_string()),
guardian_output_schema(),
/*external_cancel*/ None,
)
.await;
let (GuardianReviewOutcome::Completed(_), _) = outcome else {
panic!("expected guardian assessment");
};
let request_model = request_log
.single_request()
.body_json()
.get("model")
.and_then(|value| value.as_str())
.expect("guardian request should include a model")
.to_string();
Ok((request_model, parent_model, preferred_model))
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn guardian_review_uses_model_catalog_override_when_preferred_review_model_exists()
-> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let override_model = "guardian-review-model-override".to_string();
let (request_model, parent_model, preferred_model) =
guardian_request_model_for_auto_review_override(Some(override_model.clone())).await?;
assert_eq!(request_model, override_model);
assert_ne!(request_model, parent_model);
assert_ne!(request_model, preferred_model);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn guardian_review_uses_preferred_review_model_without_model_catalog_override()
-> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let (request_model, parent_model, preferred_model) =
guardian_request_model_for_auto_review_override(/*auto_review_model_override*/ None)
.await?;
assert_eq!(request_model, preferred_model);
assert_ne!(request_model, parent_model);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn guardian_review_request_layout_matches_model_visible_request_snapshot()
-> anyhow::Result<()> {
+259
View File
@@ -0,0 +1,259 @@
#![allow(clippy::expect_used)]
use anyhow::Result;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_models_manager::manager::RefreshStrategy;
use codex_protocol::config_types::ApprovalsReviewer;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::models::PermissionProfile;
use codex_protocol::openai_models::ApplyPatchToolType;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffortPreset;
use codex_protocol::openai_models::TruncationPolicyConfig;
use codex_protocol::openai_models::default_input_modalities;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
use codex_protocol::request_permissions::PermissionGrantScope;
use codex_protocol::request_permissions::RequestPermissionsResponse;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_apply_patch_custom_tool_call;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_models_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_sandbox;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::test_codex::turn_permission_fields;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use serde_json::json;
use wiremock::MockServer;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_model_override_uses_catalog_model_for_strict_auto_review() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let model = "remote-auto-review-parent";
let review_model = "remote-auto-review-reviewer";
mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model_with_auto_review_override(model, review_model)],
},
)
.await;
let permissions_call_id = "auto-review-permissions-call";
let permissions_args = json!({
"reason": "exercise strict Guardian model selection",
"permissions": {
"network": {
"enabled": true,
},
},
});
let patch_call_id = "auto-review-patch-call";
let patch = "*** Begin Patch\n*** Add File: auto-review-model-override.txt\n+exercise Guardian model selection\n*** End Patch\n";
let responses = mount_sse_sequence(
&server,
vec![
sse(vec![
ev_response_created("resp-parent-1"),
ev_function_call(
permissions_call_id,
"request_permissions",
&serde_json::to_string(&permissions_args)?,
),
ev_completed("resp-parent-1"),
]),
sse(vec![
ev_response_created("resp-parent-2"),
ev_apply_patch_custom_tool_call(patch_call_id, patch),
ev_completed("resp-parent-2"),
]),
sse(vec![
ev_response_created("resp-guardian"),
ev_assistant_message(
"msg-guardian",
&json!({
"risk_level": "low",
"user_authorization": "high",
"outcome": "allow",
"rationale": "The patch only exercises Guardian model selection.",
})
.to_string(),
),
ev_completed("resp-guardian"),
]),
sse(vec![
ev_response_created("resp-parent-3"),
ev_assistant_message("msg-parent", "done"),
ev_completed("resp-parent-3"),
]),
],
)
.await;
let mut builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some("gpt-5.4".to_string());
config.approvals_reviewer = ApprovalsReviewer::User;
config
.features
.enable(Feature::ExecPermissionApprovals)
.expect("test config should allow feature update");
config
.features
.enable(Feature::RequestPermissionsTool)
.expect("test config should allow feature update");
});
let TestCodex {
codex,
cwd,
config,
thread_manager,
..
} = builder.build(&server).await?;
let models_manager = thread_manager.get_models_manager();
models_manager
.list_models(RefreshStrategy::OnlineIfUncached)
.await;
let model_info = models_manager
.get_model_info(model, &config.to_models_manager_config())
.await;
assert_eq!(
model_info.auto_review_model_override,
Some(review_model.to_string())
);
core_test_support::submit_thread_settings(
&codex,
codex_protocol::protocol::ThreadSettingsOverrides {
model: Some(model.to_string()),
..Default::default()
},
)
.await?;
let cwd_path = cwd.path().to_path_buf();
let (sandbox_policy, permission_profile) =
turn_permission_fields(PermissionProfile::read_only(), cwd_path.as_path());
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "run the Guardian model override check".into(),
text_elements: Vec::new(),
}],
environments: None,
final_output_json_schema: None,
responsesapi_client_metadata: None,
additional_context: Default::default(),
thread_settings: codex_protocol::protocol::ThreadSettingsOverrides {
cwd: Some(cwd_path),
approval_policy: Some(AskForApproval::OnRequest),
sandbox_policy: Some(sandbox_policy),
permission_profile,
..Default::default()
},
})
.await?;
let permissions_request = wait_for_event(&codex, |event| {
matches!(
event,
EventMsg::RequestPermissions(_) | EventMsg::TurnComplete(_)
)
})
.await;
let EventMsg::RequestPermissions(permissions_request) = permissions_request else {
panic!("expected request_permissions before completion");
};
assert_eq!(permissions_request.call_id, permissions_call_id);
codex
.submit(Op::RequestPermissionsResponse {
id: permissions_request.call_id,
response: RequestPermissionsResponse {
permissions: permissions_request.permissions,
scope: PermissionGrantScope::Turn,
strict_auto_review: true,
},
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let guardian_request = responses
.requests()
.into_iter()
.find(|request| {
request.body_contains_text("auto-review-model-override.txt")
&& request
.instructions_text()
.starts_with("You are judging one planned coding-agent action.")
})
.expect("expected Guardian request for apply_patch");
assert_eq!(
guardian_request.body_json()["model"].as_str(),
Some(review_model)
);
Ok(())
}
fn remote_model_with_auto_review_override(slug: &str, review_model: &str) -> ModelInfo {
ModelInfo {
slug: slug.to_string(),
display_name: format!("{slug} display"),
description: Some(format!("{slug} description")),
default_reasoning_level: Some(ReasoningEffort::Medium),
supported_reasoning_levels: vec![ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: ReasoningEffort::Medium.to_string(),
}],
shell_type: ConfigShellToolType::ShellCommand,
visibility: ModelVisibility::List,
supported_in_api: true,
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: Some(review_model.to_string()),
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
service_tiers: Vec::new(),
default_service_tier: None,
upgrade: None,
base_instructions: "base instructions".to_string(),
model_messages: None,
supports_reasoning_summaries: false,
default_reasoning_summary: ReasoningSummary::Auto,
support_verbosity: false,
default_verbosity: None,
availability_nux: None,
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
web_search_tool_type: Default::default(),
truncation_policy: TruncationPolicyConfig::bytes(/*limit*/ 10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
max_context_window: None,
auto_compact_token_limit: None,
effective_context_window_percent: 95,
experimental_supported_tools: Vec::new(),
}
}
+1
View File
@@ -36,6 +36,7 @@ mod agents_md;
mod apply_patch_cli;
#[cfg(not(target_os = "windows"))]
mod approvals;
mod auto_review;
mod cli_stream;
mod client;
mod client_websockets;
@@ -112,6 +112,7 @@ fn test_model_info(
input_modalities,
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
@@ -930,6 +931,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result<
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
@@ -370,6 +370,7 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
}
}
+2
View File
@@ -592,6 +592,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
};
@@ -703,6 +704,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() -
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
};
@@ -477,6 +477,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
@@ -727,6 +728,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
@@ -1211,6 +1213,7 @@ fn test_remote_model_with_policy(
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
priority,
additional_speed_tiers: Vec::new(),
+1
View File
@@ -1349,6 +1349,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re
input_modalities: vec![InputModality::Text],
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
}],
},
@@ -60,6 +60,7 @@ fn test_model_info(
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
+1
View File
@@ -1355,6 +1355,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an
input_modalities: vec![InputModality::Text],
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
priority: 1,
additional_speed_tiers: Vec::new(),
@@ -99,6 +99,7 @@ pub fn model_info_from_slug(slug: &str) -> ModelInfo {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: true, // this is the fallback model metadata
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
}
}
+4
View File
@@ -339,6 +339,8 @@ pub struct ModelInfo {
pub used_fallback_model_metadata: bool,
#[serde(default)]
pub supports_search_tool: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub auto_review_model_override: Option<String>,
#[serde(
default,
skip_serializing_if = "Option::is_none",
@@ -639,6 +641,7 @@ mod tests {
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
}
}
@@ -857,6 +860,7 @@ mod tests {
assert!(!model.supports_image_detail_original);
assert_eq!(model.web_search_tool_type, WebSearchToolType::Text);
assert!(!model.supports_search_tool);
assert_eq!(model.auto_review_model_override, None);
assert_eq!(model.tool_mode, None);
}
+1
View File
@@ -44,6 +44,7 @@ fn model_with_shell_type(shell_type: ConfigShellToolType) -> ModelInfo {
input_modalities: codex_protocol::openai_models::default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
auto_review_model_override: None,
tool_mode: None,
}
}